rsCpuScriptGroup2.cpp revision b0abb140ac51b93d1a85aadaa63fe057f2d29850
1a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsCpuScriptGroup2.h" 2a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 3a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <dlfcn.h> 4a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <stdio.h> 5a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <stdlib.h> 6a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <unistd.h> 7a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 8a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <set> 9a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <sstream> 10a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <string> 11a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <vector> 12a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 13a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#ifndef RS_COMPATIBILITY_LIB 14a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "bcc/Config/Config.h" 15a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include <sys/wait.h> 16a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#endif 17a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 18a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "cpu_ref/rsCpuCore.h" 19a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsClosure.h" 20a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsContext.h" 21a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsCpuCore.h" 22caf4126512b2152ea5f6573ce5d9ca29767b9678Tim Murray#include "rsCpuExecutable.h" 23a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsCpuScript.h" 24a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsScript.h" 25a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsScriptGroup2.h" 26a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray#include "rsScriptIntrinsic.h" 27a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 28a89eef413da39b013f2e931c9f207ef2587eef01Tim Murrayusing std::string; 29a89eef413da39b013f2e931c9f207ef2587eef01Tim Murrayusing std::vector; 3044bef6fba6244292b751387f3d6c31cca96c28adChris Wailes 3144bef6fba6244292b751387f3d6c31cca96c28adChris Wailesnamespace android { 32a89eef413da39b013f2e931c9f207ef2587eef01Tim Murraynamespace renderscript { 33a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 34a89eef413da39b013f2e931c9f207ef2587eef01Tim Murraynamespace { 35a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 3644bef6fba6244292b751387f3d6c31cca96c28adChris Wailesconst size_t DefaultKernelArgCount = 2; 37a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 38caf4126512b2152ea5f6573ce5d9ca29767b9678Tim Murrayvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 39caf4126512b2152ea5f6573ce5d9ca29767b9678Tim Murray uint32_t xend, uint32_t outstep) { 40a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 41a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 42a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 43a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray const size_t oldInLen = mutable_kinfo->inLen; 44a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 45a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray decltype(mutable_kinfo->inStride) oldInStride; 46a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 47a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 48a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray for (CPUClosure* cpuClosure : closures) { 49a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray const Closure* closure = cpuClosure->mClosure; 50a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 51a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray // There had better be enough space in mutable_kinfo 52a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 53a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray 54a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray for (size_t i = 0; i < closure->mNumArg; i++) { 55a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray const void* arg = closure->mArgs[i]; 56a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray const Allocation* a = (const Allocation*)arg; 57a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray const uint32_t eStride = a->mHal.state.elementSizeBytes; 58a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 59a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray eStride * xstart; 60a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray if (kinfo->dim.y > 1) { 61a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 62a89eef413da39b013f2e931c9f207ef2587eef01Tim Murray } 63 mutable_kinfo->inPtr[i] = ptr; 64 mutable_kinfo->inStride[i] = eStride; 65 } 66 mutable_kinfo->inLen = closure->mNumArg; 67 68 const Allocation* out = closure->mReturnValue; 69 const uint32_t ostep = out->mHal.state.elementSizeBytes; 70 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 71 ostep * xstart; 72 if (kinfo->dim.y > 1) { 73 ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; 74 } 75 76 rsAssert(kinfo->outLen <= 1); 77 mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 78 79 cpuClosure->mFunc(kinfo, xstart, xend, ostep); 80 } 81 82 mutable_kinfo->inLen = oldInLen; 83 memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 84} 85 86} // namespace 87 88Batch::Batch(CpuScriptGroup2Impl* group, const char* name) : 89 mGroup(group), mFunc(nullptr) { 90 mName = strndup(name, strlen(name)); 91} 92 93Batch::~Batch() { 94 for (CPUClosure* c : mClosures) { 95 delete c; 96 } 97 free(mName); 98} 99 100bool Batch::conflict(CPUClosure* cpuClosure) const { 101 if (mClosures.empty()) { 102 return false; 103 } 104 105 const Closure* closure = cpuClosure->mClosure; 106 107 if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 108 // An invoke should be in a batch by itself, so it conflicts with any other 109 // closure. 110 return true; 111 } 112 113 const auto& globalDeps = closure->mGlobalDeps; 114 const auto& argDeps = closure->mArgDeps; 115 116 for (CPUClosure* c : mClosures) { 117 const Closure* batched = c->mClosure; 118 if (globalDeps.find(batched) != globalDeps.end()) { 119 return true; 120 } 121 const auto& it = argDeps.find(batched); 122 if (it != argDeps.end()) { 123 const auto& args = (*it).second; 124 for (const auto &p1 : *args) { 125 if (p1.second->get() != nullptr) { 126 return true; 127 } 128 } 129 } 130 } 131 132 return false; 133} 134 135CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 136 const ScriptGroupBase *sg) : 137 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 138 mExecutable(nullptr), mScriptObj(nullptr) { 139 rsAssert(!mGroup->mClosures.empty()); 140 141 Batch* batch = new Batch(this, "Batch0"); 142 int i = 0; 143 for (Closure* closure: mGroup->mClosures) { 144 CPUClosure* cc; 145 const IDBase* funcID = closure->mFunctionID.get(); 146 RsdCpuScriptImpl* si = 147 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 148 if (closure->mIsKernel) { 149 MTLaunchStruct mtls; 150 si->forEachKernelSetup(funcID->mSlot, &mtls); 151 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 152 } else { 153 cc = new CPUClosure(closure, si); 154 } 155 156 if (batch->conflict(cc)) { 157 mBatches.push_back(batch); 158 std::stringstream ss; 159 ss << "Batch" << ++i; 160 batch = new Batch(this, ss.str().c_str()); 161 } 162 163 batch->mClosures.push_back(cc); 164 } 165 166 rsAssert(!batch->mClosures.empty()); 167 mBatches.push_back(batch); 168 169#ifndef RS_COMPATIBILITY_LIB 170 compile(mGroup->mCacheDir); 171 if (mScriptObj != nullptr && mExecutable != nullptr) { 172 for (Batch* batch : mBatches) { 173 batch->resolveFuncPtr(mScriptObj); 174 } 175 } 176#endif // RS_COMPATIBILITY_LIB 177} 178 179void Batch::resolveFuncPtr(void* sharedObj) { 180 std::string funcName(mName); 181 if (mClosures.front()->mClosure->mIsKernel) { 182 funcName.append(".expand"); 183 } 184 mFunc = dlsym(sharedObj, funcName.c_str()); 185 rsAssert (mFunc != nullptr); 186} 187 188CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 189 for (Batch* batch : mBatches) { 190 delete batch; 191 } 192 // TODO: move this dlclose into ~ScriptExecutable(). 193 if (mScriptObj != nullptr) { 194 dlclose(mScriptObj); 195 } 196 delete mExecutable; 197} 198 199namespace { 200 201#ifndef RS_COMPATIBILITY_LIB 202 203string getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 204 *coreLibRelaxedPath = ""; 205 206 // If we're debugging, use the debug library. 207 if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 208 return SYSLIBPATH"/libclcore_debug.bc"; 209 } 210 211 // Check for a platform specific library 212 213#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 214 // NEON-capable ARMv7a devices can use an accelerated math library 215 // for all reduced precision scripts. 216 // ARMv8 does not use NEON, as ASIMD can be used with all precision 217 // levels. 218 *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; 219#endif 220 221#if defined(__i386__) || defined(__x86_64__) 222 // x86 devices will use an optimized library. 223 return SYSLIBPATH"/libclcore_x86.bc"; 224#else 225 return SYSLIBPATH"/libclcore.bc"; 226#endif 227} 228 229string getFileName(string path) { 230 unsigned found = path.find_last_of("/\\"); 231 return path.substr(found + 1); 232} 233 234void setupCompileArguments( 235 const vector<string>& inputs, const vector<string>& kernelBatches, 236 const vector<string>& invokeBatches, 237 const string& output_dir, const string& output_filename, 238 const string& coreLibPath, const string& coreLibRelaxedPath, 239 vector<const char*>* args) { 240 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 241 args->push_back("-fPIC"); 242 args->push_back("-embedRSInfo"); 243 args->push_back("-mtriple"); 244 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 245 args->push_back("-bclib"); 246 args->push_back(coreLibPath.c_str()); 247 args->push_back("-bclib_relaxed"); 248 args->push_back(coreLibRelaxedPath.c_str()); 249 for (const string& input : inputs) { 250 args->push_back(input.c_str()); 251 } 252 for (const string& batch : kernelBatches) { 253 args->push_back("-merge"); 254 args->push_back(batch.c_str()); 255 } 256 for (const string& batch : invokeBatches) { 257 args->push_back("-invoke"); 258 args->push_back(batch.c_str()); 259 } 260 args->push_back("-output_path"); 261 args->push_back(output_dir.c_str()); 262 args->push_back("-o"); 263 args->push_back(output_filename.c_str()); 264 args->push_back(nullptr); 265} 266 267bool fuseAndCompile(const char** arguments, 268 const string& commandLine) { 269 const pid_t pid = fork(); 270 271 if (pid == -1) { 272 ALOGE("Couldn't fork for bcc execution"); 273 return false; 274 } 275 276 if (pid == 0) { 277 // Child process 278 ALOGV("Invoking BCC with: %s", commandLine.c_str()); 279 execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); 280 281 ALOGE("execv() failed: %s", strerror(errno)); 282 abort(); 283 return false; 284 } 285 286 // Parent process 287 int status = 0; 288 const pid_t w = waitpid(pid, &status, 0); 289 if (w == -1) { 290 return false; 291 } 292 293 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { 294 ALOGE("bcc terminated unexpectedly"); 295 return false; 296 } 297 298 return true; 299} 300 301void generateSourceSlot(const Closure& closure, 302 const std::vector<std::string>& inputs, 303 std::stringstream& ss) { 304 const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 305 const Script* script = funcID->mScript; 306 307 rsAssert (!script->isIntrinsic()); 308 309 const RsdCpuScriptImpl *cpuScript = 310 (const RsdCpuScriptImpl*)script->mHal.drv; 311 const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 312 313 const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 314 inputs.begin(); 315 316 ss << index << "," << funcID->mSlot << "."; 317} 318 319#endif // RS_COMPATIBILTY_LIB 320 321} // anonymous namespace 322 323void CpuScriptGroup2Impl::compile(const char* cacheDir) { 324#ifndef RS_COMPATIBILITY_LIB 325 if (mGroup->mClosures.size() < 2) { 326 return; 327 } 328 329 //===--------------------------------------------------------------------===// 330 // Fuse the input kernels and generate native code in an object file 331 //===--------------------------------------------------------------------===// 332 333 std::set<string> inputSet; 334 for (Closure* closure : mGroup->mClosures) { 335 const Script* script = closure->mFunctionID.get()->mScript; 336 337 // If any script is an intrinsic, give up trying fusing the kernels. 338 if (script->isIntrinsic()) { 339 return; 340 } 341 342 const RsdCpuScriptImpl *cpuScript = 343 (const RsdCpuScriptImpl*)script->mHal.drv; 344 const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 345 inputSet.insert(bitcodeFilename); 346 } 347 348 std::vector<string> inputs(inputSet.begin(), inputSet.end()); 349 350 std::vector<string> kernelBatches; 351 std::vector<string> invokeBatches; 352 353 int i = 0; 354 for (const auto& batch : mBatches) { 355 rsAssert(batch->size() > 0); 356 357 std::stringstream ss; 358 ss << batch->mName << ":"; 359 360 if (!batch->mClosures.front()->mClosure->mIsKernel) { 361 rsAssert(batch->size() == 1); 362 generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); 363 invokeBatches.push_back(ss.str()); 364 } else { 365 for (const auto& cpuClosure : batch->mClosures) { 366 generateSourceSlot(*cpuClosure->mClosure, inputs, ss); 367 } 368 kernelBatches.push_back(ss.str()); 369 } 370 } 371 372 rsAssert(cacheDir != nullptr); 373 string objFilePath(cacheDir); 374 objFilePath.append("/fusedXXXXXX.o"); 375 // Find unique object file name, to make following file names unique. 376 int tempfd = mkstemps(&objFilePath[0], 2); 377 if (tempfd == -1) { 378 return; 379 } 380 TEMP_FAILURE_RETRY(close(tempfd)); 381 382 string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); 383 string coreLibRelaxedPath; 384 const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), 385 &coreLibRelaxedPath); 386 vector<const char*> arguments; 387 setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 388 outputFileName, coreLibPath, coreLibRelaxedPath, &arguments); 389 std::unique_ptr<const char> joined( 390 rsuJoinStrings(arguments.size() - 1, arguments.data())); 391 string commandLine (joined.get()); 392 393 if (!fuseAndCompile(arguments.data(), commandLine)) { 394 unlink(objFilePath.c_str()); 395 return; 396 } 397 398 //===--------------------------------------------------------------------===// 399 // Create and load the shared lib 400 //===--------------------------------------------------------------------===// 401 402 const char* resName = outputFileName.c_str(); 403 404 if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { 405 ALOGE("Failed to link object file '%s'", resName); 406 return; 407 } 408 409 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 410 if (mScriptObj == nullptr) { 411 ALOGE("Unable to load '%s'", resName); 412 return; 413 } 414 415 mExecutable = ScriptExecutable::createFromSharedObject( 416 nullptr, // RS context. Unused. 417 mScriptObj); 418 419#endif // RS_COMPATIBILITY_LIB 420} 421 422void CpuScriptGroup2Impl::execute() { 423 for (auto batch : mBatches) { 424 batch->setGlobalsForBatch(); 425 batch->run(); 426 } 427} 428 429void Batch::setGlobalsForBatch() { 430 for (CPUClosure* cpuClosure : mClosures) { 431 const Closure* closure = cpuClosure->mClosure; 432 const IDBase* funcID = closure->mFunctionID.get(); 433 Script* s = funcID->mScript;; 434 for (const auto& p : closure->mGlobals) { 435 const void* value = p.second.first; 436 int size = p.second.second; 437 if (value == nullptr && size == 0) { 438 // This indicates the current closure depends on another closure for a 439 // global in their shared module (script). In this case we don't need to 440 // copy the value. For example, an invoke intializes a global variable 441 // which a kernel later reads. 442 continue; 443 } 444 rsAssert(p.first != nullptr); 445 ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)", 446 closure, p.first, p.first->mScript, p.first->mSlot); 447 Script* script = p.first->mScript; 448 const RsdCpuScriptImpl *cpuScript = 449 (const RsdCpuScriptImpl*)script->mHal.drv; 450 int slot = p.first->mSlot; 451 ScriptExecutable* exec = mGroup->getExecutable(); 452 if (exec != nullptr) { 453 const char* varName = cpuScript->getFieldName(slot); 454 void* addr = exec->getFieldAddress(varName); 455 if (size < 0) { 456 rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 457 (rs_object_base*)addr, (ObjectBase*)value); 458 } else { 459 memcpy(addr, (const void*)&value, size); 460 } 461 } else { 462 // We use -1 size to indicate an ObjectBase rather than a primitive type 463 if (size < 0) { 464 s->setVarObj(slot, (ObjectBase*)value); 465 } else { 466 s->setVar(slot, (const void*)&value, size); 467 } 468 } 469 } 470 } 471} 472 473void Batch::run() { 474 if (!mClosures.front()->mClosure->mIsKernel) { 475 rsAssert(mClosures.size() == 1); 476 477 // This batch contains a single closure for an invoke function 478 CPUClosure* cc = mClosures.front(); 479 const Closure* c = cc->mClosure; 480 481 if (mFunc != nullptr) { 482 // TODO: Need align pointers for x86_64. 483 // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 484 ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 485 } else { 486 const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 487 rsAssert(invokeID != nullptr); 488 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 489 } 490 491 return; 492 } 493 494 if (mFunc != nullptr) { 495 MTLaunchStruct mtls; 496 const CPUClosure* firstCpuClosure = mClosures.front(); 497 const CPUClosure* lastCpuClosure = mClosures.back(); 498 499 firstCpuClosure->mSi->forEachMtlsSetup( 500 (const Allocation**)firstCpuClosure->mClosure->mArgs, 501 firstCpuClosure->mClosure->mNumArg, 502 lastCpuClosure->mClosure->mReturnValue, 503 nullptr, 0, nullptr, &mtls); 504 505 mtls.script = nullptr; 506 mtls.fep.usr = nullptr; 507 mtls.kernel = (ForEachFunc_t)mFunc; 508 509 mGroup->getCpuRefImpl()->launchThreads( 510 (const Allocation**)firstCpuClosure->mClosure->mArgs, 511 firstCpuClosure->mClosure->mNumArg, 512 lastCpuClosure->mClosure->mReturnValue, 513 nullptr, &mtls); 514 515 return; 516 } 517 518 for (CPUClosure* cpuClosure : mClosures) { 519 const Closure* closure = cpuClosure->mClosure; 520 const ScriptKernelID* kernelID = 521 (const ScriptKernelID*)closure->mFunctionID.get(); 522 cpuClosure->mSi->preLaunch(kernelID->mSlot, 523 (const Allocation**)closure->mArgs, 524 closure->mNumArg, closure->mReturnValue, 525 nullptr, 0, nullptr); 526 } 527 528 const CPUClosure* cpuClosure = mClosures.front(); 529 const Closure* closure = cpuClosure->mClosure; 530 MTLaunchStruct mtls; 531 532 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 533 closure->mNumArg, 534 closure->mReturnValue, 535 nullptr, 0, nullptr, &mtls)) { 536 537 mtls.script = nullptr; 538 mtls.kernel = (void (*)())&groupRoot; 539 mtls.fep.usr = &mClosures; 540 541 mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 542 } 543 544 for (CPUClosure* cpuClosure : mClosures) { 545 const Closure* closure = cpuClosure->mClosure; 546 const ScriptKernelID* kernelID = 547 (const ScriptKernelID*)closure->mFunctionID.get(); 548 cpuClosure->mSi->postLaunch(kernelID->mSlot, 549 (const Allocation**)closure->mArgs, 550 closure->mNumArg, closure->mReturnValue, 551 nullptr, 0, nullptr); 552 } 553} 554 555} // namespace renderscript 556} // namespace android 557