// rsCpuScriptGroup2.cpp revision cbff7bcc4aacdc39d56628fa5c7c50518d52748c
#include "rsCpuScriptGroup2.h"

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <algorithm>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#ifndef RS_COMPATIBILITY_LIB
#include "bcc/Config/Config.h"
#endif

#include "cpu_ref/rsCpuCore.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
#include "rsCpuExecutable.h"
#include "rsCpuScript.h"
#include "rsScript.h"
#include "rsScriptGroup2.h"
#include "rsScriptIntrinsic.h"

using std::string;
28282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskiusing std::vector; 29282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 30282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskinamespace android { 31282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskinamespace renderscript { 32282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 33282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskinamespace { 34282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 35282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskiconst size_t DefaultKernelArgCount = 2; 36282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 37282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinskivoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 38282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski uint32_t xend, uint32_t outstep) { 39282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 40282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 41282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 42282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const size_t oldInLen = mutable_kinfo->inLen; 43282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 44282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski decltype(mutable_kinfo->inStride) oldInStride; 45282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 46282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 47282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski for (CPUClosure* cpuClosure : closures) { 48282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const Closure* closure = cpuClosure->mClosure; 49282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 50282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski // There had better be enough space in mutable_kinfo 
51282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 52282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 53282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski for (size_t i = 0; i < closure->mNumArg; i++) { 54282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const void* arg = closure->mArgs[i]; 55282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const Allocation* a = (const Allocation*)arg; 56282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const uint32_t eStride = a->mHal.state.elementSizeBytes; 57282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 58282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski eStride * xstart; 59282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski if (kinfo->dim.y > 1) { 60282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 61282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski } 62282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski mutable_kinfo->inPtr[i] = ptr; 63282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski mutable_kinfo->inStride[i] = eStride; 64282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski } 65282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski mutable_kinfo->inLen = closure->mNumArg; 66282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 67282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const Allocation* out = closure->mReturnValue; 68282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const uint32_t ostep = out->mHal.state.elementSizeBytes; 69282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski ostep * xstart; 71282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski if (kinfo->dim.y > 1) { 72282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski ptr += 
out->mHal.drvState.lod[0].stride * kinfo->current.y; 73282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski } 74282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 75282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski rsAssert(kinfo->outLen <= 1); 76282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 77282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 78282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski cpuClosure->mFunc(kinfo, xstart, xend, ostep); 79282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski } 80282e181b58cf72b6ca770dc7ca5f91f135444502Adam Lesinski 81 mutable_kinfo->inLen = oldInLen; 82 memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 83} 84 85} // namespace 86 87Batch::Batch(CpuScriptGroup2Impl* group, const char* name) : 88 mGroup(group), mFunc(nullptr) { 89 mName = strndup(name, strlen(name)); 90} 91 92Batch::~Batch() { 93 for (CPUClosure* c : mClosures) { 94 delete c; 95 } 96 free(mName); 97} 98 99bool Batch::conflict(CPUClosure* cpuClosure) const { 100 if (mClosures.empty()) { 101 return false; 102 } 103 104 const Closure* closure = cpuClosure->mClosure; 105 106 if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 107 // An invoke should be in a batch by itself, so it conflicts with any other 108 // closure. 
109 return true; 110 } 111 112 const auto& globalDeps = closure->mGlobalDeps; 113 const auto& argDeps = closure->mArgDeps; 114 115 for (CPUClosure* c : mClosures) { 116 const Closure* batched = c->mClosure; 117 if (globalDeps.find(batched) != globalDeps.end()) { 118 return true; 119 } 120 const auto& it = argDeps.find(batched); 121 if (it != argDeps.end()) { 122 const auto& args = (*it).second; 123 for (const auto &p1 : *args) { 124 if (p1.second.get() != nullptr) { 125 return true; 126 } 127 } 128 } 129 } 130 131 return false; 132} 133 134CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 135 const ScriptGroupBase *sg) : 136 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 137 mExecutable(nullptr), mScriptObj(nullptr) { 138 rsAssert(!mGroup->mClosures.empty()); 139 140 Batch* batch = new Batch(this, "Batch0"); 141 int i = 0; 142 for (Closure* closure: mGroup->mClosures) { 143 CPUClosure* cc; 144 const IDBase* funcID = closure->mFunctionID.get(); 145 RsdCpuScriptImpl* si = 146 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 147 if (closure->mIsKernel) { 148 MTLaunchStruct mtls; 149 si->forEachKernelSetup(funcID->mSlot, &mtls); 150 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 151 } else { 152 cc = new CPUClosure(closure, si); 153 } 154 155 if (batch->conflict(cc)) { 156 mBatches.push_back(batch); 157 std::stringstream ss; 158 ss << "Batch" << ++i; 159 batch = new Batch(this, ss.str().c_str()); 160 } 161 162 batch->mClosures.push_back(cc); 163 } 164 165 rsAssert(!batch->mClosures.empty()); 166 mBatches.push_back(batch); 167 168#ifndef RS_COMPATIBILITY_LIB 169 compile(mGroup->mCacheDir); 170 if (mScriptObj != nullptr && mExecutable != nullptr) { 171 for (Batch* batch : mBatches) { 172 batch->resolveFuncPtr(mScriptObj); 173 } 174 } 175#endif // RS_COMPATIBILITY_LIB 176} 177 178void Batch::resolveFuncPtr(void* sharedObj) { 179 std::string funcName(mName); 180 if (mClosures.front()->mClosure->mIsKernel) { 
181 funcName.append(".expand"); 182 } 183 mFunc = dlsym(sharedObj, funcName.c_str()); 184 rsAssert (mFunc != nullptr); 185} 186 187CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 188 for (Batch* batch : mBatches) { 189 delete batch; 190 } 191 delete mExecutable; 192 // TODO: move this dlclose into ~ScriptExecutable(). 193 if (mScriptObj != nullptr) { 194 dlclose(mScriptObj); 195 } 196} 197 198namespace { 199 200#ifndef RS_COMPATIBILITY_LIB 201 202string getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 203 *coreLibRelaxedPath = ""; 204 205 // If we're debugging, use the debug library. 206 if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 207 return SYSLIBPATH"/libclcore_debug.bc"; 208 } 209 210 // Check for a platform specific library 211 212#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 213 // NEON-capable ARMv7a devices can use an accelerated math library 214 // for all reduced precision scripts. 215 // ARMv8 does not use NEON, as ASIMD can be used with all precision 216 // levels. 217 *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; 218#endif 219 220#if defined(__i386__) || defined(__x86_64__) 221 // x86 devices will use an optimized library. 
222 return SYSLIBPATH"/libclcore_x86.bc"; 223#else 224 return SYSLIBPATH"/libclcore.bc"; 225#endif 226} 227 228void setupCompileArguments( 229 const vector<const char*>& inputs, const vector<string>& kernelBatches, 230 const vector<string>& invokeBatches, 231 const char* outputDir, const char* outputFileName, 232 const char* coreLibPath, const char* coreLibRelaxedPath, 233 const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant, 234 vector<const char*>* args) { 235 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 236 args->push_back("-fPIC"); 237 args->push_back("-embedRSInfo"); 238 if (emitGlobalInfo) { 239 args->push_back("-rs-global-info"); 240 if (emitGlobalInfoSkipConstant) { 241 args->push_back("-rs-global-info-skip-constant"); 242 } 243 } 244 args->push_back("-mtriple"); 245 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 246 args->push_back("-bclib"); 247 args->push_back(coreLibPath); 248 args->push_back("-bclib_relaxed"); 249 args->push_back(coreLibRelaxedPath); 250 for (const char* input : inputs) { 251 args->push_back(input); 252 } 253 for (const string& batch : kernelBatches) { 254 args->push_back("-merge"); 255 args->push_back(batch.c_str()); 256 } 257 for (const string& batch : invokeBatches) { 258 args->push_back("-invoke"); 259 args->push_back(batch.c_str()); 260 } 261 args->push_back("-output_path"); 262 args->push_back(outputDir); 263 args->push_back("-o"); 264 args->push_back(outputFileName); 265} 266 267void generateSourceSlot(RsdCpuReferenceImpl* ctxt, 268 const Closure& closure, 269 const std::vector<const char*>& inputs, 270 std::stringstream& ss) { 271 const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 272 const Script* script = funcID->mScript; 273 274 rsAssert (!script->isIntrinsic()); 275 276 const RsdCpuScriptImpl *cpuScript = 277 (const RsdCpuScriptImpl *)ctxt->lookupScript(script); 278 const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 279 280 const int index = find(inputs.begin(), inputs.end(), 
bitcodeFilename) - 281 inputs.begin(); 282 283 ss << index << "," << funcID->mSlot << "."; 284} 285 286#endif // RS_COMPATIBILTY_LIB 287 288} // anonymous namespace 289 290void CpuScriptGroup2Impl::compile(const char* cacheDir) { 291#ifndef RS_COMPATIBILITY_LIB 292 if (mGroup->mClosures.size() < 2) { 293 return; 294 } 295 296 auto comparator = [](const char* str1, const char* str2) -> bool { 297 return strcmp(str1, str2) < 0; 298 }; 299 std::set<const char*, decltype(comparator)> inputSet(comparator); 300 301 for (Closure* closure : mGroup->mClosures) { 302 const Script* script = closure->mFunctionID.get()->mScript; 303 304 // If any script is an intrinsic, give up trying fusing the kernels. 305 if (script->isIntrinsic()) { 306 return; 307 } 308 309 const RsdCpuScriptImpl *cpuScript = 310 (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script); 311 312 const char* bitcodeFilename = cpuScript->getBitcodeFilePath(); 313 inputSet.insert(bitcodeFilename); 314 } 315 316 std::vector<const char*> inputs(inputSet.begin(), inputSet.end()); 317 318 std::vector<string> kernelBatches; 319 std::vector<string> invokeBatches; 320 321 int i = 0; 322 for (const auto& batch : mBatches) { 323 rsAssert(batch->size() > 0); 324 325 std::stringstream ss; 326 ss << batch->mName << ":"; 327 328 if (!batch->mClosures.front()->mClosure->mIsKernel) { 329 rsAssert(batch->size() == 1); 330 generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss); 331 invokeBatches.push_back(ss.str()); 332 } else { 333 for (const auto& cpuClosure : batch->mClosures) { 334 generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss); 335 } 336 kernelBatches.push_back(ss.str()); 337 } 338 } 339 340 rsAssert(cacheDir != nullptr); 341 string objFilePath(cacheDir); 342 objFilePath.append("/"); 343 objFilePath.append(mGroup->mName); 344 objFilePath.append(".o"); 345 346 const char* resName = mGroup->mName; 347 string coreLibRelaxedPath; 348 const string& coreLibPath = 
getCoreLibPath(getCpuRefImpl()->getContext(), 349 &coreLibRelaxedPath); 350 351 vector<const char*> arguments; 352 bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo(); 353 bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant(); 354 setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 355 resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(), 356 emitGlobalInfo, emitGlobalInfoSkipConstant, 357 &arguments); 358 359 std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1, 360 arguments.data())); 361 362 inputs.push_back(coreLibPath.c_str()); 363 inputs.push_back(coreLibRelaxedPath.c_str()); 364 365 uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(), 366 inputs.data(), inputs.size()); 367 368 if (checksum == 0) { 369 return; 370 } 371 372 std::stringstream ss; 373 ss << std::hex << checksum; 374 const char* checksumStr = ss.str().c_str(); 375 376 //===--------------------------------------------------------------------===// 377 // Try to load a shared lib from code cache matching filename and checksum 378 //===--------------------------------------------------------------------===// 379 380 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 381 if (mScriptObj != nullptr) { 382 mExecutable = ScriptExecutable::createFromSharedObject( 383 getCpuRefImpl()->getContext(), mScriptObj, checksum); 384 if (mExecutable != nullptr) { 385 return; 386 } else { 387 ALOGE("Failed to create an executable object from so file"); 388 } 389 dlclose(mScriptObj); 390 mScriptObj = nullptr; 391 } 392 393 //===--------------------------------------------------------------------===// 394 // Fuse the input kernels and generate native code in an object file 395 //===--------------------------------------------------------------------===// 396 397 arguments.push_back("-build-checksum"); 398 arguments.push_back(checksumStr); 399 arguments.push_back(nullptr); 400 401 bool compiled = 
rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, 402 arguments.size()-1, 403 arguments.data()); 404 if (!compiled) { 405 return; 406 } 407 408 //===--------------------------------------------------------------------===// 409 // Create and load the shared lib 410 //===--------------------------------------------------------------------===// 411 412 if (!SharedLibraryUtils::createSharedLibrary( 413 getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) { 414 ALOGE("Failed to link object file '%s'", resName); 415 unlink(objFilePath.c_str()); 416 return; 417 } 418 419 unlink(objFilePath.c_str()); 420 421 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 422 if (mScriptObj == nullptr) { 423 ALOGE("Unable to load '%s'", resName); 424 return; 425 } 426 427 mExecutable = ScriptExecutable::createFromSharedObject( 428 getCpuRefImpl()->getContext(), 429 mScriptObj); 430 431#endif // RS_COMPATIBILITY_LIB 432} 433 434void CpuScriptGroup2Impl::execute() { 435 for (auto batch : mBatches) { 436 batch->setGlobalsForBatch(); 437 batch->run(); 438 } 439} 440 441void Batch::setGlobalsForBatch() { 442 for (CPUClosure* cpuClosure : mClosures) { 443 const Closure* closure = cpuClosure->mClosure; 444 const IDBase* funcID = closure->mFunctionID.get(); 445 Script* s = funcID->mScript;; 446 for (const auto& p : closure->mGlobals) { 447 const void* value = p.second.first; 448 int size = p.second.second; 449 if (value == nullptr && size == 0) { 450 // This indicates the current closure depends on another closure for a 451 // global in their shared module (script). In this case we don't need to 452 // copy the value. For example, an invoke intializes a global variable 453 // which a kernel later reads. 
454 continue; 455 } 456 rsAssert(p.first != nullptr); 457 Script* script = p.first->mScript; 458 RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl(); 459 const RsdCpuScriptImpl *cpuScript = 460 (const RsdCpuScriptImpl *)ctxt->lookupScript(script); 461 int slot = p.first->mSlot; 462 ScriptExecutable* exec = mGroup->getExecutable(); 463 if (exec != nullptr) { 464 const char* varName = cpuScript->getFieldName(slot); 465 void* addr = exec->getFieldAddress(varName); 466 if (size < 0) { 467 rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 468 (rs_object_base*)addr, (ObjectBase*)value); 469 } else { 470 memcpy(addr, (const void*)&value, size); 471 } 472 } else { 473 // We use -1 size to indicate an ObjectBase rather than a primitive type 474 if (size < 0) { 475 s->setVarObj(slot, (ObjectBase*)value); 476 } else { 477 s->setVar(slot, (const void*)&value, size); 478 } 479 } 480 } 481 } 482} 483 484void Batch::run() { 485 if (!mClosures.front()->mClosure->mIsKernel) { 486 rsAssert(mClosures.size() == 1); 487 488 // This batch contains a single closure for an invoke function 489 CPUClosure* cc = mClosures.front(); 490 const Closure* c = cc->mClosure; 491 492 if (mFunc != nullptr) { 493 // TODO: Need align pointers for x86_64. 
494 // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 495 ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 496 } else { 497 const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 498 rsAssert(invokeID != nullptr); 499 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 500 } 501 502 return; 503 } 504 505 if (mFunc != nullptr) { 506 MTLaunchStruct mtls; 507 const CPUClosure* firstCpuClosure = mClosures.front(); 508 const CPUClosure* lastCpuClosure = mClosures.back(); 509 510 firstCpuClosure->mSi->forEachMtlsSetup( 511 (const Allocation**)firstCpuClosure->mClosure->mArgs, 512 firstCpuClosure->mClosure->mNumArg, 513 lastCpuClosure->mClosure->mReturnValue, 514 nullptr, 0, nullptr, &mtls); 515 516 mtls.script = nullptr; 517 mtls.fep.usr = nullptr; 518 mtls.kernel = (ForEachFunc_t)mFunc; 519 520 mGroup->getCpuRefImpl()->launchThreads( 521 (const Allocation**)firstCpuClosure->mClosure->mArgs, 522 firstCpuClosure->mClosure->mNumArg, 523 lastCpuClosure->mClosure->mReturnValue, 524 nullptr, &mtls); 525 526 return; 527 } 528 529 for (CPUClosure* cpuClosure : mClosures) { 530 const Closure* closure = cpuClosure->mClosure; 531 const ScriptKernelID* kernelID = 532 (const ScriptKernelID*)closure->mFunctionID.get(); 533 cpuClosure->mSi->preLaunch(kernelID->mSlot, 534 (const Allocation**)closure->mArgs, 535 closure->mNumArg, closure->mReturnValue, 536 nullptr, 0, nullptr); 537 } 538 539 const CPUClosure* cpuClosure = mClosures.front(); 540 const Closure* closure = cpuClosure->mClosure; 541 MTLaunchStruct mtls; 542 543 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 544 closure->mNumArg, 545 closure->mReturnValue, 546 nullptr, 0, nullptr, &mtls)) { 547 548 mtls.script = nullptr; 549 mtls.kernel = (void (*)())&groupRoot; 550 mtls.fep.usr = &mClosures; 551 552 mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 553 } 554 555 for (CPUClosure* cpuClosure : mClosures) { 556 const 
Closure* closure = cpuClosure->mClosure; 557 const ScriptKernelID* kernelID = 558 (const ScriptKernelID*)closure->mFunctionID.get(); 559 cpuClosure->mSi->postLaunch(kernelID->mSlot, 560 (const Allocation**)closure->mArgs, 561 closure->mNumArg, closure->mReturnValue, 562 nullptr, 0, nullptr); 563 } 564} 565 566} // namespace renderscript 567} // namespace android 568