rsCpuScript.cpp revision 818cfa034e257c7bb48356257f5cb67334e19aa6
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsCpuCore.h" 18#include "rsCpuScript.h" 19 20#ifdef RS_COMPATIBILITY_LIB 21 #include <set> 22 #include <string> 23 #include <dlfcn.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <sys/stat.h> 28 #include <unistd.h> 29#else 30 #include <bcc/BCCContext.h> 31 #include <bcc/Config/Config.h> 32 #include <bcc/Renderscript/RSCompilerDriver.h> 33 #include <bcc/Renderscript/RSExecutable.h> 34 #include <bcc/Renderscript/RSInfo.h> 35 #include <bcinfo/MetadataExtractor.h> 36 #include <cutils/properties.h> 37 38 #include <sys/types.h> 39 #include <sys/wait.h> 40 #include <unistd.h> 41 42 #include <string> 43 #include <vector> 44#endif 45 46namespace { 47#ifdef RS_COMPATIBILITY_LIB 48 49// Create a len length string containing random characters from [A-Za-z0-9]. 50static std::string getRandomString(size_t len) { 51 char buf[len + 1]; 52 for (size_t i = 0; i < len; i++) { 53 uint32_t r = arc4random() & 0xffff; 54 r %= 62; 55 if (r < 26) { 56 // lowercase 57 buf[i] = 'a' + r; 58 } else if (r < 52) { 59 // uppercase 60 buf[i] = 'A' + (r - 26); 61 } else { 62 // Use a number 63 buf[i] = '0' + (r - 52); 64 } 65 } 66 buf[len] = '\0'; 67 return std::string(buf); 68} 69 70// Check if a path exists and attempt to create it if it doesn't. 71static bool ensureCacheDirExists(const char *path) { 72 if (access(path, R_OK | W_OK | X_OK) == 0) { 73 // Done if we can rwx the directory 74 return true; 75 } 76 if (mkdir(path, 0700) == 0) { 77 return true; 78 } 79 return false; 80} 81 82// Attempt to load the shared library from origName, but then fall back to 83// creating the symlinked shared library if necessary (to ensure instancing). 84// This function returns the dlopen()-ed handle if successful. 85static void *loadSOHelper(const char *origName, const char *cacheDir, 86 const char *resName) { 87 // Keep track of which .so libraries have been loaded. Once a library is 88 // in the set (per-process granularity), we must instead make a symlink to 89 // the original shared object (randomly named .so file) and load that one 90 // instead. If we don't do this, we end up aliasing global data between 91 // the various Script instances (which are supposed to be completely 92 // independent). 93 static std::set<std::string> LoadedLibraries; 94 95 void *loaded = NULL; 96 97 // Skip everything if we don't even have the original library available. 98 if (access(origName, F_OK) != 0) { 99 return NULL; 100 } 101 102 // Common path is that we have not loaded this Script/library before. 103 if (LoadedLibraries.find(origName) == LoadedLibraries.end()) { 104 loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL); 105 if (loaded) { 106 LoadedLibraries.insert(origName); 107 } 108 return loaded; 109 } 110 111 std::string newName(cacheDir); 112 newName.append("/com.android.renderscript.cache/"); 113 114 if (!ensureCacheDirExists(newName.c_str())) { 115 ALOGE("Could not verify or create cache dir: %s", cacheDir); 116 return NULL; 117 } 118 119 // Construct an appropriately randomized filename for the symlink. 120 newName.append("librs."); 121 newName.append(resName); 122 newName.append("#"); 123 newName.append(getRandomString(6)); // 62^6 potential filename variants. 124 newName.append(".so"); 125 126 int r = symlink(origName, newName.c_str()); 127 if (r != 0) { 128 ALOGE("Could not create symlink %s -> %s", newName.c_str(), origName); 129 return NULL; 130 } 131 loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL); 132 r = unlink(newName.c_str()); 133 if (r != 0) { 134 ALOGE("Could not unlink symlink %s", newName.c_str()); 135 } 136 if (loaded) { 137 LoadedLibraries.insert(newName.c_str()); 138 } 139 140 return loaded; 141} 142 143// Load the shared library referred to by cacheDir and resName. If we have 144// already loaded this library, we instead create a new symlink (in the 145// cache dir) and then load that. We then immediately destroy the symlink. 146// This is required behavior to implement script instancing for the support 147// library, since shared objects are loaded and de-duped by name only. 148static void *loadSharedLibrary(const char *cacheDir, const char *resName) { 149 void *loaded = NULL; 150 //arc4random_stir(); 151#ifndef RS_SERVER 152 std::string scriptSOName(cacheDir); 153 size_t cutPos = scriptSOName.rfind("cache"); 154 if (cutPos != std::string::npos) { 155 scriptSOName.erase(cutPos); 156 } else { 157 ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir); 158 } 159 scriptSOName.append("/lib/librs."); 160#else 161 std::string scriptSOName("lib"); 162#endif 163 scriptSOName.append(resName); 164 scriptSOName.append(".so"); 165 166 // We should check if we can load the library from the standard app 167 // location for shared libraries first. 168 loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName); 169 170 if (loaded == NULL) { 171 ALOGE("Unable to open shared library (%s): %s", 172 scriptSOName.c_str(), dlerror()); 173 174 // One final attempt to find the library in "/system/lib". 175 // We do this to allow bundled applications to use the compatibility 176 // library fallback path. Those applications don't have a private 177 // library path, so they need to install to the system directly. 178 // Note that this is really just a testing path. 179 android::String8 scriptSONameSystem("/system/lib/librs."); 180 scriptSONameSystem.append(resName); 181 scriptSONameSystem.append(".so"); 182 loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir, 183 resName); 184 if (loaded == NULL) { 185 ALOGE("Unable to open system shared library (%s): %s", 186 scriptSONameSystem.c_str(), dlerror()); 187 } 188 } 189 190 return loaded; 191} 192 193#else // RS_COMPATIBILITY_LIB is not defined 194 195static bool is_force_recompile() { 196#ifdef RS_SERVER 197 return false; 198#else 199 char buf[PROPERTY_VALUE_MAX]; 200 201 // Re-compile if floating point precision has been overridden. 202 property_get("debug.rs.precision", buf, ""); 203 if (buf[0] != '\0') { 204 return true; 205 } 206 207 // Re-compile if debug.rs.forcerecompile is set. 208 property_get("debug.rs.forcerecompile", buf, "0"); 209 if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) { 210 return true; 211 } else { 212 return false; 213 } 214#endif // RS_SERVER 215} 216 217const static char *BCC_EXE_PATH = "/system/bin/bcc"; 218 219static void setCompileArguments(std::vector<const char*>* args, const android::String8& bcFileName, 220 const char* cacheDir, const char* resName, const char* core_lib, 221 bool useRSDebugContext, const char* bccPluginName) { 222 rsAssert(cacheDir && resName && core_lib); 223 args->push_back(BCC_EXE_PATH); 224 args->push_back("-o"); 225 args->push_back(resName); 226 args->push_back("-output_path"); 227 args->push_back(cacheDir); 228 args->push_back("-bclib"); 229 args->push_back(core_lib); 230 args->push_back("-mtriple"); 231 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 232 233 // Execute the bcc compiler. 234 if (useRSDebugContext) { 235 args->push_back("-rs-debug-ctx"); 236 } else { 237 // Only load additional libraries for compiles that don't use 238 // the debug context. 239 if (bccPluginName && strlen(bccPluginName) > 0) { 240 args->push_back("-load"); 241 args->push_back(bccPluginName); 242 } 243 } 244 245 args->push_back(bcFileName.string()); 246 args->push_back(NULL); 247} 248 249static bool compileBitcode(const android::String8& bcFileName, 250 const char *bitcode, 251 size_t bitcodeSize, 252 const char** compileArguments, 253 const std::string& compileCommandLine) { 254 rsAssert(bitcode && bitcodeSize); 255 256 FILE *bcfile = fopen(bcFileName.string(), "w"); 257 if (!bcfile) { 258 ALOGE("Could not write to %s", bcFileName.string()); 259 return false; 260 } 261 size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile); 262 fclose(bcfile); 263 if (nwritten != bitcodeSize) { 264 ALOGE("Could not write %zu bytes to %s", bitcodeSize, 265 bcFileName.string()); 266 return false; 267 } 268 269 pid_t pid = fork(); 270 271 switch (pid) { 272 case -1: { // Error occurred (we attempt no recovery) 273 ALOGE("Couldn't fork for bcc compiler execution"); 274 return false; 275 } 276 case 0: { // Child process 277 ALOGV("Invoking BCC with: %s", compileCommandLine.c_str()); 278 execv(BCC_EXE_PATH, (char* const*)compileArguments); 279 280 ALOGE("execv() failed: %s", strerror(errno)); 281 abort(); 282 return false; 283 } 284 default: { // Parent process (actual driver) 285 // Wait on child process to finish compiling the source. 286 int status = 0; 287 pid_t w = waitpid(pid, &status, 0); 288 if (w == -1) { 289 ALOGE("Could not wait for bcc compiler"); 290 return false; 291 } 292 293 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 294 return true; 295 } 296 297 ALOGE("bcc compiler terminated unexpectedly"); 298 return false; 299 } 300 } 301} 302 303#endif // !defined(RS_COMPATIBILITY_LIB) 304} // namespace 305 306namespace android { 307namespace renderscript { 308 309#ifdef RS_COMPATIBILITY_LIB 310#define MAXLINE 500 311#define MAKE_STR_HELPER(S) #S 312#define MAKE_STR(S) MAKE_STR_HELPER(S) 313#define EXPORT_VAR_STR "exportVarCount: " 314#define EXPORT_FUNC_STR "exportFuncCount: " 315#define EXPORT_FOREACH_STR "exportForEachCount: " 316#define OBJECT_SLOT_STR "objectSlotCount: " 317 318// Copy up to a newline or size chars from str -> s, updating str 319// Returns s when successful and NULL when '\0' is finally reached. 320static char* strgets(char *s, int size, const char **ppstr) { 321 if (!ppstr || !*ppstr || **ppstr == '\0' || size < 1) { 322 return NULL; 323 } 324 325 int i; 326 for (i = 0; i < (size - 1); i++) { 327 s[i] = **ppstr; 328 (*ppstr)++; 329 if (s[i] == '\0') { 330 return s; 331 } else if (s[i] == '\n') { 332 s[i+1] = '\0'; 333 return s; 334 } 335 } 336 337 // size has been exceeded. 338 s[i] = '\0'; 339 340 return s; 341} 342#endif 343 344RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) { 345 mCtx = ctx; 346 mScript = s; 347 348#ifdef RS_COMPATIBILITY_LIB 349 mScriptSO = NULL; 350 mInvokeFunctions = NULL; 351 mForEachFunctions = NULL; 352 mFieldAddress = NULL; 353 mFieldIsObject = NULL; 354 mForEachSignatures = NULL; 355#else 356 mCompilerContext = NULL; 357 mCompilerDriver = NULL; 358 mExecutable = NULL; 359#endif 360 361 362 mRoot = NULL; 363 mRootExpand = NULL; 364 mInit = NULL; 365 mFreeChildren = NULL; 366 367 368 mBoundAllocs = NULL; 369 mIntrinsicData = NULL; 370 mIsThreadable = true; 371} 372 373 374bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir, 375 uint8_t const *bitcode, size_t bitcodeSize, 376 uint32_t flags, char const *bccPluginName) { 377 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 378 //ALOGE("rsdScriptInit %p %p", rsc, script); 379 380 mCtx->lockMutex(); 381#ifndef RS_COMPATIBILITY_LIB 382 bool useRSDebugContext = false; 383 384 mCompilerContext = NULL; 385 mCompilerDriver = NULL; 386 mExecutable = NULL; 387 388 mCompilerContext = new bcc::BCCContext(); 389 if (mCompilerContext == NULL) { 390 ALOGE("bcc: FAILS to create compiler context (out of memory)"); 391 mCtx->unlockMutex(); 392 return false; 393 } 394 395 mCompilerDriver = new bcc::RSCompilerDriver(); 396 if (mCompilerDriver == NULL) { 397 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 398 mCtx->unlockMutex(); 399 return false; 400 } 401 402 // Configure symbol resolvers (via compiler-rt and the RS runtime). 403 mRSRuntime.setLookupFunction(lookupRuntimeStub); 404 mRSRuntime.setContext(this); 405 mResolver.chainResolver(mCompilerRuntime); 406 mResolver.chainResolver(mRSRuntime); 407 408 // Run any compiler setup functions we have been provided with. 409 RSSetupCompilerCallback setupCompilerCallback = 410 mCtx->getSetupCompilerCallback(); 411 if (setupCompilerCallback != NULL) { 412 setupCompilerCallback(mCompilerDriver); 413 } 414 415 bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize); 416 if (!bitcodeMetadata.extract()) { 417 ALOGE("Could not extract metadata from bitcode"); 418 mCtx->unlockMutex(); 419 return false; 420 } 421 422 const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize); 423 424 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 425 mCompilerDriver->setDebugContext(true); 426 useRSDebugContext = true; 427 } 428 429 android::String8 bcFileName(cacheDir); 430 bcFileName.append("/"); 431 bcFileName.append(resName); 432 bcFileName.append(".bc"); 433 434 std::vector<const char*> compileArguments; 435 setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib, 436 useRSDebugContext, bccPluginName); 437 // The last argument of compileArguments ia a NULL, so remove 1 from the size. 438 std::string compileCommandLine = 439 bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data()); 440 441 if (!is_force_recompile()) { 442 // Load the compiled script that's in the cache, if any. 443 mExecutable = bcc::RSCompilerDriver::loadScript(cacheDir, resName, (const char*)bitcode, 444 bitcodeSize, compileCommandLine.c_str(), 445 mResolver); 446 } 447 448 // If we can't, it's either not there or out of date. We compile the bit code and try loading 449 // again. 450 if (mExecutable == NULL) { 451 if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize, compileArguments.data(), 452 compileCommandLine)) { 453 ALOGE("bcc: FAILS to compile '%s'", resName); 454 mCtx->unlockMutex(); 455 return false; 456 } 457 mExecutable = bcc::RSCompilerDriver::loadScript(cacheDir, resName, (const char*)bitcode, 458 bitcodeSize, compileCommandLine.c_str(), 459 mResolver); 460 if (mExecutable == NULL) { 461 ALOGE("bcc: FAILS to load freshly compiled executable for '%s'", resName); 462 mCtx->unlockMutex(); 463 return false; 464 } 465 } 466 467 mExecutable->setThreadable(mIsThreadable); 468 if (!mExecutable->syncInfo()) { 469 ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); 470 } 471 472 mRoot = reinterpret_cast<int (*)()>(mExecutable->getSymbolAddress("root")); 473 mRootExpand = 474 reinterpret_cast<int (*)()>(mExecutable->getSymbolAddress("root.expand")); 475 mInit = reinterpret_cast<void (*)()>(mExecutable->getSymbolAddress("init")); 476 mFreeChildren = 477 reinterpret_cast<void (*)()>(mExecutable->getSymbolAddress(".rs.dtor")); 478 479 480 if (bitcodeMetadata.getExportVarCount()) { 481 mBoundAllocs = new Allocation *[bitcodeMetadata.getExportVarCount()]; 482 memset(mBoundAllocs, 0, sizeof(void *) * bitcodeMetadata.getExportVarCount()); 483 } 484 485 for (size_t i = 0; i < bitcodeMetadata.getExportForEachSignatureCount(); i++) { 486 char* name = new char[strlen(bitcodeMetadata.getExportForEachNameList()[i]) + 1]; 487 mExportedForEachFuncList.push_back( 488 std::make_pair(name, bitcodeMetadata.getExportForEachSignatureList()[i])); 489 } 490 491#else // RS_COMPATIBILITY_LIB is defined 492 493 mScriptSO = loadSharedLibrary(cacheDir, resName); 494 495 if (mScriptSO) { 496 char line[MAXLINE]; 497 mRoot = (RootFunc_t) dlsym(mScriptSO, "root"); 498 if (mRoot) { 499 //ALOGE("Found root(): %p", mRoot); 500 } 501 mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand"); 502 if (mRootExpand) { 503 //ALOGE("Found root.expand(): %p", mRootExpand); 504 } 505 mInit = (InvokeFunc_t) dlsym(mScriptSO, "init"); 506 if (mInit) { 507 //ALOGE("Found init(): %p", mInit); 508 } 509 mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor"); 510 if (mFreeChildren) { 511 //ALOGE("Found .rs.dtor(): %p", mFreeChildren); 512 } 513 514 const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info"); 515 if (rsInfo) { 516 //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo); 517 } 518 519 size_t varCount = 0; 520 if (strgets(line, MAXLINE, &rsInfo) == NULL) { 521 goto error; 522 } 523 if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) { 524 ALOGE("Invalid export var count!: %s", line); 525 goto error; 526 } 527 528 mExportedVariableCount = varCount; 529 //ALOGE("varCount: %zu", varCount); 530 if (varCount > 0) { 531 // Start by creating/zeroing this member, since we don't want to 532 // accidentally clean up invalid pointers later (if we error out). 533 mFieldIsObject = new bool[varCount]; 534 if (mFieldIsObject == NULL) { 535 goto error; 536 } 537 memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject)); 538 mFieldAddress = new void*[varCount]; 539 if (mFieldAddress == NULL) { 540 goto error; 541 } 542 for (size_t i = 0; i < varCount; ++i) { 543 if (strgets(line, MAXLINE, &rsInfo) == NULL) { 544 goto error; 545 } 546 char *c = strrchr(line, '\n'); 547 if (c) { 548 *c = '\0'; 549 } 550 mFieldAddress[i] = dlsym(mScriptSO, line); 551 if (mFieldAddress[i] == NULL) { 552 ALOGE("Failed to find variable address for %s: %s", 553 line, dlerror()); 554 // Not a critical error if we don't find a global variable. 555 } 556 else { 557 //ALOGE("Found variable %s at %p", line, 558 //mFieldAddress[i]); 559 } 560 } 561 } 562 563 size_t funcCount = 0; 564 if (strgets(line, MAXLINE, &rsInfo) == NULL) { 565 goto error; 566 } 567 if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) { 568 ALOGE("Invalid export func count!: %s", line); 569 goto error; 570 } 571 572 mExportedFunctionCount = funcCount; 573 //ALOGE("funcCount: %zu", funcCount); 574 575 if (funcCount > 0) { 576 mInvokeFunctions = new InvokeFunc_t[funcCount]; 577 if (mInvokeFunctions == NULL) { 578 goto error; 579 } 580 for (size_t i = 0; i < funcCount; ++i) { 581 if (strgets(line, MAXLINE, &rsInfo) == NULL) { 582 goto error; 583 } 584 char *c = strrchr(line, '\n'); 585 if (c) { 586 *c = '\0'; 587 } 588 589 mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line); 590 if (mInvokeFunctions[i] == NULL) { 591 ALOGE("Failed to get function address for %s(): %s", 592 line, dlerror()); 593 goto error; 594 } 595 else { 596 //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]); 597 } 598 } 599 } 600 601 size_t forEachCount = 0; 602 if (strgets(line, MAXLINE, &rsInfo) == NULL) { 603 goto error; 604 } 605 if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) { 606 ALOGE("Invalid export forEach count!: %s", line); 607 goto error; 608 } 609 610 if (forEachCount > 0) { 611 612 mForEachSignatures = new uint32_t[forEachCount]; 613 if (mForEachSignatures == NULL) { 614 goto error; 615 } 616 mForEachFunctions = new ForEachFunc_t[forEachCount]; 617 if (mForEachFunctions == NULL) { 618 goto error; 619 } 620 for (size_t i = 0; i < forEachCount; ++i) { 621 unsigned int tmpSig = 0; 622 char tmpName[MAXLINE]; 623 624 if (strgets(line, MAXLINE, &rsInfo) == NULL) { 625 goto error; 626 } 627 if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s", 628 &tmpSig, tmpName) != 2) { 629 ALOGE("Invalid export forEach!: %s", line); 630 goto error; 631 } 632 633 // Lookup the expanded ForEach kernel. 634 strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName)); 635 mForEachSignatures[i] = tmpSig; 636 mForEachFunctions[i] = 637 (ForEachFunc_t) dlsym(mScriptSO, tmpName); 638 if (i != 0 && mForEachFunctions[i] == NULL) { 639 // Ignore missing root.expand functions. 640 // root() is always specified at location 0. 641 ALOGE("Failed to find forEach function address for %s: %s", 642 tmpName, dlerror()); 643 goto error; 644 } 645 else { 646 //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]); 647 } 648 } 649 } 650 651 size_t objectSlotCount = 0; 652 if (strgets(line, MAXLINE, &rsInfo) == NULL) { 653 goto error; 654 } 655 if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) { 656 ALOGE("Invalid object slot count!: %s", line); 657 goto error; 658 } 659 660 if (objectSlotCount > 0) { 661 rsAssert(varCount > 0); 662 for (size_t i = 0; i < objectSlotCount; ++i) { 663 uint32_t varNum = 0; 664 if (strgets(line, MAXLINE, &rsInfo) == NULL) { 665 goto error; 666 } 667 if (sscanf(line, "%u", &varNum) != 1) { 668 ALOGE("Invalid object slot!: %s", line); 669 goto error; 670 } 671 672 if (varNum < varCount) { 673 mFieldIsObject[varNum] = true; 674 } 675 } 676 } 677 678 if (varCount > 0) { 679 mBoundAllocs = new Allocation *[varCount]; 680 memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs)); 681 } 682 683 if (mScriptSO == (void*)1) { 684 //rsdLookupRuntimeStub(script, "acos"); 685 } 686 } else { 687 goto error; 688 } 689#endif 690 mCtx->unlockMutex(); 691 return true; 692 693#ifdef RS_COMPATIBILITY_LIB 694error: 695 696 mCtx->unlockMutex(); 697 delete[] mInvokeFunctions; 698 delete[] mForEachFunctions; 699 delete[] mFieldAddress; 700 delete[] mFieldIsObject; 701 delete[] mForEachSignatures; 702 delete[] mBoundAllocs; 703 if (mScriptSO) { 704 dlclose(mScriptSO); 705 } 706 return false; 707#endif 708} 709 710#ifndef RS_COMPATIBILITY_LIB 711 712#ifdef __LP64__ 713#define SYSLIBPATH "/system/lib64" 714#else 715#define SYSLIBPATH "/system/lib" 716#endif 717 718const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode, 719 size_t bitcodeSize) { 720 const char* defaultLib = SYSLIBPATH"/libclcore.bc"; 721 722 // If we're debugging, use the debug library. 723 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 724 return SYSLIBPATH"/libclcore_debug.bc"; 725 } 726 727 // If a callback has been registered to specify a library, use that. 728 RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback(); 729 if (selectRTCallback != NULL) { 730 return selectRTCallback((const char*)bitcode, bitcodeSize); 731 } 732 733 // Check for a platform specific library 734#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 735 enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision(); 736 if (prec == bcinfo::RS_FP_Relaxed) { 737 // NEON-capable ARMv7a devices can use an accelerated math library 738 // for all reduced precision scripts. 739 // ARMv8 does not use NEON, as ASIMD can be used with all precision 740 // levels. 741 return SYSLIBPATH"/libclcore_neon.bc"; 742 } else { 743 return defaultLib; 744 } 745#elif defined(__i386__) || defined(__x86_64__) 746 // x86 devices will use an optimized library. 747 return SYSLIBPATH"/libclcore_x86.bc"; 748#else 749 return defaultLib; 750#endif 751} 752 753#endif 754 755void RsdCpuScriptImpl::populateScript(Script *script) { 756#ifndef RS_COMPATIBILITY_LIB 757 // Copy info over to runtime 758 script->mHal.info.exportedFunctionCount = mExecutable->getExportFuncAddrs().size(); 759 script->mHal.info.exportedVariableCount = mExecutable->getExportVarAddrs().size(); 760 script->mHal.info.exportedForeachFuncList = &mExportedForEachFuncList[0]; 761 script->mHal.info.exportedPragmaCount = mExecutable->getPragmaKeys().size(); 762 script->mHal.info.exportedPragmaKeyList = 763 const_cast<const char**>(mExecutable->getPragmaKeys().array()); 764 script->mHal.info.exportedPragmaValueList = 765 const_cast<const char**>(mExecutable->getPragmaValues().array()); 766 767 if (mRootExpand) { 768 script->mHal.info.root = mRootExpand; 769 } else { 770 script->mHal.info.root = mRoot; 771 } 772#else 773 // Copy info over to runtime 774 script->mHal.info.exportedFunctionCount = mExportedFunctionCount; 775 script->mHal.info.exportedVariableCount = mExportedVariableCount; 776 script->mHal.info.exportedPragmaCount = 0; 777 script->mHal.info.exportedPragmaKeyList = 0; 778 script->mHal.info.exportedPragmaValueList = 0; 779 780 // Bug, need to stash in metadata 781 if (mRootExpand) { 782 script->mHal.info.root = mRootExpand; 783 } else { 784 script->mHal.info.root = mRoot; 785 } 786#endif 787} 788 789 790typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 791 792void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, 793 uint32_t inLen, 794 Allocation * aout, 795 const void * usr, uint32_t usrLen, 796 const RsScriptCall *sc, 797 MTLaunchStruct *mtls) { 798 799 memset(mtls, 0, sizeof(MTLaunchStruct)); 800 801 for (int index = inLen; --index >= 0;) { 802 const Allocation* ain = ains[index]; 803 804 // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface 805 if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { 806 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 807 "rsForEach called with null in allocations"); 808 return; 809 } 810 } 811 812 if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { 813 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 814 "rsForEach called with null out allocations"); 815 return; 816 } 817 818 if (inLen > 0) { 819 const Allocation *ain0 = ains[0]; 820 const Type *inType = ain0->getType(); 821 822 mtls->fep.dimX = inType->getDimX(); 823 mtls->fep.dimY = inType->getDimY(); 824 mtls->fep.dimZ = inType->getDimZ(); 825 826 for (int Index = inLen; --Index >= 1;) { 827 if (!ain0->hasSameDims(ains[Index])) { 828 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 829 "Failed to launch kernel; dimensions of input and output allocations do not match."); 830 831 return; 832 } 833 } 834 835 } else if (aout != NULL) { 836 const Type *outType = aout->getType(); 837 838 mtls->fep.dimX = outType->getDimX(); 839 mtls->fep.dimY = outType->getDimY(); 840 mtls->fep.dimZ = outType->getDimZ(); 841 842 } else { 843 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 844 "rsForEach called with null allocations"); 845 return; 846 } 847 848 if (inLen > 0 && aout != NULL) { 849 if (!ains[0]->hasSameDims(aout)) { 850 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 851 "Failed to launch kernel; dimensions of input and output allocations do not match."); 852 853 return; 854 } 855 } 856 857 if (!sc || (sc->xEnd == 0)) { 858 mtls->xEnd = mtls->fep.dimX; 859 } else { 860 rsAssert(sc->xStart < mtls->fep.dimX); 861 rsAssert(sc->xEnd <= mtls->fep.dimX); 862 rsAssert(sc->xStart < sc->xEnd); 863 mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); 864 mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); 865 if (mtls->xStart >= mtls->xEnd) return; 866 } 867 868 if (!sc || (sc->yEnd == 0)) { 869 mtls->yEnd = mtls->fep.dimY; 870 } else { 871 rsAssert(sc->yStart < mtls->fep.dimY); 872 rsAssert(sc->yEnd <= mtls->fep.dimY); 873 rsAssert(sc->yStart < sc->yEnd); 874 mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); 875 mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); 876 if (mtls->yStart >= mtls->yEnd) return; 877 } 878 879 if (!sc || (sc->zEnd == 0)) { 880 mtls->zEnd = mtls->fep.dimZ; 881 } else { 882 rsAssert(sc->zStart < mtls->fep.dimZ); 883 rsAssert(sc->zEnd <= mtls->fep.dimZ); 884 rsAssert(sc->zStart < sc->zEnd); 885 mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart); 886 mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd); 887 if (mtls->zStart >= mtls->zEnd) return; 888 } 889 890 mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); 891 mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); 892 mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); 893 mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); 894 895 rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0)); 896 897 mtls->rsc = mCtx; 898 mtls->ains = ains; 899 mtls->aout = aout; 900 mtls->fep.usr = usr; 901 mtls->fep.usrLen = usrLen; 902 mtls->mSliceSize = 1; 903 mtls->mSliceNum = 0; 904 905 mtls->fep.inPtrs = NULL; 906 mtls->fep.inStrides = NULL; 907 mtls->isThreadable = mIsThreadable; 908 909 if (inLen > 0) { 910 911 if (inLen <= RS_KERNEL_INPUT_THRESHOLD) { 912 mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff; 913 mtls->fep.inStrides = mtls->inStridesBuff; 914 } else { 915 mtls->fep.heapAllocatedArrays = true; 916 917 mtls->fep.inPtrs = new const uint8_t*[inLen]; 918 mtls->fep.inStrides = new StridePair[inLen]; 919 } 920 921 mtls->fep.inLen = inLen; 922 923 for (int index = inLen; --index >= 0;) { 924 const Allocation *ain = ains[index]; 925 926 mtls->fep.inPtrs[index] = 927 (const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr; 928 929 mtls->fep.inStrides[index].eStride = 930 ain->getType()->getElementSizeBytes(); 931 mtls->fep.inStrides[index].yStride = 932 ain->mHal.drvState.lod[0].stride; 933 } 934 } 935 936 mtls->fep.outPtr = NULL; 937 mtls->fep.outStride.eStride = 0; 938 mtls->fep.outStride.yStride = 0; 939 if (aout != NULL) { 940 mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; 941 942 mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes(); 943 mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride; 944 } 945} 946 947 948void RsdCpuScriptImpl::invokeForEach(uint32_t slot, 949 const Allocation ** ains, 950 uint32_t inLen, 951 Allocation * aout, 952 const void * usr, 953 uint32_t usrLen, 954 const RsScriptCall *sc) { 955 956 MTLaunchStruct mtls; 957 958 forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls); 959 forEachKernelSetup(slot, &mtls); 960 961 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 962 mCtx->launchThreads(ains, inLen, aout, sc, &mtls); 963 mCtx->setTLS(oldTLS); 964} 965 966void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { 967 mtls->script = this; 968 mtls->fep.slot = slot; 969#ifndef RS_COMPATIBILITY_LIB 970 rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size()); 971 mtls->kernel = reinterpret_cast<ForEachFunc_t>( 972 mExecutable->getExportForeachFuncAddrs()[slot]); 973 rsAssert(mtls->kernel != NULL); 974 mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second; 975#else 976 mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]); 977 rsAssert(mtls->kernel != NULL); 978 mtls->sig = mForEachSignatures[slot]; 979#endif 980} 981 982int RsdCpuScriptImpl::invokeRoot() { 983 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 984 int ret = mRoot(); 985 mCtx->setTLS(oldTLS); 986 return ret; 987} 988 989void RsdCpuScriptImpl::invokeInit() { 990 if (mInit) { 991 mInit(); 992 } 993} 994 995void RsdCpuScriptImpl::invokeFreeChildren() { 996 if (mFreeChildren) { 997 mFreeChildren(); 998 } 999} 1000 1001void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params, 1002 size_t paramLength) { 1003 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 1004 1005 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 1006 reinterpret_cast<void (*)(const void *, uint32_t)>( 1007#ifndef RS_COMPATIBILITY_LIB 1008 mExecutable->getExportFuncAddrs()[slot])(params, paramLength); 1009#else 1010 mInvokeFunctions[slot])(params, paramLength); 1011#endif 1012 mCtx->setTLS(oldTLS); 1013} 1014 1015void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 1016 //rsAssert(!script->mFieldIsObject[slot]); 1017 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 1018 1019 //if (mIntrinsicID) { 1020 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); 1021 //return; 1022 //} 1023 1024#ifndef RS_COMPATIBILITY_LIB 1025 int32_t *destPtr = reinterpret_cast<int32_t *>( 1026 mExecutable->getExportVarAddrs()[slot]); 1027#else 1028 int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1029#endif 1030 if (!destPtr) { 1031 //ALOGV("Calling setVar on slot = %i which is null", slot); 1032 return; 1033 } 1034 1035 memcpy(destPtr, data, dataLength); 1036} 1037 1038void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) { 1039 //rsAssert(!script->mFieldIsObject[slot]); 1040 //ALOGE("getGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 1041 1042#ifndef RS_COMPATIBILITY_LIB 1043 int32_t *srcPtr = reinterpret_cast<int32_t *>( 1044 mExecutable->getExportVarAddrs()[slot]); 1045#else 1046 int32_t *srcPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1047#endif 1048 if (!srcPtr) { 1049 //ALOGV("Calling setVar on slot = %i which is null", slot); 1050 return; 1051 } 1052 memcpy(data, srcPtr, dataLength); 1053} 1054 1055 1056void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, 1057 const Element *elem, 1058 const uint32_t *dims, size_t dimLength) { 1059 1060#ifndef RS_COMPATIBILITY_LIB 1061 int32_t *destPtr = reinterpret_cast<int32_t *>( 1062 mExecutable->getExportVarAddrs()[slot]); 1063#else 1064 int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1065#endif 1066 if (!destPtr) { 1067 //ALOGV("Calling setVar on slot = %i which is null", slot); 1068 return; 1069 } 1070 1071 // We want to look at dimension in terms of integer components, 1072 // but dimLength is given in terms of bytes. 1073 dimLength /= sizeof(int); 1074 1075 // Only a single dimension is currently supported. 1076 rsAssert(dimLength == 1); 1077 if (dimLength == 1) { 1078 // First do the increment loop. 1079 size_t stride = elem->getSizeBytes(); 1080 const char *cVal = reinterpret_cast<const char *>(data); 1081 for (uint32_t i = 0; i < dims[0]; i++) { 1082 elem->incRefs(cVal); 1083 cVal += stride; 1084 } 1085 1086 // Decrement loop comes after (to prevent race conditions). 1087 char *oldVal = reinterpret_cast<char *>(destPtr); 1088 for (uint32_t i = 0; i < dims[0]; i++) { 1089 elem->decRefs(oldVal); 1090 oldVal += stride; 1091 } 1092 } 1093 1094 memcpy(destPtr, data, dataLength); 1095} 1096 1097void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) { 1098 1099 //rsAssert(!script->mFieldIsObject[slot]); 1100 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 1101 1102#ifndef RS_COMPATIBILITY_LIB 1103 int32_t *destPtr = reinterpret_cast<int32_t *>( 1104 mExecutable->getExportVarAddrs()[slot]); 1105#else 1106 int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1107#endif 1108 if (!destPtr) { 1109 //ALOGV("Calling setVar on slot = %i which is null", slot); 1110 return; 1111 } 1112 1113 void *ptr = NULL; 1114 mBoundAllocs[slot] = data; 1115 if(data) { 1116 ptr = data->mHal.drvState.lod[0].mallocPtr; 1117 } 1118 memcpy(destPtr, &ptr, sizeof(void *)); 1119} 1120 1121void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { 1122 1123 //rsAssert(script->mFieldIsObject[slot]); 1124 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 1125 1126#ifndef RS_COMPATIBILITY_LIB 1127 int32_t *destPtr = reinterpret_cast<int32_t *>( 1128 mExecutable->getExportVarAddrs()[slot]); 1129#else 1130 int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1131#endif 1132 1133 if (!destPtr) { 1134 //ALOGV("Calling setVar on slot = %i which is null", slot); 1135 return; 1136 } 1137 1138 rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data); 1139} 1140 1141RsdCpuScriptImpl::~RsdCpuScriptImpl() { 1142#ifndef RS_COMPATIBILITY_LIB 1143 if (mExecutable) { 1144 Vector<void *>::const_iterator var_addr_iter = 1145 mExecutable->getExportVarAddrs().begin(); 1146 Vector<void *>::const_iterator var_addr_end = 1147 mExecutable->getExportVarAddrs().end(); 1148 1149 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = 1150 mExecutable->getInfo().getObjectSlots().begin(); 1151 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = 1152 mExecutable->getInfo().getObjectSlots().end(); 1153 1154 while ((var_addr_iter != var_addr_end) && 1155 (is_object_iter != is_object_end)) { 1156 // The field address can be NULL if the script-side has optimized 1157 // the corresponding global variable away. 1158 rs_object_base *obj_addr = 1159 reinterpret_cast<rs_object_base *>(*var_addr_iter); 1160 if (*is_object_iter) { 1161 if (*var_addr_iter != NULL && mCtx->getContext() != NULL) { 1162 rsrClearObject(mCtx->getContext(), obj_addr); 1163 } 1164 } 1165 var_addr_iter++; 1166 is_object_iter++; 1167 } 1168 } 1169 1170 if (mCompilerContext) { 1171 delete mCompilerContext; 1172 } 1173 if (mCompilerDriver) { 1174 delete mCompilerDriver; 1175 } 1176 if (mExecutable) { 1177 delete mExecutable; 1178 } 1179 if (mBoundAllocs) { 1180 delete[] mBoundAllocs; 1181 } 1182 1183 for (size_t i = 0; i < mExportedForEachFuncList.size(); i++) { 1184 delete[] mExportedForEachFuncList[i].first; 1185 } 1186#else 1187 if (mFieldIsObject) { 1188 for (size_t i = 0; i < mExportedVariableCount; ++i) { 1189 if (mFieldIsObject[i]) { 1190 if (mFieldAddress[i] != NULL) { 1191 rs_object_base *obj_addr = 1192 reinterpret_cast<rs_object_base *>(mFieldAddress[i]); 1193 rsrClearObject(mCtx->getContext(), obj_addr); 1194 } 1195 } 1196 } 1197 } 1198 1199 if (mInvokeFunctions) delete[] mInvokeFunctions; 1200 if (mForEachFunctions) delete[] mForEachFunctions; 1201 if (mFieldAddress) delete[] mFieldAddress; 1202 if (mFieldIsObject) delete[] mFieldIsObject; 1203 if (mForEachSignatures) delete[] mForEachSignatures; 1204 if (mBoundAllocs) delete[] mBoundAllocs; 1205 if (mScriptSO) { 1206 dlclose(mScriptSO); 1207 } 1208#endif 1209} 1210 1211Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { 1212 if (!ptr) { 1213 return NULL; 1214 } 1215 1216 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) { 1217 Allocation *a = mBoundAllocs[ct]; 1218 if (!a) continue; 1219 if (a->mHal.drvState.lod[0].mallocPtr == ptr) { 1220 return a; 1221 } 1222 } 1223 ALOGE("rsGetAllocation, failed to find %p", ptr); 1224 return NULL; 1225} 1226 1227void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains, 1228 uint32_t inLen, Allocation * aout, 1229 const void * usr, uint32_t usrLen, 1230 const RsScriptCall *sc) {} 1231 1232void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains, 1233 uint32_t inLen, Allocation * aout, 1234 const void * usr, uint32_t usrLen, 1235 const RsScriptCall *sc) {} 1236 1237 1238} 1239} 1240