rsCpuScript.cpp revision f3213d7fd648da98bb3b03204eaf90f03c31926b
/*
 * Copyright (C) 2011-2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "rsCpuCore.h"
#include "rsCpuScript.h"

#ifdef RS_COMPATIBILITY_LIB
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>
#else
    #include <bcc/BCCContext.h>
    #include <bcc/Config/Config.h>
    #include <bcc/Renderscript/RSCompilerDriver.h>
    #include <bcc/Renderscript/RSInfo.h>
    #include <bcinfo/MetadataExtractor.h>
    #include <cutils/properties.h>

    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #include <string>
    #include <vector>
#endif

#include <set>
#include <string>
#include <dlfcn.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <iostream>

#ifdef __LP64__
#define SYSLIBPATH "/system/lib64"
#else
#define SYSLIBPATH "/system/lib"
#endif

namespace {

// Create a len length string containing random characters from [A-Za-z0-9].
57static std::string getRandomString(size_t len) { 58 char buf[len + 1]; 59 for (size_t i = 0; i < len; i++) { 60 uint32_t r = arc4random() & 0xffff; 61 r %= 62; 62 if (r < 26) { 63 // lowercase 64 buf[i] = 'a' + r; 65 } else if (r < 52) { 66 // uppercase 67 buf[i] = 'A' + (r - 26); 68 } else { 69 // Use a number 70 buf[i] = '0' + (r - 52); 71 } 72 } 73 buf[len] = '\0'; 74 return std::string(buf); 75} 76 77// Check if a path exists and attempt to create it if it doesn't. 78static bool ensureCacheDirExists(const char *path) { 79 if (access(path, R_OK | W_OK | X_OK) == 0) { 80 // Done if we can rwx the directory 81 return true; 82 } 83 if (mkdir(path, 0700) == 0) { 84 return true; 85 } 86 return false; 87} 88 89// Copy the file named \p srcFile to \p dstFile. 90// Return 0 on success and -1 if anything wasn't copied. 91static int copyFile(const char *dstFile, const char *srcFile) { 92 std::ifstream srcStream(srcFile); 93 if (!srcStream) { 94 ALOGE("Could not verify or read source file: %s", srcFile); 95 return -1; 96 } 97 std::ofstream dstStream(dstFile); 98 if (!dstStream) { 99 ALOGE("Could not verify or write destination file: %s", dstFile); 100 return -1; 101 } 102 dstStream << srcStream.rdbuf(); 103 if (!dstStream) { 104 ALOGE("Could not write destination file: %s", dstFile); 105 return -1; 106 } 107 108 srcStream.close(); 109 dstStream.close(); 110 111 return 0; 112} 113 114static std::string findSharedObjectName(const char *cacheDir, 115 const char *resName) { 116#ifndef RS_SERVER 117 std::string scriptSOName(cacheDir); 118#if defined(RS_COMPATIBILITY_LIB) && !defined(__LP64__) 119 size_t cutPos = scriptSOName.rfind("cache"); 120 if (cutPos != std::string::npos) { 121 scriptSOName.erase(cutPos); 122 } else { 123 ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir); 124 } 125 scriptSOName.append("/lib/librs."); 126#else 127 scriptSOName.append("/librs."); 128#endif // RS_COMPATIBILITY_LIB 129 130#else 131 std::string scriptSOName("lib"); 132#endif 
// RS_SERVER 133 scriptSOName.append(resName); 134 scriptSOName.append(".so"); 135 136 return scriptSOName; 137} 138 139#ifndef RS_COMPATIBILITY_LIB 140 141static bool is_force_recompile() { 142#ifdef RS_SERVER 143 return false; 144#else 145 char buf[PROPERTY_VALUE_MAX]; 146 147 // Re-compile if floating point precision has been overridden. 148 property_get("debug.rs.precision", buf, ""); 149 if (buf[0] != '\0') { 150 return true; 151 } 152 153 // Re-compile if debug.rs.forcerecompile is set. 154 property_get("debug.rs.forcerecompile", buf, "0"); 155 if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) { 156 return true; 157 } else { 158 return false; 159 } 160#endif // RS_SERVER 161} 162 163const static char *BCC_EXE_PATH = "/system/bin/bcc"; 164 165static void setCompileArguments(std::vector<const char*>* args, 166 const std::string& bcFileName, 167 const char* cacheDir, const char* resName, 168 const char* core_lib, bool useRSDebugContext, 169 const char* bccPluginName) { 170 rsAssert(cacheDir && resName && core_lib); 171 args->push_back(BCC_EXE_PATH); 172 args->push_back("-unroll-runtime"); 173 args->push_back("-scalarize-load-store"); 174 args->push_back("-o"); 175 args->push_back(resName); 176 args->push_back("-output_path"); 177 args->push_back(cacheDir); 178 args->push_back("-bclib"); 179 args->push_back(core_lib); 180 args->push_back("-mtriple"); 181 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 182 183 // Enable workaround for A53 codegen by default. 184#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND) 185 args->push_back("-aarch64-fix-cortex-a53-835769"); 186#endif 187 188 // Execute the bcc compiler. 189 if (useRSDebugContext) { 190 args->push_back("-rs-debug-ctx"); 191 } else { 192 // Only load additional libraries for compiles that don't use 193 // the debug context. 
194 if (bccPluginName && strlen(bccPluginName) > 0) { 195 args->push_back("-load"); 196 args->push_back(bccPluginName); 197 } 198 } 199 200 args->push_back("-fPIC"); 201 args->push_back("-embedRSInfo"); 202 203 args->push_back(bcFileName.c_str()); 204 args->push_back(nullptr); 205} 206 207static bool compileBitcode(const std::string &bcFileName, 208 const char *bitcode, 209 size_t bitcodeSize, 210 const char **compileArguments, 211 const std::string &compileCommandLine) { 212 rsAssert(bitcode && bitcodeSize); 213 214 FILE *bcfile = fopen(bcFileName.c_str(), "w"); 215 if (!bcfile) { 216 ALOGE("Could not write to %s", bcFileName.c_str()); 217 return false; 218 } 219 size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile); 220 fclose(bcfile); 221 if (nwritten != bitcodeSize) { 222 ALOGE("Could not write %zu bytes to %s", bitcodeSize, 223 bcFileName.c_str()); 224 return false; 225 } 226 227 pid_t pid = fork(); 228 229 switch (pid) { 230 case -1: { // Error occurred (we attempt no recovery) 231 ALOGE("Couldn't fork for bcc compiler execution"); 232 return false; 233 } 234 case 0: { // Child process 235 ALOGV("Invoking BCC with: %s", compileCommandLine.c_str()); 236 execv(BCC_EXE_PATH, (char* const*)compileArguments); 237 238 ALOGE("execv() failed: %s", strerror(errno)); 239 abort(); 240 return false; 241 } 242 default: { // Parent process (actual driver) 243 // Wait on child process to finish compiling the source. 
244 int status = 0; 245 pid_t w = waitpid(pid, &status, 0); 246 if (w == -1) { 247 ALOGE("Could not wait for bcc compiler"); 248 return false; 249 } 250 251 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 252 return true; 253 } 254 255 ALOGE("bcc compiler terminated unexpectedly"); 256 return false; 257 } 258 } 259} 260 261#endif // !defined(RS_COMPATIBILITY_LIB) 262} // namespace 263 264namespace android { 265namespace renderscript { 266 267const char* SharedLibraryUtils::LD_EXE_PATH = "/system/bin/ld.mc"; 268const char* SharedLibraryUtils::RS_CACHE_DIR = "com.android.renderscript.cache"; 269 270#ifndef RS_COMPATIBILITY_LIB 271 272bool SharedLibraryUtils::createSharedLibrary(const char *cacheDir, const char *resName) { 273 std::string sharedLibName = findSharedObjectName(cacheDir, resName); 274 std::string objFileName = cacheDir; 275 objFileName.append("/"); 276 objFileName.append(resName); 277 objFileName.append(".o"); 278 279 const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so"; 280 std::vector<const char *> args = { 281 LD_EXE_PATH, 282 "-shared", 283 "-nostdlib", 284 compiler_rt, 285 "-mtriple", DEFAULT_TARGET_TRIPLE_STRING, 286 "-L", SYSLIBPATH, 287 "-lRSDriver", "-lm", "-lc", 288 objFileName.c_str(), 289 "-o", sharedLibName.c_str(), 290 nullptr 291 }; 292 293 std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data()); 294 295 pid_t pid = fork(); 296 297 switch (pid) { 298 case -1: { // Error occurred (we attempt no recovery) 299 ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH); 300 return false; 301 } 302 case 0: { // Child process 303 ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str()); 304 execv(LD_EXE_PATH, (char* const*) args.data()); 305 306 ALOGE("execv() failed: %s", strerror(errno)); 307 abort(); 308 return false; 309 } 310 default: { // Parent process (actual driver) 311 // Wait on child process to finish compiling the source. 
312 int status = 0; 313 pid_t w = waitpid(pid, &status, 0); 314 if (w == -1) { 315 ALOGE("Could not wait for linker (%s)", LD_EXE_PATH); 316 return false; 317 } 318 319 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 320 return true; 321 } 322 323 ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH); 324 return false; 325 } 326 } 327} 328 329#endif // RS_COMPATIBILITY_LIB 330 331 332void* SharedLibraryUtils::loadSharedLibrary(const char *cacheDir, const char *resName, const char *nativeLibDir) { 333 void *loaded = nullptr; 334 335#if defined(RS_COMPATIBILITY_LIB) && defined(__LP64__) 336 std::string scriptSOName = findSharedObjectName(nativeLibDir, resName); 337#else 338 std::string scriptSOName = findSharedObjectName(cacheDir, resName); 339#endif 340 341 // We should check if we can load the library from the standard app 342 // location for shared libraries first. 343 loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName); 344 345 if (loaded == nullptr) { 346 ALOGE("Unable to open shared library (%s): %s", 347 scriptSOName.c_str(), dlerror()); 348 349#ifdef RS_COMPATIBILITY_LIB 350 // One final attempt to find the library in "/system/lib". 351 // We do this to allow bundled applications to use the compatibility 352 // library fallback path. Those applications don't have a private 353 // library path, so they need to install to the system directly. 354 // Note that this is really just a testing path. 
355 std::string scriptSONameSystem("/system/lib/librs."); 356 scriptSONameSystem.append(resName); 357 scriptSONameSystem.append(".so"); 358 loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir, 359 resName); 360 if (loaded == nullptr) { 361 ALOGE("Unable to open system shared library (%s): %s", 362 scriptSONameSystem.c_str(), dlerror()); 363 } 364#endif 365 } 366 367 return loaded; 368} 369 370void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDir, 371 const char *resName) { 372 // Keep track of which .so libraries have been loaded. Once a library is 373 // in the set (per-process granularity), we must instead make a copy of 374 // the original shared object (randomly named .so file) and load that one 375 // instead. If we don't do this, we end up aliasing global data between 376 // the various Script instances (which are supposed to be completely 377 // independent). 378 static std::set<std::string> LoadedLibraries; 379 380 void *loaded = nullptr; 381 382 // Skip everything if we don't even have the original library available. 383 if (access(origName, F_OK) != 0) { 384 return nullptr; 385 } 386 387 // Common path is that we have not loaded this Script/library before. 388 if (LoadedLibraries.find(origName) == LoadedLibraries.end()) { 389 loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL); 390 if (loaded) { 391 LoadedLibraries.insert(origName); 392 } 393 return loaded; 394 } 395 396 std::string newName(cacheDir); 397 398 // Append RS_CACHE_DIR only if it is not found in cacheDir 399 // In driver mode, RS_CACHE_DIR is already appended to cacheDir. 400 if (newName.find(RS_CACHE_DIR) == std::string::npos) { 401 newName.append("/"); 402 newName.append(RS_CACHE_DIR); 403 newName.append("/"); 404 } 405 406 if (!ensureCacheDirExists(newName.c_str())) { 407 ALOGE("Could not verify or create cache dir: %s", cacheDir); 408 return nullptr; 409 } 410 411 // Construct an appropriately randomized filename for the copy. 
412 newName.append("librs."); 413 newName.append(resName); 414 newName.append("#"); 415 newName.append(getRandomString(6)); // 62^6 potential filename variants. 416 newName.append(".so"); 417 418 int r = copyFile(newName.c_str(), origName); 419 if (r != 0) { 420 ALOGE("Could not create copy %s -> %s", origName, newName.c_str()); 421 return nullptr; 422 } 423 loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL); 424 r = unlink(newName.c_str()); 425 if (r != 0) { 426 ALOGE("Could not unlink copy %s", newName.c_str()); 427 } 428 if (loaded) { 429 LoadedLibraries.insert(newName.c_str()); 430 } 431 432 return loaded; 433} 434 435#define MAXLINE 500 436#define MAKE_STR_HELPER(S) #S 437#define MAKE_STR(S) MAKE_STR_HELPER(S) 438#define EXPORT_VAR_STR "exportVarCount: " 439#define EXPORT_FUNC_STR "exportFuncCount: " 440#define EXPORT_FOREACH_STR "exportForEachCount: " 441#define OBJECT_SLOT_STR "objectSlotCount: " 442#define PRAGMA_STR "pragmaCount: " 443#define THREADABLE_STR "isThreadable: " 444 445// Copy up to a newline or size chars from str -> s, updating str 446// Returns s when successful and nullptr when '\0' is finally reached. 447static char* strgets(char *s, int size, const char **ppstr) { 448 if (!ppstr || !*ppstr || **ppstr == '\0' || size < 1) { 449 return nullptr; 450 } 451 452 int i; 453 for (i = 0; i < (size - 1); i++) { 454 s[i] = **ppstr; 455 (*ppstr)++; 456 if (s[i] == '\0') { 457 return s; 458 } else if (s[i] == '\n') { 459 s[i+1] = '\0'; 460 return s; 461 } 462 } 463 464 // size has been exceeded. 
465 s[i] = '\0'; 466 467 return s; 468} 469 470RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) { 471 mCtx = ctx; 472 mScript = s; 473 474 mScriptSO = nullptr; 475 476#ifndef RS_COMPATIBILITY_LIB 477 mCompilerDriver = nullptr; 478#endif 479 480 481 mRoot = nullptr; 482 mRootExpand = nullptr; 483 mInit = nullptr; 484 mFreeChildren = nullptr; 485 mScriptExec = nullptr; 486 487 mBoundAllocs = nullptr; 488 mIntrinsicData = nullptr; 489 mIsThreadable = true; 490} 491 492bool RsdCpuScriptImpl::storeRSInfoFromSO() { 493 mRoot = (RootFunc_t) dlsym(mScriptSO, "root"); 494 if (mRoot) { 495 //ALOGE("Found root(): %p", mRoot); 496 } 497 mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand"); 498 if (mRootExpand) { 499 //ALOGE("Found root.expand(): %p", mRootExpand); 500 } 501 mInit = (InvokeFunc_t) dlsym(mScriptSO, "init"); 502 if (mInit) { 503 //ALOGE("Found init(): %p", mInit); 504 } 505 mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor"); 506 if (mFreeChildren) { 507 //ALOGE("Found .rs.dtor(): %p", mFreeChildren); 508 } 509 510 mScriptExec = ScriptExecutable::createFromSharedObject( 511 mCtx->getContext(), mScriptSO); 512 513 if (mScriptExec == nullptr) { 514 return false; 515 } 516 517 size_t varCount = mScriptExec->getExportedVariableCount(); 518 if (varCount > 0) { 519 mBoundAllocs = new Allocation *[varCount]; 520 memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs)); 521 } 522 523 mIsThreadable = mScriptExec->getThreadable(); 524 //ALOGE("Script isThreadable? 
%d", mIsThreadable); 525 526 return true; 527} 528 529ScriptExecutable* ScriptExecutable::createFromSharedObject( 530 Context* RSContext, void* sharedObj) { 531 char line[MAXLINE]; 532 533 size_t varCount = 0; 534 size_t funcCount = 0; 535 size_t forEachCount = 0; 536 size_t objectSlotCount = 0; 537 size_t pragmaCount = 0; 538 bool isThreadable = true; 539 540 const char *rsInfo = (const char *) dlsym(sharedObj, ".rs.info"); 541 542 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 543 return nullptr; 544 } 545 if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) { 546 ALOGE("Invalid export var count!: %s", line); 547 return nullptr; 548 } 549 550 std::vector<void*> fieldAddress; 551 552 for (size_t i = 0; i < varCount; ++i) { 553 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 554 return nullptr; 555 } 556 char *c = strrchr(line, '\n'); 557 if (c) { 558 *c = '\0'; 559 } 560 void* addr = dlsym(sharedObj, line); 561 if (addr == nullptr) { 562 ALOGE("Failed to find variable address for %s: %s", 563 line, dlerror()); 564 // Not a critical error if we don't find a global variable. 
565 } 566 fieldAddress.push_back(addr); 567 } 568 569 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 570 return nullptr; 571 } 572 if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) { 573 ALOGE("Invalid export func count!: %s", line); 574 return nullptr; 575 } 576 577 std::vector<InvokeFunc_t> invokeFunctions(funcCount); 578 579 for (size_t i = 0; i < funcCount; ++i) { 580 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 581 return nullptr ; 582 } 583 char *c = strrchr(line, '\n'); 584 if (c) { 585 *c = '\0'; 586 } 587 588 invokeFunctions[i] = (InvokeFunc_t) dlsym(sharedObj, line); 589 if (invokeFunctions[i] == nullptr) { 590 ALOGE("Failed to get function address for %s(): %s", 591 line, dlerror()); 592 return nullptr; 593 } 594 } 595 596 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 597 return nullptr; 598 } 599 if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) { 600 ALOGE("Invalid export forEach count!: %s", line); 601 return nullptr; 602 } 603 604 std::vector<ForEachFunc_t> forEachFunctions(forEachCount); 605 std::vector<uint32_t> forEachSignatures(forEachCount); 606 607 for (size_t i = 0; i < forEachCount; ++i) { 608 unsigned int tmpSig = 0; 609 char tmpName[MAXLINE]; 610 611 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 612 return nullptr; 613 } 614 if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s", 615 &tmpSig, tmpName) != 2) { 616 ALOGE("Invalid export forEach!: %s", line); 617 return nullptr; 618 } 619 620 // Lookup the expanded ForEach kernel. 621 strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName)); 622 forEachSignatures[i] = tmpSig; 623 forEachFunctions[i] = 624 (ForEachFunc_t) dlsym(sharedObj, tmpName); 625 if (i != 0 && forEachFunctions[i] == nullptr) { 626 // Ignore missing root.expand functions. 627 // root() is always specified at location 0. 
628 ALOGE("Failed to find forEach function address for %s: %s", 629 tmpName, dlerror()); 630 return nullptr; 631 } 632 } 633 634 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 635 return nullptr; 636 } 637 if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) { 638 ALOGE("Invalid object slot count!: %s", line); 639 return nullptr; 640 } 641 642 std::vector<bool> fieldIsObject(varCount, false); 643 644 rsAssert(varCount > 0); 645 for (size_t i = 0; i < objectSlotCount; ++i) { 646 uint32_t varNum = 0; 647 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 648 return nullptr; 649 } 650 if (sscanf(line, "%u", &varNum) != 1) { 651 ALOGE("Invalid object slot!: %s", line); 652 return nullptr; 653 } 654 655 if (varNum < varCount) { 656 fieldIsObject[varNum] = true; 657 } 658 } 659 660#ifdef RS_COMPATIBILITY_LIB 661 // Do not attempt to read pragmas or isThreadable flag in compat lib path. 662 // Neither is applicable for compat lib 663 std::vector<const char *> pragmaKeys(pragmaCount); 664 std::vector<const char *> pragmaValues(pragmaCount); 665 666 isThreadable = true; 667 668#else 669 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 670 return nullptr; 671 } 672 673 if (sscanf(line, PRAGMA_STR "%zu", &pragmaCount) != 1) { 674 ALOGE("Invalid pragma count!: %s", line); 675 return nullptr; 676 } 677 678 std::vector<const char *> pragmaKeys(pragmaCount); 679 std::vector<const char *> pragmaValues(pragmaCount); 680 681 for (size_t i = 0; i < pragmaCount; ++i) { 682 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 683 ALOGE("Unable to read pragma at index %zu!", i); 684 return nullptr; 685 } 686 687 char key[MAXLINE]; 688 char value[MAXLINE] = ""; // initialize in case value is empty 689 690 // pragmas can just have a key and no value. 
Only check to make sure 691 // that the key is not empty 692 if (sscanf(line, "%" MAKE_STR(MAXLINE) "s - %" MAKE_STR(MAXLINE) "s", 693 key, value) == 0 || 694 strlen(key) == 0) 695 { 696 ALOGE("Invalid pragma value!: %s", line); 697 698 // free previously allocated keys and values 699 for (size_t idx = 0; idx < i; ++idx) { 700 delete [] pragmaKeys[idx]; 701 delete [] pragmaValues[idx]; 702 } 703 return nullptr; 704 } 705 706 char *pKey = new char[strlen(key)+1]; 707 strcpy(pKey, key); 708 pragmaKeys[i] = pKey; 709 710 char *pValue = new char[strlen(value)+1]; 711 strcpy(pValue, value); 712 pragmaValues[i] = pValue; 713 //ALOGE("Pragma %zu: Key: '%s' Value: '%s'", i, pKey, pValue); 714 } 715 716 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 717 return nullptr; 718 } 719 720 char tmpFlag[4]; 721 if (sscanf(line, THREADABLE_STR "%4s", tmpFlag) != 1) { 722 ALOGE("Invalid threadable flag!: %s", line); 723 return nullptr; 724 } 725 if (strcmp(tmpFlag, "yes") == 0) 726 isThreadable = true; 727 else if (strcmp(tmpFlag, "no") == 0) 728 isThreadable = false; 729 else { 730 ALOGE("Invalid threadable flag!: %s", tmpFlag); 731 return nullptr; 732 } 733 734#endif 735 736 return new ScriptExecutable( 737 RSContext, fieldAddress, fieldIsObject, invokeFunctions, 738 forEachFunctions, forEachSignatures, pragmaKeys, pragmaValues, 739 isThreadable); 740} 741 742bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir, 743 uint8_t const *bitcode, size_t bitcodeSize, 744 uint32_t flags, char const *bccPluginName) { 745 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 746 //ALOGE("rsdScriptInit %p %p", rsc, script); 747 748 mCtx->lockMutex(); 749#ifndef RS_COMPATIBILITY_LIB 750 bool useRSDebugContext = false; 751 752 mCompilerDriver = nullptr; 753 754 mCompilerDriver = new bcc::RSCompilerDriver(); 755 if (mCompilerDriver == nullptr) { 756 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 757 
mCtx->unlockMutex(); 758 return false; 759 } 760 761 // Run any compiler setup functions we have been provided with. 762 RSSetupCompilerCallback setupCompilerCallback = 763 mCtx->getSetupCompilerCallback(); 764 if (setupCompilerCallback != nullptr) { 765 setupCompilerCallback(mCompilerDriver); 766 } 767 768 bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize); 769 if (!bitcodeMetadata.extract()) { 770 ALOGE("Could not extract metadata from bitcode"); 771 mCtx->unlockMutex(); 772 return false; 773 } 774 775 const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize); 776 777 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 778 mCompilerDriver->setDebugContext(true); 779 useRSDebugContext = true; 780 } 781 782 std::string bcFileName(cacheDir); 783 bcFileName.append("/"); 784 bcFileName.append(resName); 785 bcFileName.append(".bc"); 786 787 std::vector<const char*> compileArguments; 788 setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib, 789 useRSDebugContext, bccPluginName); 790 // The last argument of compileArguments ia a nullptr, so remove 1 from the size. 791 std::string compileCommandLine = 792 bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data()); 793 794 if (!is_force_recompile() && !useRSDebugContext) { 795 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 796 } 797 798 // If we can't, it's either not there or out of date. We compile the bit code and try loading 799 // again. 
800 if (mScriptSO == nullptr) { 801 if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize, 802 compileArguments.data(), compileCommandLine)) 803 { 804 ALOGE("bcc: FAILS to compile '%s'", resName); 805 mCtx->unlockMutex(); 806 return false; 807 } 808 809 if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { 810 ALOGE("Linker: Failed to link object file '%s'", resName); 811 mCtx->unlockMutex(); 812 return false; 813 } 814 815 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 816 if (mScriptSO == nullptr) { 817 ALOGE("Unable to load '%s'", resName); 818 mCtx->unlockMutex(); 819 return false; 820 } 821 } 822 823 // Read RS symbol information from the .so. 824 if ( !mScriptSO) { 825 goto error; 826 } 827 828 if ( !storeRSInfoFromSO()) { 829 goto error; 830 } 831#else // RS_COMPATIBILITY_LIB is defined 832 const char *nativeLibDir = mCtx->getContext()->getNativeLibDir(); 833 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir); 834 835 if (!mScriptSO) { 836 goto error; 837 } 838 839 if (!storeRSInfoFromSO()) { 840 goto error; 841 } 842#endif 843 mCtx->unlockMutex(); 844 return true; 845 846error: 847 848 mCtx->unlockMutex(); 849 if (mScriptSO) { 850 dlclose(mScriptSO); 851 } 852 return false; 853} 854 855#ifndef RS_COMPATIBILITY_LIB 856 857const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode, 858 size_t bitcodeSize) { 859 const char* defaultLib = SYSLIBPATH"/libclcore.bc"; 860 861 // If we're debugging, use the debug library. 862 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 863 return SYSLIBPATH"/libclcore_debug.bc"; 864 } 865 866 // If a callback has been registered to specify a library, use that. 
867 RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback(); 868 if (selectRTCallback != nullptr) { 869 return selectRTCallback((const char*)bitcode, bitcodeSize); 870 } 871 872 // Check for a platform specific library 873#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 874 enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision(); 875 if (prec == bcinfo::RS_FP_Relaxed) { 876 // NEON-capable ARMv7a devices can use an accelerated math library 877 // for all reduced precision scripts. 878 // ARMv8 does not use NEON, as ASIMD can be used with all precision 879 // levels. 880 return SYSLIBPATH"/libclcore_neon.bc"; 881 } else { 882 return defaultLib; 883 } 884#elif defined(__i386__) || defined(__x86_64__) 885 // x86 devices will use an optimized library. 886 return SYSLIBPATH"/libclcore_x86.bc"; 887#else 888 return defaultLib; 889#endif 890} 891 892#endif 893 894void RsdCpuScriptImpl::populateScript(Script *script) { 895 // Copy info over to runtime 896 script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount(); 897 script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount(); 898 script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();; 899 script->mHal.info.exportedPragmaKeyList = 900 const_cast<const char**>(&mScriptExec->getPragmaKeys().front()); 901 script->mHal.info.exportedPragmaValueList = 902 const_cast<const char**>(&mScriptExec->getPragmaValues().front()); 903 904 // Bug, need to stash in metadata 905 if (mRootExpand) { 906 script->mHal.info.root = mRootExpand; 907 } else { 908 script->mHal.info.root = mRoot; 909 } 910} 911 912 913typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 914 915bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, 916 uint32_t inLen, 917 Allocation * aout, 918 const void * usr, uint32_t usrLen, 919 const RsScriptCall *sc, 920 MTLaunchStruct *mtls) { 921 922 memset(mtls, 0, 
sizeof(MTLaunchStruct)); 923 924 for (int index = inLen; --index >= 0;) { 925 const Allocation* ain = ains[index]; 926 927 // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface 928 if (ain != nullptr && 929 (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) { 930 931 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 932 "rsForEach called with null in allocations"); 933 return false; 934 } 935 } 936 937 if (aout && 938 (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) { 939 940 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 941 "rsForEach called with null out allocations"); 942 return false; 943 } 944 945 if (inLen > 0) { 946 const Allocation *ain0 = ains[0]; 947 const Type *inType = ain0->getType(); 948 949 mtls->fep.dim.x = inType->getDimX(); 950 mtls->fep.dim.y = inType->getDimY(); 951 mtls->fep.dim.z = inType->getDimZ(); 952 953 for (int Index = inLen; --Index >= 1;) { 954 if (!ain0->hasSameDims(ains[Index])) { 955 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 956 "Failed to launch kernel; dimensions of input and output allocations do not match."); 957 958 return false; 959 } 960 } 961 962 } else if (aout != nullptr) { 963 const Type *outType = aout->getType(); 964 965 mtls->fep.dim.x = outType->getDimX(); 966 mtls->fep.dim.y = outType->getDimY(); 967 mtls->fep.dim.z = outType->getDimZ(); 968 969 } else { 970 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 971 "rsForEach called with null allocations"); 972 return false; 973 } 974 975 if (inLen > 0 && aout != nullptr) { 976 if (!ains[0]->hasSameDims(aout)) { 977 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 978 "Failed to launch kernel; dimensions of input and output allocations do not match."); 979 980 return false; 981 } 982 } 983 984 if (!sc || (sc->xEnd == 0)) { 985 mtls->end.x = mtls->fep.dim.x; 986 } else { 987 mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart); 988 mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd); 989 if (mtls->start.x >= 
mtls->end.x) return false; 990 } 991 992 if (!sc || (sc->yEnd == 0)) { 993 mtls->end.y = mtls->fep.dim.y; 994 } else { 995 mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart); 996 mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd); 997 if (mtls->start.y >= mtls->end.y) return false; 998 } 999 1000 if (!sc || (sc->zEnd == 0)) { 1001 mtls->end.z = mtls->fep.dim.z; 1002 } else { 1003 mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart); 1004 mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd); 1005 if (mtls->start.z >= mtls->end.z) return false; 1006 } 1007 1008 if (!sc || (sc->arrayEnd == 0)) { 1009 mtls->end.array[0] = mtls->fep.dim.array[0]; 1010 } else { 1011 mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart); 1012 mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd); 1013 if (mtls->start.array[0] >= mtls->end.array[0]) return false; 1014 } 1015 1016 if (!sc || (sc->array2End == 0)) { 1017 mtls->end.array[1] = mtls->fep.dim.array[1]; 1018 } else { 1019 mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start); 1020 mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End); 1021 if (mtls->start.array[1] >= mtls->end.array[1]) return false; 1022 } 1023 1024 if (!sc || (sc->array3End == 0)) { 1025 mtls->end.array[2] = mtls->fep.dim.array[2]; 1026 } else { 1027 mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start); 1028 mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End); 1029 if (mtls->start.array[2] >= mtls->end.array[2]) return false; 1030 } 1031 1032 if (!sc || (sc->array4End == 0)) { 1033 mtls->end.array[3] = mtls->fep.dim.array[3]; 1034 } else { 1035 mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start); 1036 mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End); 1037 if (mtls->start.array[3] >= mtls->end.array[3]) return false; 1038 } 1039 1040 1041 // The X & Y walkers always want 0-1 min even if dim is not present 1042 mtls->end.x = rsMax((uint32_t)1, mtls->end.x); 
1043 mtls->end.y = rsMax((uint32_t)1, mtls->end.y); 1044 1045 mtls->rsc = mCtx; 1046 if (ains) { 1047 memcpy(mtls->ains, ains, inLen * sizeof(ains[0])); 1048 } 1049 mtls->aout[0] = aout; 1050 mtls->fep.usr = usr; 1051 mtls->fep.usrLen = usrLen; 1052 mtls->mSliceSize = 1; 1053 mtls->mSliceNum = 0; 1054 1055 mtls->isThreadable = mIsThreadable; 1056 1057 if (inLen > 0) { 1058 mtls->fep.inLen = inLen; 1059 for (int index = inLen; --index >= 0;) { 1060 mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr; 1061 mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes(); 1062 } 1063 } 1064 1065 if (aout != nullptr) { 1066 mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; 1067 mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes(); 1068 } 1069 1070 // All validation passed, ok to launch threads 1071 return true; 1072} 1073 1074 1075void RsdCpuScriptImpl::invokeForEach(uint32_t slot, 1076 const Allocation ** ains, 1077 uint32_t inLen, 1078 Allocation * aout, 1079 const void * usr, 1080 uint32_t usrLen, 1081 const RsScriptCall *sc) { 1082 1083 MTLaunchStruct mtls; 1084 1085 if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) { 1086 forEachKernelSetup(slot, &mtls); 1087 1088 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 1089 mCtx->launchThreads(ains, inLen, aout, sc, &mtls); 1090 mCtx->setTLS(oldTLS); 1091 } 1092} 1093 1094void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { 1095 mtls->script = this; 1096 mtls->fep.slot = slot; 1097 mtls->kernel = mScriptExec->getForEachFunction(slot); 1098 rsAssert(mtls->kernel != nullptr); 1099 mtls->sig = mScriptExec->getForEachSignature(slot); 1100} 1101 1102int RsdCpuScriptImpl::invokeRoot() { 1103 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 1104 int ret = mRoot(); 1105 mCtx->setTLS(oldTLS); 1106 return ret; 1107} 1108 1109void RsdCpuScriptImpl::invokeInit() { 1110 if (mInit) { 1111 mInit(); 1112 } 1113} 1114 
1115void RsdCpuScriptImpl::invokeFreeChildren() { 1116 if (mFreeChildren) { 1117 mFreeChildren(); 1118 } 1119} 1120 1121void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params, 1122 size_t paramLength) { 1123 //ALOGE("invoke %i %p %zu", slot, params, paramLength); 1124 void * ap = nullptr; 1125 1126#if defined(__x86_64__) 1127 // The invoked function could have input parameter of vector type for example float4 which 1128 // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform. 1129 // So try to align void* params before passing them into RS exported function. 1130 1131 if ((uint8_t)(uint64_t)params & 0x0F) { 1132 if ((ap = (void*)memalign(16, paramLength)) != nullptr) { 1133 memcpy(ap, params, paramLength); 1134 } else { 1135 ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned."); 1136 } 1137 } 1138#endif 1139 1140 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 1141 reinterpret_cast<void (*)(const void *, uint32_t)>( 1142 mScriptExec->getInvokeFunction(slot))(ap? 
(const void *) ap: params, paramLength); 1143 1144 mCtx->setTLS(oldTLS); 1145} 1146 1147void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 1148 //rsAssert(!script->mFieldIsObject[slot]); 1149 //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength); 1150 1151 //if (mIntrinsicID) { 1152 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); 1153 //return; 1154 //} 1155 1156 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1157 if (!destPtr) { 1158 //ALOGV("Calling setVar on slot = %i which is null", slot); 1159 return; 1160 } 1161 1162 memcpy(destPtr, data, dataLength); 1163} 1164 1165void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) { 1166 //rsAssert(!script->mFieldIsObject[slot]); 1167 //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength); 1168 1169 int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1170 if (!srcPtr) { 1171 //ALOGV("Calling setVar on slot = %i which is null", slot); 1172 return; 1173 } 1174 memcpy(data, srcPtr, dataLength); 1175} 1176 1177 1178void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, 1179 const Element *elem, 1180 const uint32_t *dims, size_t dimLength) { 1181 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1182 if (!destPtr) { 1183 //ALOGV("Calling setVar on slot = %i which is null", slot); 1184 return; 1185 } 1186 1187 // We want to look at dimension in terms of integer components, 1188 // but dimLength is given in terms of bytes. 1189 dimLength /= sizeof(int); 1190 1191 // Only a single dimension is currently supported. 1192 rsAssert(dimLength == 1); 1193 if (dimLength == 1) { 1194 // First do the increment loop. 
1195 size_t stride = elem->getSizeBytes(); 1196 const char *cVal = reinterpret_cast<const char *>(data); 1197 for (uint32_t i = 0; i < dims[0]; i++) { 1198 elem->incRefs(cVal); 1199 cVal += stride; 1200 } 1201 1202 // Decrement loop comes after (to prevent race conditions). 1203 char *oldVal = reinterpret_cast<char *>(destPtr); 1204 for (uint32_t i = 0; i < dims[0]; i++) { 1205 elem->decRefs(oldVal); 1206 oldVal += stride; 1207 } 1208 } 1209 1210 memcpy(destPtr, data, dataLength); 1211} 1212 1213void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) { 1214 1215 //rsAssert(!script->mFieldIsObject[slot]); 1216 //ALOGE("setGlobalBind %i %p", slot, data); 1217 1218 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1219 if (!destPtr) { 1220 //ALOGV("Calling setVar on slot = %i which is null", slot); 1221 return; 1222 } 1223 1224 void *ptr = nullptr; 1225 mBoundAllocs[slot] = data; 1226 if (data) { 1227 ptr = data->mHal.drvState.lod[0].mallocPtr; 1228 } 1229 memcpy(destPtr, &ptr, sizeof(void *)); 1230} 1231 1232void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { 1233 1234 //rsAssert(script->mFieldIsObject[slot]); 1235 //ALOGE("setGlobalObj %i %p", slot, data); 1236 1237 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1238 if (!destPtr) { 1239 //ALOGV("Calling setVar on slot = %i which is null", slot); 1240 return; 1241 } 1242 1243 rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data); 1244} 1245 1246RsdCpuScriptImpl::~RsdCpuScriptImpl() { 1247#ifndef RS_COMPATIBILITY_LIB 1248 if (mCompilerDriver) { 1249 delete mCompilerDriver; 1250 } 1251#endif 1252 1253 if (mScriptExec != nullptr) { 1254 delete mScriptExec; 1255 } 1256 if (mBoundAllocs) delete[] mBoundAllocs; 1257 if (mScriptSO) { 1258 dlclose(mScriptSO); 1259 } 1260} 1261 1262Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { 1263 if (!ptr) { 1264 return nullptr; 1265 } 1266 
1267 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) { 1268 Allocation *a = mBoundAllocs[ct]; 1269 if (!a) continue; 1270 if (a->mHal.drvState.lod[0].mallocPtr == ptr) { 1271 return a; 1272 } 1273 } 1274 ALOGE("rsGetAllocation, failed to find %p", ptr); 1275 return nullptr; 1276} 1277 1278void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains, 1279 uint32_t inLen, Allocation * aout, 1280 const void * usr, uint32_t usrLen, 1281 const RsScriptCall *sc) {} 1282 1283void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains, 1284 uint32_t inLen, Allocation * aout, 1285 const void * usr, uint32_t usrLen, 1286 const RsScriptCall *sc) {} 1287 1288 1289} 1290} 1291