// rsCpuScript.cpp revision d9bae689c1b8c3f2ed1a5f2b374dc9393584b8dd
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsCpuCore.h" 18#include "rsCpuScript.h" 19 20#ifdef RS_COMPATIBILITY_LIB 21 #include <stdio.h> 22 #include <sys/stat.h> 23 #include <unistd.h> 24#else 25 #include <bcc/BCCContext.h> 26 #include <bcc/Config/Config.h> 27 #include <bcc/Renderscript/RSCompilerDriver.h> 28 #include <bcc/Renderscript/RSInfo.h> 29 #include <bcinfo/MetadataExtractor.h> 30 #include <cutils/properties.h> 31 32 #include <sys/types.h> 33 #include <sys/wait.h> 34 #include <unistd.h> 35 36 #include <string> 37 #include <vector> 38#endif 39 40#include <set> 41#include <string> 42#include <dlfcn.h> 43#include <stdlib.h> 44#include <string.h> 45#include <fstream> 46#include <iostream> 47 48#ifdef __LP64__ 49#define SYSLIBPATH "/system/lib64" 50#else 51#define SYSLIBPATH "/system/lib" 52#endif 53 54namespace { 55 56// Create a len length string containing random characters from [A-Za-z0-9]. 
57static std::string getRandomString(size_t len) { 58 char buf[len + 1]; 59 for (size_t i = 0; i < len; i++) { 60 uint32_t r = arc4random() & 0xffff; 61 r %= 62; 62 if (r < 26) { 63 // lowercase 64 buf[i] = 'a' + r; 65 } else if (r < 52) { 66 // uppercase 67 buf[i] = 'A' + (r - 26); 68 } else { 69 // Use a number 70 buf[i] = '0' + (r - 52); 71 } 72 } 73 buf[len] = '\0'; 74 return std::string(buf); 75} 76 77// Check if a path exists and attempt to create it if it doesn't. 78static bool ensureCacheDirExists(const char *path) { 79 if (access(path, R_OK | W_OK | X_OK) == 0) { 80 // Done if we can rwx the directory 81 return true; 82 } 83 if (mkdir(path, 0700) == 0) { 84 return true; 85 } 86 return false; 87} 88 89// Copy the file named \p srcFile to \p dstFile. 90// Return 0 on success and -1 if anything wasn't copied. 91static int copyFile(const char *dstFile, const char *srcFile) { 92 std::ifstream srcStream(srcFile); 93 if (!srcStream) { 94 ALOGE("Could not verify or read source file: %s", srcFile); 95 return -1; 96 } 97 std::ofstream dstStream(dstFile); 98 if (!dstStream) { 99 ALOGE("Could not verify or write destination file: %s", dstFile); 100 return -1; 101 } 102 dstStream << srcStream.rdbuf(); 103 if (!dstStream) { 104 ALOGE("Could not write destination file: %s", dstFile); 105 return -1; 106 } 107 108 srcStream.close(); 109 dstStream.close(); 110 111 return 0; 112} 113 114#define RS_CACHE_DIR "com.android.renderscript.cache" 115 116// Attempt to load the shared library from origName, but then fall back to 117// creating a copy of the shared library if necessary (to ensure instancing). 118// This function returns the dlopen()-ed handle if successful. 119static void *loadSOHelper(const char *origName, const char *cacheDir, 120 const char *resName) { 121 // Keep track of which .so libraries have been loaded. 
Once a library is 122 // in the set (per-process granularity), we must instead make a copy of 123 // the original shared object (randomly named .so file) and load that one 124 // instead. If we don't do this, we end up aliasing global data between 125 // the various Script instances (which are supposed to be completely 126 // independent). 127 static std::set<std::string> LoadedLibraries; 128 129 void *loaded = nullptr; 130 131 // Skip everything if we don't even have the original library available. 132 if (access(origName, F_OK) != 0) { 133 return nullptr; 134 } 135 136 // Common path is that we have not loaded this Script/library before. 137 if (LoadedLibraries.find(origName) == LoadedLibraries.end()) { 138 loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL); 139 if (loaded) { 140 LoadedLibraries.insert(origName); 141 } 142 return loaded; 143 } 144 145 std::string newName(cacheDir); 146 147 // Append RS_CACHE_DIR only if it is not found in cacheDir 148 // In driver mode, RS_CACHE_DIR is already appended to cacheDir. 149 if (newName.find(RS_CACHE_DIR) == std::string::npos) { 150 newName.append("/" RS_CACHE_DIR "/"); 151 } 152 153 if (!ensureCacheDirExists(newName.c_str())) { 154 ALOGE("Could not verify or create cache dir: %s", cacheDir); 155 return nullptr; 156 } 157 158 // Construct an appropriately randomized filename for the copy. 159 newName.append("librs."); 160 newName.append(resName); 161 newName.append("#"); 162 newName.append(getRandomString(6)); // 62^6 potential filename variants. 
163 newName.append(".so"); 164 165 int r = copyFile(newName.c_str(), origName); 166 if (r != 0) { 167 ALOGE("Could not create copy %s -> %s", origName, newName.c_str()); 168 return nullptr; 169 } 170 loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL); 171 r = unlink(newName.c_str()); 172 if (r != 0) { 173 ALOGE("Could not unlink copy %s", newName.c_str()); 174 } 175 if (loaded) { 176 LoadedLibraries.insert(newName.c_str()); 177 } 178 179 return loaded; 180} 181 182static std::string findSharedObjectName(const char *cacheDir, 183 const char *resName) { 184 185#ifndef RS_SERVER 186 std::string scriptSOName(cacheDir); 187#ifdef RS_COMPATIBILITY_LIB 188 size_t cutPos = scriptSOName.rfind("cache"); 189 if (cutPos != std::string::npos) { 190 scriptSOName.erase(cutPos); 191 } else { 192 ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir); 193 } 194 scriptSOName.append("/lib/librs."); 195#else 196 scriptSOName.append("/librs."); 197#endif 198 199#else 200 std::string scriptSOName("lib"); 201#endif 202 scriptSOName.append(resName); 203 scriptSOName.append(".so"); 204 205 return scriptSOName; 206} 207 208// Load the shared library referred to by cacheDir and resName. If we have 209// already loaded this library, we instead create a new copy (in the 210// cache dir) and then load that. We then immediately destroy the copy. 211// This is required behavior to implement script instancing for the support 212// library, since shared objects are loaded and de-duped by name only. 213static void *loadSharedLibrary(const char *cacheDir, const char *resName) { 214 void *loaded = nullptr; 215 216 std::string scriptSOName = findSharedObjectName(cacheDir, resName); 217 218 // We should check if we can load the library from the standard app 219 // location for shared libraries first. 
220 loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName); 221 222 if (loaded == nullptr) { 223 ALOGE("Unable to open shared library (%s): %s", 224 scriptSOName.c_str(), dlerror()); 225 226#ifdef RS_COMPATIBILITY_LIB 227 // One final attempt to find the library in "/system/lib". 228 // We do this to allow bundled applications to use the compatibility 229 // library fallback path. Those applications don't have a private 230 // library path, so they need to install to the system directly. 231 // Note that this is really just a testing path. 232 std::string scriptSONameSystem("/system/lib/librs."); 233 scriptSONameSystem.append(resName); 234 scriptSONameSystem.append(".so"); 235 loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir, 236 resName); 237 if (loaded == nullptr) { 238 ALOGE("Unable to open system shared library (%s): %s", 239 scriptSONameSystem.c_str(), dlerror()); 240 } 241#endif 242 } 243 244 return loaded; 245} 246 247#ifndef RS_COMPATIBILITY_LIB 248 249static bool is_force_recompile() { 250#ifdef RS_SERVER 251 return false; 252#else 253 char buf[PROPERTY_VALUE_MAX]; 254 255 // Re-compile if floating point precision has been overridden. 256 property_get("debug.rs.precision", buf, ""); 257 if (buf[0] != '\0') { 258 return true; 259 } 260 261 // Re-compile if debug.rs.forcerecompile is set. 
262 property_get("debug.rs.forcerecompile", buf, "0"); 263 if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) { 264 return true; 265 } else { 266 return false; 267 } 268#endif // RS_SERVER 269} 270 271const static char *BCC_EXE_PATH = "/system/bin/bcc"; 272 273static void setCompileArguments(std::vector<const char*>* args, 274 const std::string& bcFileName, 275 const char* cacheDir, const char* resName, 276 const char* core_lib, bool useRSDebugContext, 277 const char* bccPluginName) { 278 rsAssert(cacheDir && resName && core_lib); 279 args->push_back(BCC_EXE_PATH); 280 args->push_back("-unroll-runtime"); 281 args->push_back("-scalarize-load-store"); 282 args->push_back("-o"); 283 args->push_back(resName); 284 args->push_back("-output_path"); 285 args->push_back(cacheDir); 286 args->push_back("-bclib"); 287 args->push_back(core_lib); 288 args->push_back("-mtriple"); 289 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 290 291 // Enable workaround for A53 codegen by default. 292#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND) 293 args->push_back("-aarch64-fix-cortex-a53-835769"); 294#endif 295 296 // Execute the bcc compiler. 297 if (useRSDebugContext) { 298 args->push_back("-rs-debug-ctx"); 299 } else { 300 // Only load additional libraries for compiles that don't use 301 // the debug context. 
302 if (bccPluginName && strlen(bccPluginName) > 0) { 303 args->push_back("-load"); 304 args->push_back(bccPluginName); 305 } 306 } 307 308 args->push_back("-fPIC"); 309 args->push_back("-embedRSInfo"); 310 311 args->push_back(bcFileName.c_str()); 312 args->push_back(nullptr); 313} 314 315static bool compileBitcode(const std::string &bcFileName, 316 const char *bitcode, 317 size_t bitcodeSize, 318 const char **compileArguments, 319 const std::string &compileCommandLine) { 320 rsAssert(bitcode && bitcodeSize); 321 322 FILE *bcfile = fopen(bcFileName.c_str(), "w"); 323 if (!bcfile) { 324 ALOGE("Could not write to %s", bcFileName.c_str()); 325 return false; 326 } 327 size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile); 328 fclose(bcfile); 329 if (nwritten != bitcodeSize) { 330 ALOGE("Could not write %zu bytes to %s", bitcodeSize, 331 bcFileName.c_str()); 332 return false; 333 } 334 335 pid_t pid = fork(); 336 337 switch (pid) { 338 case -1: { // Error occurred (we attempt no recovery) 339 ALOGE("Couldn't fork for bcc compiler execution"); 340 return false; 341 } 342 case 0: { // Child process 343 ALOGV("Invoking BCC with: %s", compileCommandLine.c_str()); 344 execv(BCC_EXE_PATH, (char* const*)compileArguments); 345 346 ALOGE("execv() failed: %s", strerror(errno)); 347 abort(); 348 return false; 349 } 350 default: { // Parent process (actual driver) 351 // Wait on child process to finish compiling the source. 
352 int status = 0; 353 pid_t w = waitpid(pid, &status, 0); 354 if (w == -1) { 355 ALOGE("Could not wait for bcc compiler"); 356 return false; 357 } 358 359 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 360 return true; 361 } 362 363 ALOGE("bcc compiler terminated unexpectedly"); 364 return false; 365 } 366 } 367} 368 369const static char *LD_EXE_PATH = "/system/bin/ld.mc"; 370 371static bool createSharedLib(const char *cacheDir, const char *resName) { 372 std::string sharedLibName = findSharedObjectName(cacheDir, resName); 373 std::string objFileName = cacheDir; 374 objFileName.append("/"); 375 objFileName.append(resName); 376 objFileName.append(".o"); 377 378 const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so"; 379 std::vector<const char *> args = { 380 LD_EXE_PATH, 381 "-shared", 382 "-nostdlib", 383 compiler_rt, 384 "-mtriple", DEFAULT_TARGET_TRIPLE_STRING, 385 "-L", SYSLIBPATH, 386 "-lRSDriver", "-lm", "-lc", 387 objFileName.c_str(), 388 "-o", sharedLibName.c_str(), 389 nullptr 390 }; 391 392 std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data()); 393 394 pid_t pid = fork(); 395 396 switch (pid) { 397 case -1: { // Error occurred (we attempt no recovery) 398 ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH); 399 return false; 400 } 401 case 0: { // Child process 402 ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str()); 403 execv(LD_EXE_PATH, (char* const*) args.data()); 404 405 ALOGE("execv() failed: %s", strerror(errno)); 406 abort(); 407 return false; 408 } 409 default: { // Parent process (actual driver) 410 // Wait on child process to finish compiling the source. 
411 int status = 0; 412 pid_t w = waitpid(pid, &status, 0); 413 if (w == -1) { 414 ALOGE("Could not wait for linker (%s)", LD_EXE_PATH); 415 return false; 416 } 417 418 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 419 return true; 420 } 421 422 ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH); 423 return false; 424 } 425 } 426} 427#endif // !defined(RS_COMPATIBILITY_LIB) 428} // namespace 429 430namespace android { 431namespace renderscript { 432 433#define MAXLINE 500 434#define MAKE_STR_HELPER(S) #S 435#define MAKE_STR(S) MAKE_STR_HELPER(S) 436#define EXPORT_VAR_STR "exportVarCount: " 437#define EXPORT_FUNC_STR "exportFuncCount: " 438#define EXPORT_FOREACH_STR "exportForEachCount: " 439#define OBJECT_SLOT_STR "objectSlotCount: " 440 441// Copy up to a newline or size chars from str -> s, updating str 442// Returns s when successful and nullptr when '\0' is finally reached. 443static char* strgets(char *s, int size, const char **ppstr) { 444 if (!ppstr || !*ppstr || **ppstr == '\0' || size < 1) { 445 return nullptr; 446 } 447 448 int i; 449 for (i = 0; i < (size - 1); i++) { 450 s[i] = **ppstr; 451 (*ppstr)++; 452 if (s[i] == '\0') { 453 return s; 454 } else if (s[i] == '\n') { 455 s[i+1] = '\0'; 456 return s; 457 } 458 } 459 460 // size has been exceeded. 
461 s[i] = '\0'; 462 463 return s; 464} 465 466RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) { 467 mCtx = ctx; 468 mScript = s; 469 470 mScriptSO = nullptr; 471 472#ifndef RS_COMPATIBILITY_LIB 473 mCompilerDriver = nullptr; 474#endif 475 476 477 mRoot = nullptr; 478 mRootExpand = nullptr; 479 mInit = nullptr; 480 mFreeChildren = nullptr; 481 mScriptExec = nullptr; 482 483 mBoundAllocs = nullptr; 484 mIntrinsicData = nullptr; 485 mIsThreadable = true; 486} 487 488bool RsdCpuScriptImpl::storeRSInfoFromSO() { 489 mRoot = (RootFunc_t) dlsym(mScriptSO, "root"); 490 if (mRoot) { 491 //ALOGE("Found root(): %p", mRoot); 492 } 493 mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand"); 494 if (mRootExpand) { 495 //ALOGE("Found root.expand(): %p", mRootExpand); 496 } 497 mInit = (InvokeFunc_t) dlsym(mScriptSO, "init"); 498 if (mInit) { 499 //ALOGE("Found init(): %p", mInit); 500 } 501 mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor"); 502 if (mFreeChildren) { 503 //ALOGE("Found .rs.dtor(): %p", mFreeChildren); 504 } 505 506 mScriptExec = ScriptExecutable::createFromSharedObject( 507 mCtx->getContext(), mScriptSO); 508 509 if (mScriptExec == nullptr) { 510 return false; 511 } 512 513 size_t varCount = mScriptExec->getExportedVariableCount(); 514 if (varCount > 0) { 515 mBoundAllocs = new Allocation *[varCount]; 516 memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs)); 517 } 518 519 return true; 520} 521 522ScriptExecutable* ScriptExecutable::createFromSharedObject( 523 Context* RSContext, void* sharedObj) { 524 char line[MAXLINE]; 525 526 size_t varCount = 0; 527 size_t funcCount = 0; 528 size_t forEachCount = 0; 529 size_t objectSlotCount = 0; 530 531 const char *rsInfo = (const char *) dlsym(sharedObj, ".rs.info"); 532 533 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 534 return nullptr; 535 } 536 if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) { 537 ALOGE("Invalid export var count!: %s", line); 538 return 
nullptr; 539 } 540 541 std::vector<void*> fieldAddress; 542 543 for (size_t i = 0; i < varCount; ++i) { 544 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 545 return nullptr; 546 } 547 char *c = strrchr(line, '\n'); 548 if (c) { 549 *c = '\0'; 550 } 551 void* addr = dlsym(sharedObj, line); 552 if (addr == nullptr) { 553 ALOGE("Failed to find variable address for %s: %s", 554 line, dlerror()); 555 // Not a critical error if we don't find a global variable. 556 } 557 fieldAddress.push_back(addr); 558 } 559 560 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 561 return nullptr; 562 } 563 if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) { 564 ALOGE("Invalid export func count!: %s", line); 565 return nullptr; 566 } 567 568 std::vector<InvokeFunc_t> invokeFunctions(funcCount); 569 570 for (size_t i = 0; i < funcCount; ++i) { 571 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 572 return nullptr ; 573 } 574 char *c = strrchr(line, '\n'); 575 if (c) { 576 *c = '\0'; 577 } 578 579 invokeFunctions[i] = (InvokeFunc_t) dlsym(sharedObj, line); 580 if (invokeFunctions[i] == nullptr) { 581 ALOGE("Failed to get function address for %s(): %s", 582 line, dlerror()); 583 return nullptr; 584 } 585 } 586 587 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 588 return nullptr; 589 } 590 if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) { 591 ALOGE("Invalid export forEach count!: %s", line); 592 return nullptr; 593 } 594 595 std::vector<ForEachFunc_t> forEachFunctions(forEachCount); 596 std::vector<uint32_t> forEachSignatures(forEachCount); 597 598 for (size_t i = 0; i < forEachCount; ++i) { 599 unsigned int tmpSig = 0; 600 char tmpName[MAXLINE]; 601 602 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 603 return nullptr; 604 } 605 if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s", 606 &tmpSig, tmpName) != 2) { 607 ALOGE("Invalid export forEach!: %s", line); 608 return nullptr; 609 } 610 611 // Lookup the expanded ForEach kernel. 
612 strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName)); 613 forEachSignatures[i] = tmpSig; 614 forEachFunctions[i] = 615 (ForEachFunc_t) dlsym(sharedObj, tmpName); 616 if (i != 0 && forEachFunctions[i] == nullptr) { 617 // Ignore missing root.expand functions. 618 // root() is always specified at location 0. 619 ALOGE("Failed to find forEach function address for %s: %s", 620 tmpName, dlerror()); 621 return nullptr; 622 } 623 } 624 625 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 626 return nullptr; 627 } 628 if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) { 629 ALOGE("Invalid object slot count!: %s", line); 630 return nullptr; 631 } 632 633 std::vector<bool> fieldIsObject(varCount, false); 634 635 rsAssert(varCount > 0); 636 for (size_t i = 0; i < objectSlotCount; ++i) { 637 uint32_t varNum = 0; 638 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 639 return nullptr; 640 } 641 if (sscanf(line, "%u", &varNum) != 1) { 642 ALOGE("Invalid object slot!: %s", line); 643 return nullptr; 644 } 645 646 if (varNum < varCount) { 647 fieldIsObject[varNum] = true; 648 } 649 } 650 651 return new ScriptExecutable( 652 RSContext, fieldAddress, fieldIsObject, invokeFunctions, 653 forEachFunctions, forEachSignatures); 654} 655 656bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir, 657 uint8_t const *bitcode, size_t bitcodeSize, 658 uint32_t flags, char const *bccPluginName) { 659 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 660 //ALOGE("rsdScriptInit %p %p", rsc, script); 661 662 mCtx->lockMutex(); 663#ifndef RS_COMPATIBILITY_LIB 664 bool useRSDebugContext = false; 665 666 mCompilerDriver = nullptr; 667 668 mCompilerDriver = new bcc::RSCompilerDriver(); 669 if (mCompilerDriver == nullptr) { 670 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 671 mCtx->unlockMutex(); 672 return false; 673 } 674 675 // Run any compiler setup functions we have been provided 
with. 676 RSSetupCompilerCallback setupCompilerCallback = 677 mCtx->getSetupCompilerCallback(); 678 if (setupCompilerCallback != nullptr) { 679 setupCompilerCallback(mCompilerDriver); 680 } 681 682 bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize); 683 if (!bitcodeMetadata.extract()) { 684 ALOGE("Could not extract metadata from bitcode"); 685 mCtx->unlockMutex(); 686 return false; 687 } 688 689 const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize); 690 691 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 692 mCompilerDriver->setDebugContext(true); 693 useRSDebugContext = true; 694 } 695 696 std::string bcFileName(cacheDir); 697 bcFileName.append("/"); 698 bcFileName.append(resName); 699 bcFileName.append(".bc"); 700 701 std::vector<const char*> compileArguments; 702 setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib, 703 useRSDebugContext, bccPluginName); 704 // The last argument of compileArguments ia a nullptr, so remove 1 from the size. 705 std::string compileCommandLine = 706 bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data()); 707 708 if (!is_force_recompile()) { 709 mScriptSO = loadSharedLibrary(cacheDir, resName); 710 } 711 712 // If we can't, it's either not there or out of date. We compile the bit code and try loading 713 // again. 
714 if (mScriptSO == nullptr) { 715 if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize, 716 compileArguments.data(), compileCommandLine)) 717 { 718 ALOGE("bcc: FAILS to compile '%s'", resName); 719 mCtx->unlockMutex(); 720 return false; 721 } 722 723 if (!createSharedLib(cacheDir, resName)) { 724 ALOGE("Linker: Failed to link object file '%s'", resName); 725 mCtx->unlockMutex(); 726 return false; 727 } 728 729 mScriptSO = loadSharedLibrary(cacheDir, resName); 730 if (mScriptSO == nullptr) { 731 ALOGE("Unable to load '%s'", resName); 732 mCtx->unlockMutex(); 733 return false; 734 } 735 } 736 737 // Read RS symbol information from the .so. 738 if ( !mScriptSO) { 739 goto error; 740 } 741 742 if ( !storeRSInfoFromSO()) { 743 goto error; 744 } 745#else // RS_COMPATIBILITY_LIB is defined 746 747 mScriptSO = loadSharedLibrary(cacheDir, resName); 748 749 if (!mScriptSO) { 750 goto error; 751 } 752 753 if (!storeRSInfoFromSO()) { 754 goto error; 755 } 756#endif 757 mCtx->unlockMutex(); 758 return true; 759 760error: 761 762 mCtx->unlockMutex(); 763 if (mScriptSO) { 764 dlclose(mScriptSO); 765 } 766 return false; 767} 768 769#ifndef RS_COMPATIBILITY_LIB 770 771const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode, 772 size_t bitcodeSize) { 773 const char* defaultLib = SYSLIBPATH"/libclcore.bc"; 774 775 // If we're debugging, use the debug library. 776 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 777 return SYSLIBPATH"/libclcore_debug.bc"; 778 } 779 780 // If a callback has been registered to specify a library, use that. 
781 RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback(); 782 if (selectRTCallback != nullptr) { 783 return selectRTCallback((const char*)bitcode, bitcodeSize); 784 } 785 786 // Check for a platform specific library 787#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 788 enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision(); 789 if (prec == bcinfo::RS_FP_Relaxed) { 790 // NEON-capable ARMv7a devices can use an accelerated math library 791 // for all reduced precision scripts. 792 // ARMv8 does not use NEON, as ASIMD can be used with all precision 793 // levels. 794 return SYSLIBPATH"/libclcore_neon.bc"; 795 } else { 796 return defaultLib; 797 } 798#elif defined(__i386__) || defined(__x86_64__) 799 // x86 devices will use an optimized library. 800 return SYSLIBPATH"/libclcore_x86.bc"; 801#else 802 return defaultLib; 803#endif 804} 805 806#endif 807 808void RsdCpuScriptImpl::populateScript(Script *script) { 809 // Copy info over to runtime 810 script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount(); 811 script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount(); 812 script->mHal.info.exportedPragmaCount = 0; 813 script->mHal.info.exportedPragmaKeyList = 0; 814 script->mHal.info.exportedPragmaValueList = 0; 815 816 // Bug, need to stash in metadata 817 if (mRootExpand) { 818 script->mHal.info.root = mRootExpand; 819 } else { 820 script->mHal.info.root = mRoot; 821 } 822} 823 824 825typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 826 827void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, 828 uint32_t inLen, 829 Allocation * aout, 830 const void * usr, uint32_t usrLen, 831 const RsScriptCall *sc, 832 MTLaunchStruct *mtls) { 833 834 memset(mtls, 0, sizeof(MTLaunchStruct)); 835 836 for (int index = inLen; --index >= 0;) { 837 const Allocation* ain = ains[index]; 838 839 // possible for this to occur if IO_OUTPUT/IO_INPUT 
with no bound surface 840 if (ain != nullptr && 841 (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) { 842 843 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 844 "rsForEach called with null in allocations"); 845 return; 846 } 847 } 848 849 if (aout && 850 (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) { 851 852 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 853 "rsForEach called with null out allocations"); 854 return; 855 } 856 857 if (inLen > 0) { 858 const Allocation *ain0 = ains[0]; 859 const Type *inType = ain0->getType(); 860 861 mtls->fep.dim.x = inType->getDimX(); 862 mtls->fep.dim.y = inType->getDimY(); 863 mtls->fep.dim.z = inType->getDimZ(); 864 865 for (int Index = inLen; --Index >= 1;) { 866 if (!ain0->hasSameDims(ains[Index])) { 867 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 868 "Failed to launch kernel; dimensions of input and output allocations do not match."); 869 870 return; 871 } 872 } 873 874 } else if (aout != nullptr) { 875 const Type *outType = aout->getType(); 876 877 mtls->fep.dim.x = outType->getDimX(); 878 mtls->fep.dim.y = outType->getDimY(); 879 mtls->fep.dim.z = outType->getDimZ(); 880 881 } else { 882 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 883 "rsForEach called with null allocations"); 884 return; 885 } 886 887 if (inLen > 0 && aout != nullptr) { 888 if (!ains[0]->hasSameDims(aout)) { 889 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 890 "Failed to launch kernel; dimensions of input and output allocations do not match."); 891 892 return; 893 } 894 } 895 896 if (!sc || (sc->xEnd == 0)) { 897 mtls->xEnd = mtls->fep.dim.x; 898 } else { 899 rsAssert(sc->xStart < mtls->fep.dim.x); 900 rsAssert(sc->xEnd <= mtls->fep.dim.x); 901 rsAssert(sc->xStart < sc->xEnd); 902 mtls->xStart = rsMin(mtls->fep.dim.x, sc->xStart); 903 mtls->xEnd = rsMin(mtls->fep.dim.x, sc->xEnd); 904 if (mtls->xStart >= mtls->xEnd) return; 905 } 906 907 if (!sc || (sc->yEnd == 0)) { 908 mtls->yEnd = 
mtls->fep.dim.y; 909 } else { 910 rsAssert(sc->yStart < mtls->fep.dim.y); 911 rsAssert(sc->yEnd <= mtls->fep.dim.y); 912 rsAssert(sc->yStart < sc->yEnd); 913 mtls->yStart = rsMin(mtls->fep.dim.y, sc->yStart); 914 mtls->yEnd = rsMin(mtls->fep.dim.y, sc->yEnd); 915 if (mtls->yStart >= mtls->yEnd) return; 916 } 917 918 if (!sc || (sc->zEnd == 0)) { 919 mtls->zEnd = mtls->fep.dim.z; 920 } else { 921 rsAssert(sc->zStart < mtls->fep.dim.z); 922 rsAssert(sc->zEnd <= mtls->fep.dim.z); 923 rsAssert(sc->zStart < sc->zEnd); 924 mtls->zStart = rsMin(mtls->fep.dim.z, sc->zStart); 925 mtls->zEnd = rsMin(mtls->fep.dim.z, sc->zEnd); 926 if (mtls->zStart >= mtls->zEnd) return; 927 } 928 929 mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); 930 mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); 931 mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); 932 mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); 933 934 rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0)); 935 936 mtls->rsc = mCtx; 937 if (ains) { 938 memcpy(mtls->ains, ains, inLen * sizeof(ains[0])); 939 } 940 mtls->aout[0] = aout; 941 mtls->fep.usr = usr; 942 mtls->fep.usrLen = usrLen; 943 mtls->mSliceSize = 1; 944 mtls->mSliceNum = 0; 945 946 mtls->isThreadable = mIsThreadable; 947 948 if (inLen > 0) { 949 mtls->fep.inLen = inLen; 950 for (int index = inLen; --index >= 0;) { 951 mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr; 952 mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes(); 953 } 954 } 955 956 if (aout != nullptr) { 957 mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; 958 mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes(); 959 } 960} 961 962 963void RsdCpuScriptImpl::invokeForEach(uint32_t slot, 964 const Allocation ** ains, 965 uint32_t inLen, 966 Allocation * aout, 967 const void * usr, 968 uint32_t usrLen, 969 const RsScriptCall *sc) { 970 971 MTLaunchStruct mtls; 972 973 forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, 
&mtls); 974 forEachKernelSetup(slot, &mtls); 975 976 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 977 mCtx->launchThreads(ains, inLen, aout, sc, &mtls); 978 mCtx->setTLS(oldTLS); 979} 980 981void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { 982 mtls->script = this; 983 mtls->fep.slot = slot; 984 mtls->kernel = mScriptExec->getForEachFunction(slot); 985 rsAssert(mtls->kernel != nullptr); 986 mtls->sig = mScriptExec->getForEachSignature(slot); 987} 988 989int RsdCpuScriptImpl::invokeRoot() { 990 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 991 int ret = mRoot(); 992 mCtx->setTLS(oldTLS); 993 return ret; 994} 995 996void RsdCpuScriptImpl::invokeInit() { 997 if (mInit) { 998 mInit(); 999 } 1000} 1001 1002void RsdCpuScriptImpl::invokeFreeChildren() { 1003 if (mFreeChildren) { 1004 mFreeChildren(); 1005 } 1006} 1007 1008void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params, 1009 size_t paramLength) { 1010 //ALOGE("invoke %i %p %zu", slot, params, paramLength); 1011 void * ap = nullptr; 1012 1013#if defined(__x86_64__) 1014 // The invoked function could have input parameter of vector type for example float4 which 1015 // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform. 1016 // So try to align void* params before passing them into RS exported function. 1017 1018 if ((uint8_t)(uint64_t)params & 0x0F) { 1019 if ((ap = (void*)memalign(16, paramLength)) != nullptr) { 1020 memcpy(ap, params, paramLength); 1021 } else { 1022 ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned."); 1023 } 1024 } 1025#endif 1026 1027 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 1028 reinterpret_cast<void (*)(const void *, uint32_t)>( 1029 mScriptExec->getInvokeFunction(slot))(ap? 
(const void *) ap: params, paramLength); 1030 1031 mCtx->setTLS(oldTLS); 1032} 1033 1034void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 1035 //rsAssert(!script->mFieldIsObject[slot]); 1036 //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength); 1037 1038 //if (mIntrinsicID) { 1039 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); 1040 //return; 1041 //} 1042 1043 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1044 if (!destPtr) { 1045 //ALOGV("Calling setVar on slot = %i which is null", slot); 1046 return; 1047 } 1048 1049 memcpy(destPtr, data, dataLength); 1050} 1051 1052void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) { 1053 //rsAssert(!script->mFieldIsObject[slot]); 1054 //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength); 1055 1056 int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1057 if (!srcPtr) { 1058 //ALOGV("Calling setVar on slot = %i which is null", slot); 1059 return; 1060 } 1061 memcpy(data, srcPtr, dataLength); 1062} 1063 1064 1065void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, 1066 const Element *elem, 1067 const uint32_t *dims, size_t dimLength) { 1068 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1069 if (!destPtr) { 1070 //ALOGV("Calling setVar on slot = %i which is null", slot); 1071 return; 1072 } 1073 1074 // We want to look at dimension in terms of integer components, 1075 // but dimLength is given in terms of bytes. 1076 dimLength /= sizeof(int); 1077 1078 // Only a single dimension is currently supported. 1079 rsAssert(dimLength == 1); 1080 if (dimLength == 1) { 1081 // First do the increment loop. 
1082 size_t stride = elem->getSizeBytes(); 1083 const char *cVal = reinterpret_cast<const char *>(data); 1084 for (uint32_t i = 0; i < dims[0]; i++) { 1085 elem->incRefs(cVal); 1086 cVal += stride; 1087 } 1088 1089 // Decrement loop comes after (to prevent race conditions). 1090 char *oldVal = reinterpret_cast<char *>(destPtr); 1091 for (uint32_t i = 0; i < dims[0]; i++) { 1092 elem->decRefs(oldVal); 1093 oldVal += stride; 1094 } 1095 } 1096 1097 memcpy(destPtr, data, dataLength); 1098} 1099 1100void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) { 1101 1102 //rsAssert(!script->mFieldIsObject[slot]); 1103 //ALOGE("setGlobalBind %i %p", slot, data); 1104 1105 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1106 if (!destPtr) { 1107 //ALOGV("Calling setVar on slot = %i which is null", slot); 1108 return; 1109 } 1110 1111 void *ptr = nullptr; 1112 mBoundAllocs[slot] = data; 1113 if (data) { 1114 ptr = data->mHal.drvState.lod[0].mallocPtr; 1115 } 1116 memcpy(destPtr, &ptr, sizeof(void *)); 1117} 1118 1119void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { 1120 1121 //rsAssert(script->mFieldIsObject[slot]); 1122 //ALOGE("setGlobalObj %i %p", slot, data); 1123 1124 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 1125 if (!destPtr) { 1126 //ALOGV("Calling setVar on slot = %i which is null", slot); 1127 return; 1128 } 1129 1130 rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data); 1131} 1132 1133RsdCpuScriptImpl::~RsdCpuScriptImpl() { 1134#ifndef RS_COMPATIBILITY_LIB 1135 if (mCompilerDriver) { 1136 delete mCompilerDriver; 1137 } 1138#endif 1139 1140 if (mScriptExec != nullptr) { 1141 delete mScriptExec; 1142 } 1143 if (mBoundAllocs) delete[] mBoundAllocs; 1144 if (mScriptSO) { 1145 dlclose(mScriptSO); 1146 } 1147} 1148 1149Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { 1150 if (!ptr) { 1151 return nullptr; 1152 } 1153 
1154 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) { 1155 Allocation *a = mBoundAllocs[ct]; 1156 if (!a) continue; 1157 if (a->mHal.drvState.lod[0].mallocPtr == ptr) { 1158 return a; 1159 } 1160 } 1161 ALOGE("rsGetAllocation, failed to find %p", ptr); 1162 return nullptr; 1163} 1164 1165void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains, 1166 uint32_t inLen, Allocation * aout, 1167 const void * usr, uint32_t usrLen, 1168 const RsScriptCall *sc) {} 1169 1170void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains, 1171 uint32_t inLen, Allocation * aout, 1172 const void * usr, uint32_t usrLen, 1173 const RsScriptCall *sc) {} 1174 1175 1176} 1177} 1178