rsCpuScript.cpp revision 45e753a46e587c69b3b0d0c5138e88715a24a29a
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsCpuCore.h" 18#include "rsCpuScript.h" 19 20#ifdef RS_COMPATIBILITY_LIB 21 #include <stdio.h> 22 #include <sys/stat.h> 23 #include <unistd.h> 24#else 25 #include <bcc/BCCContext.h> 26 #include <bcc/Config/Config.h> 27 #include <bcc/Renderscript/RSCompilerDriver.h> 28 #include <bcc/Renderscript/RSInfo.h> 29 #include <bcinfo/MetadataExtractor.h> 30 #include <cutils/properties.h> 31 32 #include <sys/types.h> 33 #include <sys/wait.h> 34 #include <unistd.h> 35 36 #include <string> 37 #include <vector> 38#endif 39 40#include <set> 41#include <string> 42#include <dlfcn.h> 43#include <stdlib.h> 44#include <string.h> 45#include <fstream> 46#include <iostream> 47 48#ifdef __LP64__ 49#define SYSLIBPATH "/system/lib64" 50#else 51#define SYSLIBPATH "/system/lib" 52#endif 53 54namespace { 55 56// Create a len length string containing random characters from [A-Za-z0-9]. 57static std::string getRandomString(size_t len) { 58 char buf[len + 1]; 59 for (size_t i = 0; i < len; i++) { 60 uint32_t r = arc4random() & 0xffff; 61 r %= 62; 62 if (r < 26) { 63 // lowercase 64 buf[i] = 'a' + r; 65 } else if (r < 52) { 66 // uppercase 67 buf[i] = 'A' + (r - 26); 68 } else { 69 // Use a number 70 buf[i] = '0' + (r - 52); 71 } 72 } 73 buf[len] = '\0'; 74 return std::string(buf); 75} 76 77// Check if a path exists and attempt to create it if it doesn't. 78static bool ensureCacheDirExists(const char *path) { 79 if (access(path, R_OK | W_OK | X_OK) == 0) { 80 // Done if we can rwx the directory 81 return true; 82 } 83 if (mkdir(path, 0700) == 0) { 84 return true; 85 } 86 return false; 87} 88 89// Copy the file named \p srcFile to \p dstFile. 90// Return 0 on success and -1 if anything wasn't copied. 91static int copyFile(const char *dstFile, const char *srcFile) { 92 std::ifstream srcStream(srcFile); 93 if (!srcStream) { 94 ALOGE("Could not verify or read source file: %s", srcFile); 95 return -1; 96 } 97 std::ofstream dstStream(dstFile); 98 if (!dstStream) { 99 ALOGE("Could not verify or write destination file: %s", dstFile); 100 return -1; 101 } 102 dstStream << srcStream.rdbuf(); 103 if (!dstStream) { 104 ALOGE("Could not write destination file: %s", dstFile); 105 return -1; 106 } 107 108 srcStream.close(); 109 dstStream.close(); 110 111 return 0; 112} 113 114#define RS_CACHE_DIR "com.android.renderscript.cache" 115 116// Attempt to load the shared library from origName, but then fall back to 117// creating a copy of the shared library if necessary (to ensure instancing). 118// This function returns the dlopen()-ed handle if successful. 119static void *loadSOHelper(const char *origName, const char *cacheDir, 120 const char *resName) { 121 // Keep track of which .so libraries have been loaded. Once a library is 122 // in the set (per-process granularity), we must instead make a copy of 123 // the original shared object (randomly named .so file) and load that one 124 // instead. If we don't do this, we end up aliasing global data between 125 // the various Script instances (which are supposed to be completely 126 // independent). 127 static std::set<std::string> LoadedLibraries; 128 129 void *loaded = nullptr; 130 131 // Skip everything if we don't even have the original library available. 132 if (access(origName, F_OK) != 0) { 133 return nullptr; 134 } 135 136 // Common path is that we have not loaded this Script/library before. 137 if (LoadedLibraries.find(origName) == LoadedLibraries.end()) { 138 loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL); 139 if (loaded) { 140 LoadedLibraries.insert(origName); 141 } 142 return loaded; 143 } 144 145 std::string newName(cacheDir); 146 147 // Append RS_CACHE_DIR only if it is not found in cacheDir 148 // In driver mode, RS_CACHE_DIR is already appended to cacheDir. 149 if (newName.find(RS_CACHE_DIR) == std::string::npos) { 150 newName.append("/" RS_CACHE_DIR "/"); 151 } 152 153 if (!ensureCacheDirExists(newName.c_str())) { 154 ALOGE("Could not verify or create cache dir: %s", cacheDir); 155 return nullptr; 156 } 157 158 // Construct an appropriately randomized filename for the copy. 159 newName.append("librs."); 160 newName.append(resName); 161 newName.append("#"); 162 newName.append(getRandomString(6)); // 62^6 potential filename variants. 163 newName.append(".so"); 164 165 int r = copyFile(newName.c_str(), origName); 166 if (r != 0) { 167 ALOGE("Could not create copy %s -> %s", origName, newName.c_str()); 168 return nullptr; 169 } 170 loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL); 171 r = unlink(newName.c_str()); 172 if (r != 0) { 173 ALOGE("Could not unlink copy %s", newName.c_str()); 174 } 175 if (loaded) { 176 LoadedLibraries.insert(newName.c_str()); 177 } 178 179 return loaded; 180} 181 182static std::string findSharedObjectName(const char *cacheDir, 183 const char *resName) { 184 185#ifndef RS_SERVER 186 std::string scriptSOName(cacheDir); 187#ifdef RS_COMPATIBILITY_LIB 188 size_t cutPos = scriptSOName.rfind("cache"); 189 if (cutPos != std::string::npos) { 190 scriptSOName.erase(cutPos); 191 } else { 192 ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir); 193 } 194 scriptSOName.append("/lib/librs."); 195#else 196 scriptSOName.append("/librs."); 197#endif 198 199#else 200 std::string scriptSOName("lib"); 201#endif 202 scriptSOName.append(resName); 203 scriptSOName.append(".so"); 204 205 return scriptSOName; 206} 207 208// Load the shared library referred to by cacheDir and resName. If we have 209// already loaded this library, we instead create a new copy (in the 210// cache dir) and then load that. We then immediately destroy the copy. 211// This is required behavior to implement script instancing for the support 212// library, since shared objects are loaded and de-duped by name only. 213static void *loadSharedLibrary(const char *cacheDir, const char *resName) { 214 void *loaded = nullptr; 215 216 std::string scriptSOName = findSharedObjectName(cacheDir, resName); 217 218 // We should check if we can load the library from the standard app 219 // location for shared libraries first. 220 loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName); 221 222 if (loaded == nullptr) { 223 ALOGE("Unable to open shared library (%s): %s", 224 scriptSOName.c_str(), dlerror()); 225 226 // One final attempt to find the library in "/system/lib". 227 // We do this to allow bundled applications to use the compatibility 228 // library fallback path. Those applications don't have a private 229 // library path, so they need to install to the system directly. 230 // Note that this is really just a testing path. 231 std::string scriptSONameSystem("/system/lib/librs."); 232 scriptSONameSystem.append(resName); 233 scriptSONameSystem.append(".so"); 234 loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir, 235 resName); 236 if (loaded == nullptr) { 237 ALOGE("Unable to open system shared library (%s): %s", 238 scriptSONameSystem.c_str(), dlerror()); 239 } 240 } 241 242 return loaded; 243} 244 245#ifndef RS_COMPATIBILITY_LIB 246 247static bool is_force_recompile() { 248#ifdef RS_SERVER 249 return false; 250#else 251 char buf[PROPERTY_VALUE_MAX]; 252 253 // Re-compile if floating point precision has been overridden. 254 property_get("debug.rs.precision", buf, ""); 255 if (buf[0] != '\0') { 256 return true; 257 } 258 259 // Re-compile if debug.rs.forcerecompile is set. 260 property_get("debug.rs.forcerecompile", buf, "0"); 261 if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) { 262 return true; 263 } else { 264 return false; 265 } 266#endif // RS_SERVER 267} 268 269const static char *BCC_EXE_PATH = "/system/bin/bcc"; 270 271static void setCompileArguments(std::vector<const char*>* args, 272 const std::string& bcFileName, 273 const char* cacheDir, const char* resName, 274 const char* core_lib, bool useRSDebugContext, 275 const char* bccPluginName) { 276 rsAssert(cacheDir && resName && core_lib); 277 args->push_back(BCC_EXE_PATH); 278 args->push_back("-unroll-runtime"); 279 args->push_back("-scalarize-load-store"); 280 args->push_back("-o"); 281 args->push_back(resName); 282 args->push_back("-output_path"); 283 args->push_back(cacheDir); 284 args->push_back("-bclib"); 285 args->push_back(core_lib); 286 args->push_back("-mtriple"); 287 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 288 289 // Enable workaround for A53 codegen by default. 290#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND) 291 args->push_back("-aarch64-fix-cortex-a53-835769"); 292#endif 293 294 // Execute the bcc compiler. 295 if (useRSDebugContext) { 296 args->push_back("-rs-debug-ctx"); 297 } else { 298 // Only load additional libraries for compiles that don't use 299 // the debug context. 300 if (bccPluginName && strlen(bccPluginName) > 0) { 301 args->push_back("-load"); 302 args->push_back(bccPluginName); 303 } 304 } 305 306 args->push_back("-fPIC"); 307 args->push_back("-embedRSInfo"); 308 309 args->push_back(bcFileName.c_str()); 310 args->push_back(nullptr); 311} 312 313static bool compileBitcode(const std::string &bcFileName, 314 const char *bitcode, 315 size_t bitcodeSize, 316 const char **compileArguments, 317 const std::string &compileCommandLine) { 318 rsAssert(bitcode && bitcodeSize); 319 320 FILE *bcfile = fopen(bcFileName.c_str(), "w"); 321 if (!bcfile) { 322 ALOGE("Could not write to %s", bcFileName.c_str()); 323 return false; 324 } 325 size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile); 326 fclose(bcfile); 327 if (nwritten != bitcodeSize) { 328 ALOGE("Could not write %zu bytes to %s", bitcodeSize, 329 bcFileName.c_str()); 330 return false; 331 } 332 333 pid_t pid = fork(); 334 335 switch (pid) { 336 case -1: { // Error occurred (we attempt no recovery) 337 ALOGE("Couldn't fork for bcc compiler execution"); 338 return false; 339 } 340 case 0: { // Child process 341 ALOGV("Invoking BCC with: %s", compileCommandLine.c_str()); 342 execv(BCC_EXE_PATH, (char* const*)compileArguments); 343 344 ALOGE("execv() failed: %s", strerror(errno)); 345 abort(); 346 return false; 347 } 348 default: { // Parent process (actual driver) 349 // Wait on child process to finish compiling the source. 350 int status = 0; 351 pid_t w = waitpid(pid, &status, 0); 352 if (w == -1) { 353 ALOGE("Could not wait for bcc compiler"); 354 return false; 355 } 356 357 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 358 return true; 359 } 360 361 ALOGE("bcc compiler terminated unexpectedly"); 362 return false; 363 } 364 } 365} 366 367const static char *LD_EXE_PATH = "/system/bin/ld.mc"; 368 369static bool createSharedLib(const char *cacheDir, const char *resName) { 370 std::string sharedLibName = findSharedObjectName(cacheDir, resName); 371 std::string objFileName = cacheDir; 372 objFileName.append("/"); 373 objFileName.append(resName); 374 objFileName.append(".o"); 375 376 const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so"; 377 std::vector<const char *> args = { 378 LD_EXE_PATH, 379 "-shared", 380 "-nostdlib", 381 compiler_rt, 382 "-mtriple", DEFAULT_TARGET_TRIPLE_STRING, 383 "-L", SYSLIBPATH, 384 "-lRSDriver", "-lm", "-lc", 385 objFileName.c_str(), 386 "-o", sharedLibName.c_str(), 387 nullptr 388 }; 389 390 std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data()); 391 392 pid_t pid = fork(); 393 394 switch (pid) { 395 case -1: { // Error occurred (we attempt no recovery) 396 ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH); 397 return false; 398 } 399 case 0: { // Child process 400 ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str()); 401 execv(LD_EXE_PATH, (char* const*) args.data()); 402 403 ALOGE("execv() failed: %s", strerror(errno)); 404 abort(); 405 return false; 406 } 407 default: { // Parent process (actual driver) 408 // Wait on child process to finish compiling the source. 409 int status = 0; 410 pid_t w = waitpid(pid, &status, 0); 411 if (w == -1) { 412 ALOGE("Could not wait for linker (%s)", LD_EXE_PATH); 413 return false; 414 } 415 416 if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { 417 return true; 418 } 419 420 ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH); 421 return false; 422 } 423 } 424} 425#endif // !defined(RS_COMPATIBILITY_LIB) 426} // namespace 427 428namespace android { 429namespace renderscript { 430 431#define MAXLINE 500 432#define MAKE_STR_HELPER(S) #S 433#define MAKE_STR(S) MAKE_STR_HELPER(S) 434#define EXPORT_VAR_STR "exportVarCount: " 435#define EXPORT_FUNC_STR "exportFuncCount: " 436#define EXPORT_FOREACH_STR "exportForEachCount: " 437#define OBJECT_SLOT_STR "objectSlotCount: " 438 439// Copy up to a newline or size chars from str -> s, updating str 440// Returns s when successful and nullptr when '\0' is finally reached. 441static char* strgets(char *s, int size, const char **ppstr) { 442 if (!ppstr || !*ppstr || **ppstr == '\0' || size < 1) { 443 return nullptr; 444 } 445 446 int i; 447 for (i = 0; i < (size - 1); i++) { 448 s[i] = **ppstr; 449 (*ppstr)++; 450 if (s[i] == '\0') { 451 return s; 452 } else if (s[i] == '\n') { 453 s[i+1] = '\0'; 454 return s; 455 } 456 } 457 458 // size has been exceeded. 459 s[i] = '\0'; 460 461 return s; 462} 463 464RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) { 465 mCtx = ctx; 466 mScript = s; 467 468 mScriptSO = nullptr; 469 470 mInvokeFunctions = nullptr; 471 mForEachFunctions = nullptr; 472 mFieldAddress = nullptr; 473 mFieldIsObject = nullptr; 474 mForEachSignatures = nullptr; 475 476#ifndef RS_COMPATIBILITY_LIB 477 mCompilerDriver = nullptr; 478#endif 479 480 481 mRoot = nullptr; 482 mRootExpand = nullptr; 483 mInit = nullptr; 484 mFreeChildren = nullptr; 485 486 487 mBoundAllocs = nullptr; 488 mIntrinsicData = nullptr; 489 mIsThreadable = true; 490} 491 492bool RsdCpuScriptImpl::storeRSInfoFromSO() { 493 char line[MAXLINE]; 494 size_t varCount = 0; 495 size_t funcCount = 0; 496 size_t forEachCount = 0; 497 size_t objectSlotCount = 0; 498 499 mRoot = (RootFunc_t) dlsym(mScriptSO, "root"); 500 if (mRoot) { 501 //ALOGE("Found root(): %p", mRoot); 502 } 503 mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand"); 504 if (mRootExpand) { 505 //ALOGE("Found root.expand(): %p", mRootExpand); 506 } 507 mInit = (InvokeFunc_t) dlsym(mScriptSO, "init"); 508 if (mInit) { 509 //ALOGE("Found init(): %p", mInit); 510 } 511 mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor"); 512 if (mFreeChildren) { 513 //ALOGE("Found .rs.dtor(): %p", mFreeChildren); 514 } 515 516 const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info"); 517 if (rsInfo) { 518 //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo); 519 } 520 521 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 522 goto error; 523 } 524 if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) { 525 ALOGE("Invalid export var count!: %s", line); 526 goto error; 527 } 528 529 mExportedVariableCount = varCount; 530 //ALOGE("varCount: %zu", varCount); 531 if (varCount > 0) { 532 // Start by creating/zeroing this member, since we don't want to 533 // accidentally clean up invalid pointers later (if we error out). 534 mFieldIsObject = new bool[varCount]; 535 if (mFieldIsObject == nullptr) { 536 goto error; 537 } 538 memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject)); 539 mFieldAddress = new void*[varCount]; 540 if (mFieldAddress == nullptr) { 541 goto error; 542 } 543 for (size_t i = 0; i < varCount; ++i) { 544 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 545 goto error; 546 } 547 char *c = strrchr(line, '\n'); 548 if (c) { 549 *c = '\0'; 550 } 551 mFieldAddress[i] = dlsym(mScriptSO, line); 552 if (mFieldAddress[i] == nullptr) { 553 ALOGE("Failed to find variable address for %s: %s", 554 line, dlerror()); 555 // Not a critical error if we don't find a global variable. 556 } 557 else { 558 //ALOGE("Found variable %s at %p", line, 559 //mFieldAddress[i]); 560 } 561 } 562 } 563 564 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 565 goto error; 566 } 567 if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) { 568 ALOGE("Invalid export func count!: %s", line); 569 goto error; 570 } 571 572 mExportedFunctionCount = funcCount; 573 //ALOGE("funcCount: %zu", funcCount); 574 575 if (funcCount > 0) { 576 mInvokeFunctions = new InvokeFunc_t[funcCount]; 577 if (mInvokeFunctions == nullptr) { 578 goto error; 579 } 580 for (size_t i = 0; i < funcCount; ++i) { 581 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 582 goto error; 583 } 584 char *c = strrchr(line, '\n'); 585 if (c) { 586 *c = '\0'; 587 } 588 589 mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line); 590 if (mInvokeFunctions[i] == nullptr) { 591 ALOGE("Failed to get function address for %s(): %s", 592 line, dlerror()); 593 goto error; 594 } 595 else { 596 //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]); 597 } 598 } 599 } 600 601 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 602 goto error; 603 } 604 if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) { 605 ALOGE("Invalid export forEach count!: %s", line); 606 goto error; 607 } 608 609 if (forEachCount > 0) { 610 611 mForEachSignatures = new uint32_t[forEachCount]; 612 if (mForEachSignatures == nullptr) { 613 goto error; 614 } 615 mForEachFunctions = new ForEachFunc_t[forEachCount]; 616 if (mForEachFunctions == nullptr) { 617 goto error; 618 } 619 for (size_t i = 0; i < forEachCount; ++i) { 620 unsigned int tmpSig = 0; 621 char tmpName[MAXLINE]; 622 623 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 624 goto error; 625 } 626 if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s", 627 &tmpSig, tmpName) != 2) { 628 ALOGE("Invalid export forEach!: %s", line); 629 goto error; 630 } 631 632 // Lookup the expanded ForEach kernel. 633 strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName)); 634 mForEachSignatures[i] = tmpSig; 635 mForEachFunctions[i] = 636 (ForEachFunc_t) dlsym(mScriptSO, tmpName); 637 if (i != 0 && mForEachFunctions[i] == nullptr) { 638 // Ignore missing root.expand functions. 639 // root() is always specified at location 0. 640 ALOGE("Failed to find forEach function address for %s: %s", 641 tmpName, dlerror()); 642 goto error; 643 } 644 else { 645 //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]); 646 } 647 } 648 } 649 650 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 651 goto error; 652 } 653 if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) { 654 ALOGE("Invalid object slot count!: %s", line); 655 goto error; 656 } 657 658 if (objectSlotCount > 0) { 659 rsAssert(varCount > 0); 660 for (size_t i = 0; i < objectSlotCount; ++i) { 661 uint32_t varNum = 0; 662 if (strgets(line, MAXLINE, &rsInfo) == nullptr) { 663 goto error; 664 } 665 if (sscanf(line, "%u", &varNum) != 1) { 666 ALOGE("Invalid object slot!: %s", line); 667 goto error; 668 } 669 670 if (varNum < varCount) { 671 mFieldIsObject[varNum] = true; 672 } 673 } 674 } 675 676 if (varCount > 0) { 677 mBoundAllocs = new Allocation *[varCount]; 678 memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs)); 679 } 680 681 if (mScriptSO == (void*)1) { 682 //rsdLookupRuntimeStub(script, "acos"); 683 } 684 685 return true; 686 687error: 688 delete[] mInvokeFunctions; 689 delete[] mForEachFunctions; 690 delete[] mFieldAddress; 691 delete[] mFieldIsObject; 692 delete[] mForEachSignatures; 693 delete[] mBoundAllocs; 694 695 return false; 696} 697 698bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir, 699 uint8_t const *bitcode, size_t bitcodeSize, 700 uint32_t flags, char const *bccPluginName) { 701 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 702 //ALOGE("rsdScriptInit %p %p", rsc, script); 703 704 mCtx->lockMutex(); 705#ifndef RS_COMPATIBILITY_LIB 706 bool useRSDebugContext = false; 707 708 mCompilerDriver = nullptr; 709 710 mCompilerDriver = new bcc::RSCompilerDriver(); 711 if (mCompilerDriver == nullptr) { 712 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 713 mCtx->unlockMutex(); 714 return false; 715 } 716 717 // Run any compiler setup functions we have been provided with. 718 RSSetupCompilerCallback setupCompilerCallback = 719 mCtx->getSetupCompilerCallback(); 720 if (setupCompilerCallback != nullptr) { 721 setupCompilerCallback(mCompilerDriver); 722 } 723 724 bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize); 725 if (!bitcodeMetadata.extract()) { 726 ALOGE("Could not extract metadata from bitcode"); 727 mCtx->unlockMutex(); 728 return false; 729 } 730 731 const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize); 732 733 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 734 mCompilerDriver->setDebugContext(true); 735 useRSDebugContext = true; 736 } 737 738 std::string bcFileName(cacheDir); 739 bcFileName.append("/"); 740 bcFileName.append(resName); 741 bcFileName.append(".bc"); 742 743 std::vector<const char*> compileArguments; 744 setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib, 745 useRSDebugContext, bccPluginName); 746 // The last argument of compileArguments ia a nullptr, so remove 1 from the size. 747 std::string compileCommandLine = 748 bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data()); 749 750 if (!is_force_recompile()) { 751 mScriptSO = loadSharedLibrary(cacheDir, resName); 752 } 753 754 // If we can't, it's either not there or out of date. We compile the bit code and try loading 755 // again. 756 if (mScriptSO == nullptr) { 757 if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize, 758 compileArguments.data(), compileCommandLine)) 759 { 760 ALOGE("bcc: FAILS to compile '%s'", resName); 761 mCtx->unlockMutex(); 762 return false; 763 } 764 765 if (!createSharedLib(cacheDir, resName)) { 766 ALOGE("Linker: Failed to link object file '%s'", resName); 767 mCtx->unlockMutex(); 768 return false; 769 } 770 771 mScriptSO = loadSharedLibrary(cacheDir, resName); 772 if (mScriptSO == nullptr) { 773 ALOGE("Unable to load '%s'", resName); 774 mCtx->unlockMutex(); 775 return false; 776 } 777 } 778 779 // Read RS symbol information from the .so. 780 if ( !mScriptSO) { 781 goto error; 782 } 783 784 if ( !storeRSInfoFromSO()) { 785 goto error; 786 } 787#else // RS_COMPATIBILITY_LIB is defined 788 789 mScriptSO = loadSharedLibrary(cacheDir, resName); 790 791 if (!mScriptSO) { 792 goto error; 793 } 794 795 if (!storeRSInfoFromSO()) { 796 goto error; 797 } 798#endif 799 mCtx->unlockMutex(); 800 return true; 801 802error: 803 804 mCtx->unlockMutex(); 805 if (mScriptSO) { 806 dlclose(mScriptSO); 807 } 808 return false; 809} 810 811#ifndef RS_COMPATIBILITY_LIB 812 813const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode, 814 size_t bitcodeSize) { 815 const char* defaultLib = SYSLIBPATH"/libclcore.bc"; 816 817 // If we're debugging, use the debug library. 818 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 819 return SYSLIBPATH"/libclcore_debug.bc"; 820 } 821 822 // If a callback has been registered to specify a library, use that. 823 RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback(); 824 if (selectRTCallback != nullptr) { 825 return selectRTCallback((const char*)bitcode, bitcodeSize); 826 } 827 828 // Check for a platform specific library 829#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 830 enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision(); 831 if (prec == bcinfo::RS_FP_Relaxed) { 832 // NEON-capable ARMv7a devices can use an accelerated math library 833 // for all reduced precision scripts. 834 // ARMv8 does not use NEON, as ASIMD can be used with all precision 835 // levels. 836 return SYSLIBPATH"/libclcore_neon.bc"; 837 } else { 838 return defaultLib; 839 } 840#elif defined(__i386__) || defined(__x86_64__) 841 // x86 devices will use an optimized library. 842 return SYSLIBPATH"/libclcore_x86.bc"; 843#else 844 return defaultLib; 845#endif 846} 847 848#endif 849 850void RsdCpuScriptImpl::populateScript(Script *script) { 851 // Copy info over to runtime 852 script->mHal.info.exportedFunctionCount = mExportedFunctionCount; 853 script->mHal.info.exportedVariableCount = mExportedVariableCount; 854 script->mHal.info.exportedPragmaCount = 0; 855 script->mHal.info.exportedPragmaKeyList = 0; 856 script->mHal.info.exportedPragmaValueList = 0; 857 858 // Bug, need to stash in metadata 859 if (mRootExpand) { 860 script->mHal.info.root = mRootExpand; 861 } else { 862 script->mHal.info.root = mRoot; 863 } 864} 865 866 867typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 868 869void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, 870 uint32_t inLen, 871 Allocation * aout, 872 const void * usr, uint32_t usrLen, 873 const RsScriptCall *sc, 874 MTLaunchStruct *mtls) { 875 876 memset(mtls, 0, sizeof(MTLaunchStruct)); 877 878 for (int index = inLen; --index >= 0;) { 879 const Allocation* ain = ains[index]; 880 881 // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface 882 if (ain != nullptr && 883 (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) { 884 885 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 886 "rsForEach called with null in allocations"); 887 return; 888 } 889 } 890 891 if (aout && 892 (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) { 893 894 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 895 "rsForEach called with null out allocations"); 896 return; 897 } 898 899 if (inLen > 0) { 900 const Allocation *ain0 = ains[0]; 901 const Type *inType = ain0->getType(); 902 903 mtls->fep.dim.x = inType->getDimX(); 904 mtls->fep.dim.y = inType->getDimY(); 905 mtls->fep.dim.z = inType->getDimZ(); 906 907 for (int Index = inLen; --Index >= 1;) { 908 if (!ain0->hasSameDims(ains[Index])) { 909 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 910 "Failed to launch kernel; dimensions of input and output allocations do not match."); 911 912 return; 913 } 914 } 915 916 } else if (aout != nullptr) { 917 const Type *outType = aout->getType(); 918 919 mtls->fep.dim.x = outType->getDimX(); 920 mtls->fep.dim.y = outType->getDimY(); 921 mtls->fep.dim.z = outType->getDimZ(); 922 923 } else { 924 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 925 "rsForEach called with null allocations"); 926 return; 927 } 928 929 if (inLen > 0 && aout != nullptr) { 930 if (!ains[0]->hasSameDims(aout)) { 931 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 932 "Failed to launch kernel; dimensions of input and output allocations do not match."); 933 934 return; 935 } 936 } 937 938 if (!sc || (sc->xEnd == 0)) { 939 mtls->xEnd = mtls->fep.dim.x; 940 } else { 941 rsAssert(sc->xStart < mtls->fep.dim.x); 942 rsAssert(sc->xEnd <= mtls->fep.dim.x); 943 rsAssert(sc->xStart < sc->xEnd); 944 mtls->xStart = rsMin(mtls->fep.dim.x, sc->xStart); 945 mtls->xEnd = rsMin(mtls->fep.dim.x, sc->xEnd); 946 if (mtls->xStart >= mtls->xEnd) return; 947 } 948 949 if (!sc || (sc->yEnd == 0)) { 950 mtls->yEnd = mtls->fep.dim.y; 951 } else { 952 rsAssert(sc->yStart < mtls->fep.dim.y); 953 rsAssert(sc->yEnd <= mtls->fep.dim.y); 954 rsAssert(sc->yStart < sc->yEnd); 955 mtls->yStart = rsMin(mtls->fep.dim.y, sc->yStart); 956 mtls->yEnd = rsMin(mtls->fep.dim.y, sc->yEnd); 957 if (mtls->yStart >= mtls->yEnd) return; 958 } 959 960 if (!sc || (sc->zEnd == 0)) { 961 mtls->zEnd = mtls->fep.dim.z; 962 } else { 963 rsAssert(sc->zStart < mtls->fep.dim.z); 964 rsAssert(sc->zEnd <= mtls->fep.dim.z); 965 rsAssert(sc->zStart < sc->zEnd); 966 mtls->zStart = rsMin(mtls->fep.dim.z, sc->zStart); 967 mtls->zEnd = rsMin(mtls->fep.dim.z, sc->zEnd); 968 if (mtls->zStart >= mtls->zEnd) return; 969 } 970 971 mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); 972 mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); 973 mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); 974 mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); 975 976 rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0)); 977 978 mtls->rsc = mCtx; 979 if (ains) { 980 memcpy(mtls->ains, ains, inLen * sizeof(ains[0])); 981 } 982 mtls->aout[0] = aout; 983 mtls->fep.usr = usr; 984 mtls->fep.usrLen = usrLen; 985 mtls->mSliceSize = 1; 986 mtls->mSliceNum = 0; 987 988 mtls->isThreadable = mIsThreadable; 989 990 if (inLen > 0) { 991 mtls->fep.inLen = inLen; 992 for (int index = inLen; --index >= 0;) { 993 mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr; 994 mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes(); 995 } 996 } 997 998 if (aout != nullptr) { 999 mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; 1000 mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes(); 1001 } 1002} 1003 1004 1005void RsdCpuScriptImpl::invokeForEach(uint32_t slot, 1006 const Allocation ** ains, 1007 uint32_t inLen, 1008 Allocation * aout, 1009 const void * usr, 1010 uint32_t usrLen, 1011 const RsScriptCall *sc) { 1012 1013 MTLaunchStruct mtls; 1014 1015 forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls); 1016 forEachKernelSetup(slot, &mtls); 1017 1018 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 1019 mCtx->launchThreads(ains, inLen, aout, sc, &mtls); 1020 mCtx->setTLS(oldTLS); 1021} 1022 1023void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { 1024 mtls->script = this; 1025 mtls->fep.slot = slot; 1026 mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]); 1027 rsAssert(mtls->kernel != nullptr); 1028 mtls->sig = mForEachSignatures[slot]; 1029} 1030 1031int RsdCpuScriptImpl::invokeRoot() { 1032 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 1033 int ret = mRoot(); 1034 mCtx->setTLS(oldTLS); 1035 return ret; 1036} 1037 1038void RsdCpuScriptImpl::invokeInit() { 1039 if (mInit) { 1040 mInit(); 1041 } 1042} 1043 1044void RsdCpuScriptImpl::invokeFreeChildren() { 1045 if (mFreeChildren) { 1046 mFreeChildren(); 1047 } 1048} 1049 1050void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params, 1051 size_t paramLength) { 1052 //ALOGE("invoke %i %p %zu", slot, params, paramLength); 1053 void * ap = nullptr; 1054 1055#if defined(__x86_64__) 1056 // The invoked function could have input parameter of vector type for example float4 which 1057 // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform. 1058 // So try to align void* params before passing them into RS exported function. 1059 1060 if ((uint8_t)(uint64_t)params & 0x0F) { 1061 if ((ap = (void*)memalign(16, paramLength)) != nullptr) { 1062 memcpy(ap, params, paramLength); 1063 } else { 1064 ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned."); 1065 } 1066 } 1067#endif 1068 1069 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 1070 reinterpret_cast<void (*)(const void *, uint32_t)>( 1071 mInvokeFunctions[slot])(ap? (const void *) ap: params, paramLength); 1072 1073 mCtx->setTLS(oldTLS); 1074} 1075 1076void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 1077 //rsAssert(!script->mFieldIsObject[slot]); 1078 //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength); 1079 1080 //if (mIntrinsicID) { 1081 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); 1082 //return; 1083 //} 1084 1085 int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1086 if (!destPtr) { 1087 //ALOGV("Calling setVar on slot = %i which is null", slot); 1088 return; 1089 } 1090 1091 memcpy(destPtr, data, dataLength); 1092} 1093 1094void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) { 1095 //rsAssert(!script->mFieldIsObject[slot]); 1096 //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength); 1097 1098 int32_t *srcPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1099 if (!srcPtr) { 1100 //ALOGV("Calling setVar on slot = %i which is null", slot); 1101 return; 1102 } 1103 memcpy(data, srcPtr, dataLength); 1104} 1105 1106 1107void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, 1108 const Element *elem, 1109 const uint32_t *dims, size_t dimLength) { 1110 int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1111 if (!destPtr) { 1112 //ALOGV("Calling setVar on slot = %i which is null", slot); 1113 return; 1114 } 1115 1116 // We want to look at dimension in terms of integer components, 1117 // but dimLength is given in terms of bytes. 1118 dimLength /= sizeof(int); 1119 1120 // Only a single dimension is currently supported. 1121 rsAssert(dimLength == 1); 1122 if (dimLength == 1) { 1123 // First do the increment loop. 1124 size_t stride = elem->getSizeBytes(); 1125 const char *cVal = reinterpret_cast<const char *>(data); 1126 for (uint32_t i = 0; i < dims[0]; i++) { 1127 elem->incRefs(cVal); 1128 cVal += stride; 1129 } 1130 1131 // Decrement loop comes after (to prevent race conditions). 1132 char *oldVal = reinterpret_cast<char *>(destPtr); 1133 for (uint32_t i = 0; i < dims[0]; i++) { 1134 elem->decRefs(oldVal); 1135 oldVal += stride; 1136 } 1137 } 1138 1139 memcpy(destPtr, data, dataLength); 1140} 1141 1142void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) { 1143 1144 //rsAssert(!script->mFieldIsObject[slot]); 1145 //ALOGE("setGlobalBind %i %p", slot, data); 1146 1147 int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1148 if (!destPtr) { 1149 //ALOGV("Calling setVar on slot = %i which is null", slot); 1150 return; 1151 } 1152 1153 void *ptr = nullptr; 1154 mBoundAllocs[slot] = data; 1155 if (data) { 1156 ptr = data->mHal.drvState.lod[0].mallocPtr; 1157 } 1158 memcpy(destPtr, &ptr, sizeof(void *)); 1159} 1160 1161void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { 1162 1163 //rsAssert(script->mFieldIsObject[slot]); 1164 //ALOGE("setGlobalObj %i %p", slot, data); 1165 1166 int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]); 1167 if (!destPtr) { 1168 //ALOGV("Calling setVar on slot = %i which is null", slot); 1169 return; 1170 } 1171 1172 rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data); 1173} 1174 1175RsdCpuScriptImpl::~RsdCpuScriptImpl() { 1176#ifndef RS_COMPATIBILITY_LIB 1177 1178 if (mCompilerDriver) { 1179 delete mCompilerDriver; 1180 } 1181 1182#endif 1183 1184 if (mFieldIsObject) { 1185 for (size_t i = 0; i < mExportedVariableCount; ++i) { 1186 if (mFieldIsObject[i]) { 1187 if (mFieldAddress[i] != nullptr) { 1188 rs_object_base *obj_addr = 1189 reinterpret_cast<rs_object_base *>(mFieldAddress[i]); 1190 rsrClearObject(mCtx->getContext(), obj_addr); 1191 } 1192 } 1193 } 1194 } 1195 1196 if (mInvokeFunctions) delete[] mInvokeFunctions; 1197 if (mForEachFunctions) delete[] mForEachFunctions; 1198 if (mFieldAddress) delete[] mFieldAddress; 1199 if (mFieldIsObject) delete[] mFieldIsObject; 1200 if (mForEachSignatures) delete[] mForEachSignatures; 1201 if (mBoundAllocs) delete[] mBoundAllocs; 1202 if (mScriptSO) { 1203 dlclose(mScriptSO); 1204 } 1205} 1206 1207Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { 1208 if (!ptr) { 1209 return nullptr; 1210 } 1211 1212 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) { 1213 Allocation *a = mBoundAllocs[ct]; 1214 if (!a) continue; 1215 if (a->mHal.drvState.lod[0].mallocPtr == ptr) { 1216 return a; 1217 } 1218 } 1219 ALOGE("rsGetAllocation, failed to find %p", ptr); 1220 return nullptr; 1221} 1222 1223void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains, 1224 uint32_t inLen, Allocation * aout, 1225 const void * usr, uint32_t usrLen, 1226 const RsScriptCall *sc) {} 1227 1228void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains, 1229 uint32_t inLen, Allocation * aout, 1230 const void * usr, uint32_t usrLen, 1231 const RsScriptCall *sc) {} 1232 1233 1234} 1235} 1236