rsCpuScript.cpp revision f3213d7fd648da98bb3b03204eaf90f03c31926b
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19
20#ifdef RS_COMPATIBILITY_LIB
21    #include <stdio.h>
22    #include <sys/stat.h>
23    #include <unistd.h>
24#else
25    #include <bcc/BCCContext.h>
26    #include <bcc/Config/Config.h>
27    #include <bcc/Renderscript/RSCompilerDriver.h>
28    #include <bcc/Renderscript/RSInfo.h>
29    #include <bcinfo/MetadataExtractor.h>
30    #include <cutils/properties.h>
31
32    #include <sys/types.h>
33    #include <sys/wait.h>
34    #include <unistd.h>
35
36    #include <string>
37    #include <vector>
38#endif
39
40#include <set>
41#include <string>
42#include <dlfcn.h>
43#include <stdlib.h>
44#include <string.h>
45#include <fstream>
46#include <iostream>
47
48#ifdef __LP64__
49#define SYSLIBPATH "/system/lib64"
50#else
51#define SYSLIBPATH "/system/lib"
52#endif
53
54namespace {
55
56// Create a len length string containing random characters from [A-Za-z0-9].
57static std::string getRandomString(size_t len) {
58    char buf[len + 1];
59    for (size_t i = 0; i < len; i++) {
60        uint32_t r = arc4random() & 0xffff;
61        r %= 62;
62        if (r < 26) {
63            // lowercase
64            buf[i] = 'a' + r;
65        } else if (r < 52) {
66            // uppercase
67            buf[i] = 'A' + (r - 26);
68        } else {
69            // Use a number
70            buf[i] = '0' + (r - 52);
71        }
72    }
73    buf[len] = '\0';
74    return std::string(buf);
75}
76
// Report whether \p path can be used as a cache directory: either it already
// passes a read/write/search access check, or it can be created with mode
// 0700.
static bool ensureCacheDirExists(const char *path) {
    const bool alreadyUsable = (access(path, R_OK | W_OK | X_OK) == 0);
    // mkdir() is only attempted when the access check did not pass.
    return alreadyUsable || (mkdir(path, 0700) == 0);
}
88
89// Copy the file named \p srcFile to \p dstFile.
90// Return 0 on success and -1 if anything wasn't copied.
91static int copyFile(const char *dstFile, const char *srcFile) {
92    std::ifstream srcStream(srcFile);
93    if (!srcStream) {
94        ALOGE("Could not verify or read source file: %s", srcFile);
95        return -1;
96    }
97    std::ofstream dstStream(dstFile);
98    if (!dstStream) {
99        ALOGE("Could not verify or write destination file: %s", dstFile);
100        return -1;
101    }
102    dstStream << srcStream.rdbuf();
103    if (!dstStream) {
104        ALOGE("Could not write destination file: %s", dstFile);
105        return -1;
106    }
107
108    srcStream.close();
109    dstStream.close();
110
111    return 0;
112}
113
// Compute the file name of the shared object that holds script \p resName.
//   RS_SERVER builds:                    "lib<resName>.so"
//   32-bit RS_COMPATIBILITY_LIB builds:  \p cacheDir cut at its last "cache",
//                                        then "/lib/librs.<resName>.so"
//   all other builds:                    "<cacheDir>/librs.<resName>.so"
static std::string findSharedObjectName(const char *cacheDir,
                                        const char *resName) {
#ifdef RS_SERVER
    std::string soPath("lib");
#else
    std::string soPath(cacheDir);
#if defined(RS_COMPATIBILITY_LIB) && !defined(__LP64__)
    const size_t cachePos = soPath.rfind("cache");
    if (cachePos == std::string::npos) {
        // Nothing to cut; the suffix is still appended to the full path.
        ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
    } else {
        soPath.erase(cachePos);
    }
    soPath += "/lib/librs.";
#else
    soPath += "/librs.";
#endif  // RS_COMPATIBILITY_LIB && !__LP64__
#endif  // RS_SERVER

    soPath += resName;
    soPath += ".so";
    return soPath;
}
138
139#ifndef RS_COMPATIBILITY_LIB
140
141static bool is_force_recompile() {
142#ifdef RS_SERVER
143  return false;
144#else
145  char buf[PROPERTY_VALUE_MAX];
146
147  // Re-compile if floating point precision has been overridden.
148  property_get("debug.rs.precision", buf, "");
149  if (buf[0] != '\0') {
150    return true;
151  }
152
153  // Re-compile if debug.rs.forcerecompile is set.
154  property_get("debug.rs.forcerecompile", buf, "0");
155  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
156    return true;
157  } else {
158    return false;
159  }
160#endif  // RS_SERVER
161}
162
163const static char *BCC_EXE_PATH = "/system/bin/bcc";
164
165static void setCompileArguments(std::vector<const char*>* args,
166                                const std::string& bcFileName,
167                                const char* cacheDir, const char* resName,
168                                const char* core_lib, bool useRSDebugContext,
169                                const char* bccPluginName) {
170    rsAssert(cacheDir && resName && core_lib);
171    args->push_back(BCC_EXE_PATH);
172    args->push_back("-unroll-runtime");
173    args->push_back("-scalarize-load-store");
174    args->push_back("-o");
175    args->push_back(resName);
176    args->push_back("-output_path");
177    args->push_back(cacheDir);
178    args->push_back("-bclib");
179    args->push_back(core_lib);
180    args->push_back("-mtriple");
181    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
182
183    // Enable workaround for A53 codegen by default.
184#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
185    args->push_back("-aarch64-fix-cortex-a53-835769");
186#endif
187
188    // Execute the bcc compiler.
189    if (useRSDebugContext) {
190        args->push_back("-rs-debug-ctx");
191    } else {
192        // Only load additional libraries for compiles that don't use
193        // the debug context.
194        if (bccPluginName && strlen(bccPluginName) > 0) {
195            args->push_back("-load");
196            args->push_back(bccPluginName);
197        }
198    }
199
200    args->push_back("-fPIC");
201    args->push_back("-embedRSInfo");
202
203    args->push_back(bcFileName.c_str());
204    args->push_back(nullptr);
205}
206
207static bool compileBitcode(const std::string &bcFileName,
208                           const char *bitcode,
209                           size_t bitcodeSize,
210                           const char **compileArguments,
211                           const std::string &compileCommandLine) {
212    rsAssert(bitcode && bitcodeSize);
213
214    FILE *bcfile = fopen(bcFileName.c_str(), "w");
215    if (!bcfile) {
216        ALOGE("Could not write to %s", bcFileName.c_str());
217        return false;
218    }
219    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
220    fclose(bcfile);
221    if (nwritten != bitcodeSize) {
222        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
223              bcFileName.c_str());
224        return false;
225    }
226
227    pid_t pid = fork();
228
229    switch (pid) {
230    case -1: {  // Error occurred (we attempt no recovery)
231        ALOGE("Couldn't fork for bcc compiler execution");
232        return false;
233    }
234    case 0: {  // Child process
235        ALOGV("Invoking BCC with: %s", compileCommandLine.c_str());
236        execv(BCC_EXE_PATH, (char* const*)compileArguments);
237
238        ALOGE("execv() failed: %s", strerror(errno));
239        abort();
240        return false;
241    }
242    default: {  // Parent process (actual driver)
243        // Wait on child process to finish compiling the source.
244        int status = 0;
245        pid_t w = waitpid(pid, &status, 0);
246        if (w == -1) {
247            ALOGE("Could not wait for bcc compiler");
248            return false;
249        }
250
251        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
252            return true;
253        }
254
255        ALOGE("bcc compiler terminated unexpectedly");
256        return false;
257    }
258    }
259}
260
261#endif  // !defined(RS_COMPATIBILITY_LIB)
262}  // namespace
263
264namespace android {
265namespace renderscript {
266
// Absolute path of the linker executable that createSharedLibrary() runs.
const char* SharedLibraryUtils::LD_EXE_PATH = "/system/bin/ld.mc";
// Name of the sub-directory in which loadSOHelper() places its randomly
// named copies of script shared objects.
const char* SharedLibraryUtils::RS_CACHE_DIR = "com.android.renderscript.cache";
269
270#ifndef RS_COMPATIBILITY_LIB
271
272bool SharedLibraryUtils::createSharedLibrary(const char *cacheDir, const char *resName) {
273    std::string sharedLibName = findSharedObjectName(cacheDir, resName);
274    std::string objFileName = cacheDir;
275    objFileName.append("/");
276    objFileName.append(resName);
277    objFileName.append(".o");
278
279    const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so";
280    std::vector<const char *> args = {
281        LD_EXE_PATH,
282        "-shared",
283        "-nostdlib",
284        compiler_rt,
285        "-mtriple", DEFAULT_TARGET_TRIPLE_STRING,
286        "-L", SYSLIBPATH,
287        "-lRSDriver", "-lm", "-lc",
288        objFileName.c_str(),
289        "-o", sharedLibName.c_str(),
290        nullptr
291    };
292
293    std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data());
294
295    pid_t pid = fork();
296
297    switch (pid) {
298    case -1: {  // Error occurred (we attempt no recovery)
299        ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH);
300        return false;
301    }
302    case 0: {  // Child process
303        ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str());
304        execv(LD_EXE_PATH, (char* const*) args.data());
305
306        ALOGE("execv() failed: %s", strerror(errno));
307        abort();
308        return false;
309    }
310    default: {  // Parent process (actual driver)
311        // Wait on child process to finish compiling the source.
312        int status = 0;
313        pid_t w = waitpid(pid, &status, 0);
314        if (w == -1) {
315            ALOGE("Could not wait for linker (%s)", LD_EXE_PATH);
316            return false;
317        }
318
319        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
320            return true;
321        }
322
323        ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH);
324        return false;
325    }
326    }
327}
328
329#endif  // RS_COMPATIBILITY_LIB
330
331
332void* SharedLibraryUtils::loadSharedLibrary(const char *cacheDir, const char *resName, const char *nativeLibDir) {
333    void *loaded = nullptr;
334
335#if defined(RS_COMPATIBILITY_LIB) && defined(__LP64__)
336    std::string scriptSOName = findSharedObjectName(nativeLibDir, resName);
337#else
338    std::string scriptSOName = findSharedObjectName(cacheDir, resName);
339#endif
340
341    // We should check if we can load the library from the standard app
342    // location for shared libraries first.
343    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
344
345    if (loaded == nullptr) {
346        ALOGE("Unable to open shared library (%s): %s",
347              scriptSOName.c_str(), dlerror());
348
349#ifdef RS_COMPATIBILITY_LIB
350        // One final attempt to find the library in "/system/lib".
351        // We do this to allow bundled applications to use the compatibility
352        // library fallback path. Those applications don't have a private
353        // library path, so they need to install to the system directly.
354        // Note that this is really just a testing path.
355        std::string scriptSONameSystem("/system/lib/librs.");
356        scriptSONameSystem.append(resName);
357        scriptSONameSystem.append(".so");
358        loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
359                              resName);
360        if (loaded == nullptr) {
361            ALOGE("Unable to open system shared library (%s): %s",
362                  scriptSONameSystem.c_str(), dlerror());
363        }
364#endif
365    }
366
367    return loaded;
368}
369
370void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDir,
371                                       const char *resName) {
372    // Keep track of which .so libraries have been loaded. Once a library is
373    // in the set (per-process granularity), we must instead make a copy of
374    // the original shared object (randomly named .so file) and load that one
375    // instead. If we don't do this, we end up aliasing global data between
376    // the various Script instances (which are supposed to be completely
377    // independent).
378    static std::set<std::string> LoadedLibraries;
379
380    void *loaded = nullptr;
381
382    // Skip everything if we don't even have the original library available.
383    if (access(origName, F_OK) != 0) {
384        return nullptr;
385    }
386
387    // Common path is that we have not loaded this Script/library before.
388    if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
389        loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
390        if (loaded) {
391            LoadedLibraries.insert(origName);
392        }
393        return loaded;
394    }
395
396    std::string newName(cacheDir);
397
398    // Append RS_CACHE_DIR only if it is not found in cacheDir
399    // In driver mode, RS_CACHE_DIR is already appended to cacheDir.
400    if (newName.find(RS_CACHE_DIR) == std::string::npos) {
401        newName.append("/");
402        newName.append(RS_CACHE_DIR);
403        newName.append("/");
404    }
405
406    if (!ensureCacheDirExists(newName.c_str())) {
407        ALOGE("Could not verify or create cache dir: %s", cacheDir);
408        return nullptr;
409    }
410
411    // Construct an appropriately randomized filename for the copy.
412    newName.append("librs.");
413    newName.append(resName);
414    newName.append("#");
415    newName.append(getRandomString(6));  // 62^6 potential filename variants.
416    newName.append(".so");
417
418    int r = copyFile(newName.c_str(), origName);
419    if (r != 0) {
420        ALOGE("Could not create copy %s -> %s", origName, newName.c_str());
421        return nullptr;
422    }
423    loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
424    r = unlink(newName.c_str());
425    if (r != 0) {
426        ALOGE("Could not unlink copy %s", newName.c_str());
427    }
428    if (loaded) {
429        LoadedLibraries.insert(newName.c_str());
430    }
431
432    return loaded;
433}
434
// Size of the buffers used to hold one line of the ".rs.info" text.
#define MAXLINE 500
// Two-step stringification, so that MAKE_STR(MAXLINE) yields "500" rather
// than "MAXLINE"; used to build sscanf() field widths.
#define MAKE_STR_HELPER(S) #S
#define MAKE_STR(S) MAKE_STR_HELPER(S)
// Prefixes of the header lines parsed from ".rs.info". Each is followed by a
// count, except THREADABLE_STR, which is followed by "yes" or "no".
#define EXPORT_VAR_STR "exportVarCount: "
#define EXPORT_FUNC_STR "exportFuncCount: "
#define EXPORT_FOREACH_STR "exportForEachCount: "
#define OBJECT_SLOT_STR "objectSlotCount: "
#define PRAGMA_STR "pragmaCount: "
#define THREADABLE_STR "isThreadable: "
444
// Copy one line from *ppstr into s and advance *ppstr past what was consumed.
// At most size - 1 characters are copied and s is always NUL-terminated; a
// terminating '\n' is kept in s. A line longer than size - 1 characters is
// split, and the remainder is returned by the following call(s).
// Returns s on success, and nullptr once *ppstr is at the end of the string
// or when s, ppstr or *ppstr is null or size < 1.
static char* strgets(char *s, int size, const char **ppstr) {
    if (!s || !ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
        return nullptr;
    }

    int i;
    for (i = 0; i < (size - 1); i++) {
        const char c = **ppstr;
        s[i] = c;
        if (c == '\0') {
            // Leave *ppstr on the terminator. Stepping over it would make
            // the next call read past the end of the string.
            return s;
        }
        (*ppstr)++;
        if (c == '\n') {
            s[i+1] = '\0';
            return s;
        }
    }

    // size has been exceeded.
    s[i] = '\0';

    return s;
}
469
470RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
471    mCtx = ctx;
472    mScript = s;
473
474    mScriptSO = nullptr;
475
476#ifndef RS_COMPATIBILITY_LIB
477    mCompilerDriver = nullptr;
478#endif
479
480
481    mRoot = nullptr;
482    mRootExpand = nullptr;
483    mInit = nullptr;
484    mFreeChildren = nullptr;
485    mScriptExec = nullptr;
486
487    mBoundAllocs = nullptr;
488    mIntrinsicData = nullptr;
489    mIsThreadable = true;
490}
491
492bool RsdCpuScriptImpl::storeRSInfoFromSO() {
493    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
494    if (mRoot) {
495        //ALOGE("Found root(): %p", mRoot);
496    }
497    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
498    if (mRootExpand) {
499        //ALOGE("Found root.expand(): %p", mRootExpand);
500    }
501    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
502    if (mInit) {
503        //ALOGE("Found init(): %p", mInit);
504    }
505    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
506    if (mFreeChildren) {
507        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
508    }
509
510    mScriptExec = ScriptExecutable::createFromSharedObject(
511            mCtx->getContext(), mScriptSO);
512
513    if (mScriptExec == nullptr) {
514        return false;
515    }
516
517    size_t varCount = mScriptExec->getExportedVariableCount();
518    if (varCount > 0) {
519        mBoundAllocs = new Allocation *[varCount];
520        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
521    }
522
523    mIsThreadable = mScriptExec->getThreadable();
524    //ALOGE("Script isThreadable? %d", mIsThreadable);
525
526    return true;
527}
528
529ScriptExecutable* ScriptExecutable::createFromSharedObject(
530    Context* RSContext, void* sharedObj) {
531    char line[MAXLINE];
532
533    size_t varCount = 0;
534    size_t funcCount = 0;
535    size_t forEachCount = 0;
536    size_t objectSlotCount = 0;
537    size_t pragmaCount = 0;
538    bool isThreadable = true;
539
540    const char *rsInfo = (const char *) dlsym(sharedObj, ".rs.info");
541
542    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
543        return nullptr;
544    }
545    if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
546        ALOGE("Invalid export var count!: %s", line);
547        return nullptr;
548    }
549
550    std::vector<void*> fieldAddress;
551
552    for (size_t i = 0; i < varCount; ++i) {
553        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
554            return nullptr;
555        }
556        char *c = strrchr(line, '\n');
557        if (c) {
558            *c = '\0';
559        }
560        void* addr = dlsym(sharedObj, line);
561        if (addr == nullptr) {
562            ALOGE("Failed to find variable address for %s: %s",
563                  line, dlerror());
564            // Not a critical error if we don't find a global variable.
565        }
566        fieldAddress.push_back(addr);
567    }
568
569    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
570        return nullptr;
571    }
572    if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
573        ALOGE("Invalid export func count!: %s", line);
574        return nullptr;
575    }
576
577    std::vector<InvokeFunc_t> invokeFunctions(funcCount);
578
579    for (size_t i = 0; i < funcCount; ++i) {
580        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
581            return nullptr ;
582        }
583        char *c = strrchr(line, '\n');
584        if (c) {
585            *c = '\0';
586        }
587
588        invokeFunctions[i] = (InvokeFunc_t) dlsym(sharedObj, line);
589        if (invokeFunctions[i] == nullptr) {
590            ALOGE("Failed to get function address for %s(): %s",
591                  line, dlerror());
592            return nullptr;
593        }
594    }
595
596    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
597        return nullptr;
598    }
599    if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
600        ALOGE("Invalid export forEach count!: %s", line);
601        return nullptr;
602    }
603
604    std::vector<ForEachFunc_t> forEachFunctions(forEachCount);
605    std::vector<uint32_t> forEachSignatures(forEachCount);
606
607    for (size_t i = 0; i < forEachCount; ++i) {
608        unsigned int tmpSig = 0;
609        char tmpName[MAXLINE];
610
611        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
612            return nullptr;
613        }
614        if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
615                   &tmpSig, tmpName) != 2) {
616          ALOGE("Invalid export forEach!: %s", line);
617          return nullptr;
618        }
619
620        // Lookup the expanded ForEach kernel.
621        strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
622        forEachSignatures[i] = tmpSig;
623        forEachFunctions[i] =
624            (ForEachFunc_t) dlsym(sharedObj, tmpName);
625        if (i != 0 && forEachFunctions[i] == nullptr) {
626            // Ignore missing root.expand functions.
627            // root() is always specified at location 0.
628            ALOGE("Failed to find forEach function address for %s: %s",
629                  tmpName, dlerror());
630            return nullptr;
631        }
632    }
633
634    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
635        return nullptr;
636    }
637    if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
638        ALOGE("Invalid object slot count!: %s", line);
639        return nullptr;
640    }
641
642    std::vector<bool> fieldIsObject(varCount, false);
643
644    rsAssert(varCount > 0);
645    for (size_t i = 0; i < objectSlotCount; ++i) {
646        uint32_t varNum = 0;
647        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
648            return nullptr;
649        }
650        if (sscanf(line, "%u", &varNum) != 1) {
651            ALOGE("Invalid object slot!: %s", line);
652            return nullptr;
653        }
654
655        if (varNum < varCount) {
656            fieldIsObject[varNum] = true;
657        }
658    }
659
660#ifdef RS_COMPATIBILITY_LIB
661    // Do not attempt to read pragmas or isThreadable flag in compat lib path.
662    // Neither is applicable for compat lib
663    std::vector<const char *> pragmaKeys(pragmaCount);
664    std::vector<const char *> pragmaValues(pragmaCount);
665
666    isThreadable = true;
667
668#else
669    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
670        return nullptr;
671    }
672
673    if (sscanf(line, PRAGMA_STR "%zu", &pragmaCount) != 1) {
674        ALOGE("Invalid pragma count!: %s", line);
675        return nullptr;
676    }
677
678    std::vector<const char *> pragmaKeys(pragmaCount);
679    std::vector<const char *> pragmaValues(pragmaCount);
680
681    for (size_t i = 0; i < pragmaCount; ++i) {
682        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
683            ALOGE("Unable to read pragma at index %zu!", i);
684            return nullptr;
685        }
686
687        char key[MAXLINE];
688        char value[MAXLINE] = ""; // initialize in case value is empty
689
690        // pragmas can just have a key and no value.  Only check to make sure
691        // that the key is not empty
692        if (sscanf(line, "%" MAKE_STR(MAXLINE) "s - %" MAKE_STR(MAXLINE) "s",
693                   key, value) == 0 ||
694            strlen(key) == 0)
695        {
696            ALOGE("Invalid pragma value!: %s", line);
697
698            // free previously allocated keys and values
699            for (size_t idx = 0; idx < i; ++idx) {
700                delete [] pragmaKeys[idx];
701                delete [] pragmaValues[idx];
702            }
703            return nullptr;
704        }
705
706        char *pKey = new char[strlen(key)+1];
707        strcpy(pKey, key);
708        pragmaKeys[i] = pKey;
709
710        char *pValue = new char[strlen(value)+1];
711        strcpy(pValue, value);
712        pragmaValues[i] = pValue;
713        //ALOGE("Pragma %zu: Key: '%s' Value: '%s'", i, pKey, pValue);
714    }
715
716    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
717        return nullptr;
718    }
719
720    char tmpFlag[4];
721    if (sscanf(line, THREADABLE_STR "%4s", tmpFlag) != 1) {
722        ALOGE("Invalid threadable flag!: %s", line);
723        return nullptr;
724    }
725    if (strcmp(tmpFlag, "yes") == 0)
726        isThreadable = true;
727    else if (strcmp(tmpFlag, "no") == 0)
728        isThreadable = false;
729    else {
730        ALOGE("Invalid threadable flag!: %s", tmpFlag);
731        return nullptr;
732    }
733
734#endif
735
736    return new ScriptExecutable(
737        RSContext, fieldAddress, fieldIsObject, invokeFunctions,
738        forEachFunctions, forEachSignatures, pragmaKeys, pragmaValues,
739        isThreadable);
740}
741
742bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
743                            uint8_t const *bitcode, size_t bitcodeSize,
744                            uint32_t flags, char const *bccPluginName) {
745    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
746    //ALOGE("rsdScriptInit %p %p", rsc, script);
747
748    mCtx->lockMutex();
749#ifndef RS_COMPATIBILITY_LIB
750    bool useRSDebugContext = false;
751
752    mCompilerDriver = nullptr;
753
754    mCompilerDriver = new bcc::RSCompilerDriver();
755    if (mCompilerDriver == nullptr) {
756        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
757        mCtx->unlockMutex();
758        return false;
759    }
760
761    // Run any compiler setup functions we have been provided with.
762    RSSetupCompilerCallback setupCompilerCallback =
763            mCtx->getSetupCompilerCallback();
764    if (setupCompilerCallback != nullptr) {
765        setupCompilerCallback(mCompilerDriver);
766    }
767
768    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
769    if (!bitcodeMetadata.extract()) {
770        ALOGE("Could not extract metadata from bitcode");
771        mCtx->unlockMutex();
772        return false;
773    }
774
775    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
776
777    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
778        mCompilerDriver->setDebugContext(true);
779        useRSDebugContext = true;
780    }
781
782    std::string bcFileName(cacheDir);
783    bcFileName.append("/");
784    bcFileName.append(resName);
785    bcFileName.append(".bc");
786
787    std::vector<const char*> compileArguments;
788    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
789                        useRSDebugContext, bccPluginName);
790    // The last argument of compileArguments ia a nullptr, so remove 1 from the size.
791    std::string compileCommandLine =
792                bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data());
793
794    if (!is_force_recompile() && !useRSDebugContext) {
795        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
796    }
797
798    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
799    // again.
800    if (mScriptSO == nullptr) {
801        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
802                            compileArguments.data(), compileCommandLine))
803        {
804            ALOGE("bcc: FAILS to compile '%s'", resName);
805            mCtx->unlockMutex();
806            return false;
807        }
808
809        if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
810            ALOGE("Linker: Failed to link object file '%s'", resName);
811            mCtx->unlockMutex();
812            return false;
813        }
814
815        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
816        if (mScriptSO == nullptr) {
817            ALOGE("Unable to load '%s'", resName);
818            mCtx->unlockMutex();
819            return false;
820        }
821    }
822
823    // Read RS symbol information from the .so.
824    if ( !mScriptSO) {
825        goto error;
826    }
827
828    if ( !storeRSInfoFromSO()) {
829      goto error;
830    }
831#else  // RS_COMPATIBILITY_LIB is defined
832    const char *nativeLibDir = mCtx->getContext()->getNativeLibDir();
833    mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir);
834
835    if (!mScriptSO) {
836        goto error;
837    }
838
839    if (!storeRSInfoFromSO()) {
840        goto error;
841    }
842#endif
843    mCtx->unlockMutex();
844    return true;
845
846error:
847
848    mCtx->unlockMutex();
849    if (mScriptSO) {
850        dlclose(mScriptSO);
851    }
852    return false;
853}
854
855#ifndef RS_COMPATIBILITY_LIB
856
857const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
858                                          size_t bitcodeSize) {
859    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
860
861    // If we're debugging, use the debug library.
862    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
863        return SYSLIBPATH"/libclcore_debug.bc";
864    }
865
866    // If a callback has been registered to specify a library, use that.
867    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
868    if (selectRTCallback != nullptr) {
869        return selectRTCallback((const char*)bitcode, bitcodeSize);
870    }
871
872    // Check for a platform specific library
873#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
874    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
875    if (prec == bcinfo::RS_FP_Relaxed) {
876        // NEON-capable ARMv7a devices can use an accelerated math library
877        // for all reduced precision scripts.
878        // ARMv8 does not use NEON, as ASIMD can be used with all precision
879        // levels.
880        return SYSLIBPATH"/libclcore_neon.bc";
881    } else {
882        return defaultLib;
883    }
884#elif defined(__i386__) || defined(__x86_64__)
885    // x86 devices will use an optimized library.
886    return SYSLIBPATH"/libclcore_x86.bc";
887#else
888    return defaultLib;
889#endif
890}
891
892#endif
893
894void RsdCpuScriptImpl::populateScript(Script *script) {
895    // Copy info over to runtime
896    script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
897    script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
898    script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
899    script->mHal.info.exportedPragmaKeyList =
900        const_cast<const char**>(&mScriptExec->getPragmaKeys().front());
901    script->mHal.info.exportedPragmaValueList =
902        const_cast<const char**>(&mScriptExec->getPragmaValues().front());
903
904    // Bug, need to stash in metadata
905    if (mRootExpand) {
906        script->mHal.info.root = mRootExpand;
907    } else {
908        script->mHal.info.root = mRoot;
909    }
910}
911
912
// Pointer to a function returning void that takes (const void *, void *,
// const void *) followed by four uint32_t values.
// NOTE(review): its users lie outside this part of the file -- confirm the
// meaning of each argument there.
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
914
915bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
916                                        uint32_t inLen,
917                                        Allocation * aout,
918                                        const void * usr, uint32_t usrLen,
919                                        const RsScriptCall *sc,
920                                        MTLaunchStruct *mtls) {
921
922    memset(mtls, 0, sizeof(MTLaunchStruct));
923
924    for (int index = inLen; --index >= 0;) {
925        const Allocation* ain = ains[index];
926
927        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
928        if (ain != nullptr &&
929            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
930
931            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
932                                         "rsForEach called with null in allocations");
933            return false;
934        }
935    }
936
937    if (aout &&
938        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
939
940        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
941                                     "rsForEach called with null out allocations");
942        return false;
943    }
944
945    if (inLen > 0) {
946        const Allocation *ain0   = ains[0];
947        const Type       *inType = ain0->getType();
948
949        mtls->fep.dim.x = inType->getDimX();
950        mtls->fep.dim.y = inType->getDimY();
951        mtls->fep.dim.z = inType->getDimZ();
952
953        for (int Index = inLen; --Index >= 1;) {
954            if (!ain0->hasSameDims(ains[Index])) {
955                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
956                  "Failed to launch kernel; dimensions of input and output allocations do not match.");
957
958                return false;
959            }
960        }
961
962    } else if (aout != nullptr) {
963        const Type *outType = aout->getType();
964
965        mtls->fep.dim.x = outType->getDimX();
966        mtls->fep.dim.y = outType->getDimY();
967        mtls->fep.dim.z = outType->getDimZ();
968
969    } else {
970        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
971                                     "rsForEach called with null allocations");
972        return false;
973    }
974
975    if (inLen > 0 && aout != nullptr) {
976        if (!ains[0]->hasSameDims(aout)) {
977            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
978              "Failed to launch kernel; dimensions of input and output allocations do not match.");
979
980            return false;
981        }
982    }
983
984    if (!sc || (sc->xEnd == 0)) {
985        mtls->end.x = mtls->fep.dim.x;
986    } else {
987        mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
988        mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
989        if (mtls->start.x >= mtls->end.x) return false;
990    }
991
992    if (!sc || (sc->yEnd == 0)) {
993        mtls->end.y = mtls->fep.dim.y;
994    } else {
995        mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
996        mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
997        if (mtls->start.y >= mtls->end.y) return false;
998    }
999
1000    if (!sc || (sc->zEnd == 0)) {
1001        mtls->end.z = mtls->fep.dim.z;
1002    } else {
1003        mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
1004        mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
1005        if (mtls->start.z >= mtls->end.z) return false;
1006    }
1007
1008    if (!sc || (sc->arrayEnd == 0)) {
1009        mtls->end.array[0] = mtls->fep.dim.array[0];
1010    } else {
1011        mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
1012        mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
1013        if (mtls->start.array[0] >= mtls->end.array[0]) return false;
1014    }
1015
1016    if (!sc || (sc->array2End == 0)) {
1017        mtls->end.array[1] = mtls->fep.dim.array[1];
1018    } else {
1019        mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
1020        mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
1021        if (mtls->start.array[1] >= mtls->end.array[1]) return false;
1022    }
1023
1024    if (!sc || (sc->array3End == 0)) {
1025        mtls->end.array[2] = mtls->fep.dim.array[2];
1026    } else {
1027        mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
1028        mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
1029        if (mtls->start.array[2] >= mtls->end.array[2]) return false;
1030    }
1031
1032    if (!sc || (sc->array4End == 0)) {
1033        mtls->end.array[3] = mtls->fep.dim.array[3];
1034    } else {
1035        mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
1036        mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
1037        if (mtls->start.array[3] >= mtls->end.array[3]) return false;
1038    }
1039
1040
1041    // The X & Y walkers always want 0-1 min even if dim is not present
1042    mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
1043    mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
1044
1045    mtls->rsc        = mCtx;
1046    if (ains) {
1047        memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
1048    }
1049    mtls->aout[0]    = aout;
1050    mtls->fep.usr    = usr;
1051    mtls->fep.usrLen = usrLen;
1052    mtls->mSliceSize = 1;
1053    mtls->mSliceNum  = 0;
1054
1055    mtls->isThreadable  = mIsThreadable;
1056
1057    if (inLen > 0) {
1058        mtls->fep.inLen = inLen;
1059        for (int index = inLen; --index >= 0;) {
1060            mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
1061            mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
1062        }
1063    }
1064
1065    if (aout != nullptr) {
1066        mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
1067        mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
1068    }
1069
1070    // All validation passed, ok to launch threads
1071    return true;
1072}
1073
1074
1075void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
1076                                     const Allocation ** ains,
1077                                     uint32_t inLen,
1078                                     Allocation * aout,
1079                                     const void * usr,
1080                                     uint32_t usrLen,
1081                                     const RsScriptCall *sc) {
1082
1083    MTLaunchStruct mtls;
1084
1085    if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
1086        forEachKernelSetup(slot, &mtls);
1087
1088        RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1089        mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
1090        mCtx->setTLS(oldTLS);
1091    }
1092}
1093
1094void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
1095    mtls->script = this;
1096    mtls->fep.slot = slot;
1097    mtls->kernel = mScriptExec->getForEachFunction(slot);
1098    rsAssert(mtls->kernel != nullptr);
1099    mtls->sig = mScriptExec->getForEachSignature(slot);
1100}
1101
1102int RsdCpuScriptImpl::invokeRoot() {
1103    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1104    int ret = mRoot();
1105    mCtx->setTLS(oldTLS);
1106    return ret;
1107}
1108
1109void RsdCpuScriptImpl::invokeInit() {
1110    if (mInit) {
1111        mInit();
1112    }
1113}
1114
1115void RsdCpuScriptImpl::invokeFreeChildren() {
1116    if (mFreeChildren) {
1117        mFreeChildren();
1118    }
1119}
1120
1121void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
1122                                      size_t paramLength) {
1123    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
1124    void * ap = nullptr;
1125
1126#if defined(__x86_64__)
1127    // The invoked function could have input parameter of vector type for example float4 which
1128    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
1129    // So try to align void* params before passing them into RS exported function.
1130
1131    if ((uint8_t)(uint64_t)params & 0x0F) {
1132        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
1133            memcpy(ap, params, paramLength);
1134        } else {
1135            ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned.");
1136        }
1137    }
1138#endif
1139
1140    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1141    reinterpret_cast<void (*)(const void *, uint32_t)>(
1142        mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
1143
1144    mCtx->setTLS(oldTLS);
1145}
1146
1147void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
1148    //rsAssert(!script->mFieldIsObject[slot]);
1149    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
1150
1151    //if (mIntrinsicID) {
1152        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
1153        //return;
1154    //}
1155
1156    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1157    if (!destPtr) {
1158        //ALOGV("Calling setVar on slot = %i which is null", slot);
1159        return;
1160    }
1161
1162    memcpy(destPtr, data, dataLength);
1163}
1164
1165void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
1166    //rsAssert(!script->mFieldIsObject[slot]);
1167    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
1168
1169    int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1170    if (!srcPtr) {
1171        //ALOGV("Calling setVar on slot = %i which is null", slot);
1172        return;
1173    }
1174    memcpy(data, srcPtr, dataLength);
1175}
1176
1177
1178void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
1179                                                const Element *elem,
1180                                                const uint32_t *dims, size_t dimLength) {
1181    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1182    if (!destPtr) {
1183        //ALOGV("Calling setVar on slot = %i which is null", slot);
1184        return;
1185    }
1186
1187    // We want to look at dimension in terms of integer components,
1188    // but dimLength is given in terms of bytes.
1189    dimLength /= sizeof(int);
1190
1191    // Only a single dimension is currently supported.
1192    rsAssert(dimLength == 1);
1193    if (dimLength == 1) {
1194        // First do the increment loop.
1195        size_t stride = elem->getSizeBytes();
1196        const char *cVal = reinterpret_cast<const char *>(data);
1197        for (uint32_t i = 0; i < dims[0]; i++) {
1198            elem->incRefs(cVal);
1199            cVal += stride;
1200        }
1201
1202        // Decrement loop comes after (to prevent race conditions).
1203        char *oldVal = reinterpret_cast<char *>(destPtr);
1204        for (uint32_t i = 0; i < dims[0]; i++) {
1205            elem->decRefs(oldVal);
1206            oldVal += stride;
1207        }
1208    }
1209
1210    memcpy(destPtr, data, dataLength);
1211}
1212
1213void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
1214
1215    //rsAssert(!script->mFieldIsObject[slot]);
1216    //ALOGE("setGlobalBind %i %p", slot, data);
1217
1218    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1219    if (!destPtr) {
1220        //ALOGV("Calling setVar on slot = %i which is null", slot);
1221        return;
1222    }
1223
1224    void *ptr = nullptr;
1225    mBoundAllocs[slot] = data;
1226    if (data) {
1227        ptr = data->mHal.drvState.lod[0].mallocPtr;
1228    }
1229    memcpy(destPtr, &ptr, sizeof(void *));
1230}
1231
1232void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
1233
1234    //rsAssert(script->mFieldIsObject[slot]);
1235    //ALOGE("setGlobalObj %i %p", slot, data);
1236
1237    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1238    if (!destPtr) {
1239        //ALOGV("Calling setVar on slot = %i which is null", slot);
1240        return;
1241    }
1242
1243    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
1244}
1245
1246RsdCpuScriptImpl::~RsdCpuScriptImpl() {
1247#ifndef RS_COMPATIBILITY_LIB
1248    if (mCompilerDriver) {
1249        delete mCompilerDriver;
1250    }
1251#endif
1252
1253    if (mScriptExec != nullptr) {
1254        delete mScriptExec;
1255    }
1256    if (mBoundAllocs) delete[] mBoundAllocs;
1257    if (mScriptSO) {
1258        dlclose(mScriptSO);
1259    }
1260}
1261
1262Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
1263    if (!ptr) {
1264        return nullptr;
1265    }
1266
1267    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
1268        Allocation *a = mBoundAllocs[ct];
1269        if (!a) continue;
1270        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
1271            return a;
1272        }
1273    }
1274    ALOGE("rsGetAllocation, failed to find %p", ptr);
1275    return nullptr;
1276}
1277
1278void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
1279                                 uint32_t inLen, Allocation * aout,
1280                                 const void * usr, uint32_t usrLen,
1281                                 const RsScriptCall *sc) {}
1282
1283void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
1284                                  uint32_t inLen, Allocation * aout,
1285                                  const void * usr, uint32_t usrLen,
1286                                  const RsScriptCall *sc) {}
1287
1288
1289}
1290}
1291