rsCpuScript.cpp revision da0f069871343119251d6b0586be356dc2146a62
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19
20#ifdef RS_COMPATIBILITY_LIB
21    #include <stdio.h>
22    #include <sys/stat.h>
23    #include <unistd.h>
24#else
25    #include <bcc/BCCContext.h>
26    #include <bcc/Config/Config.h>
27    #include <bcc/Renderscript/RSCompilerDriver.h>
28    #include <bcc/Renderscript/RSInfo.h>
29    #include <bcinfo/MetadataExtractor.h>
30    #include <cutils/properties.h>
31
32    #include <sys/types.h>
33    #include <sys/wait.h>
34    #include <unistd.h>
35
36    #include <string>
37    #include <vector>
38#endif
39
40#include <set>
41#include <string>
42#include <dlfcn.h>
43#include <stdlib.h>
44#include <string.h>
45#include <fstream>
46#include <iostream>
47
48#ifdef __LP64__
49#define SYSLIBPATH "/system/lib64"
50#else
51#define SYSLIBPATH "/system/lib"
52#endif
53
54namespace {
55
56// Create a len length string containing random characters from [A-Za-z0-9].
57static std::string getRandomString(size_t len) {
58    char buf[len + 1];
59    for (size_t i = 0; i < len; i++) {
60        uint32_t r = arc4random() & 0xffff;
61        r %= 62;
62        if (r < 26) {
63            // lowercase
64            buf[i] = 'a' + r;
65        } else if (r < 52) {
66            // uppercase
67            buf[i] = 'A' + (r - 26);
68        } else {
69            // Use a number
70            buf[i] = '0' + (r - 52);
71        }
72    }
73    buf[len] = '\0';
74    return std::string(buf);
75}
76
// Report whether \p path can be used as a cache directory: true when it is
// already readable, writable and searchable, or when it could be created with
// mode 0700; false otherwise.
static bool ensureCacheDirExists(const char *path) {
    const bool alreadyUsable = (access(path, R_OK | W_OK | X_OK) == 0);
    // Creation is only attempted when the access probe fails.
    return alreadyUsable || (mkdir(path, 0700) == 0);
}
88
89// Copy the file named \p srcFile to \p dstFile.
90// Return 0 on success and -1 if anything wasn't copied.
91static int copyFile(const char *dstFile, const char *srcFile) {
92    std::ifstream srcStream(srcFile);
93    if (!srcStream) {
94        ALOGE("Could not verify or read source file: %s", srcFile);
95        return -1;
96    }
97    std::ofstream dstStream(dstFile);
98    if (!dstStream) {
99        ALOGE("Could not verify or write destination file: %s", dstFile);
100        return -1;
101    }
102    dstStream << srcStream.rdbuf();
103    if (!dstStream) {
104        ALOGE("Could not write destination file: %s", dstFile);
105        return -1;
106    }
107
108    srcStream.close();
109    dstStream.close();
110
111    return 0;
112}
113
// Build the file name of the shared object that holds the compiled script
// \p resName.
//
//  - RS_SERVER builds:            "lib<resName>.so" (cacheDir is ignored).
//  - RS_COMPATIBILITY_LIB builds: everything from the last "cache" in
//    \p cacheDir onwards is dropped and "/lib/librs.<resName>.so" is appended.
//  - Otherwise:                   "<cacheDir>/librs.<resName>.so".
static std::string findSharedObjectName(const char *cacheDir,
                                        const char *resName) {
#ifdef RS_SERVER
    std::string soPath("lib");
#else
    std::string soPath(cacheDir);
#ifdef RS_COMPATIBILITY_LIB
    const size_t cachePos = soPath.rfind("cache");
    if (cachePos == std::string::npos) {
        // Nothing to strip; the path is used as given.
        ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
    } else {
        soPath.erase(cachePos);
    }
    soPath += "/lib/librs.";
#else
    soPath += "/librs.";
#endif  // RS_COMPATIBILITY_LIB
#endif  // RS_SERVER

    soPath += resName;
    soPath += ".so";
    return soPath;
}
139
140#ifndef RS_COMPATIBILITY_LIB
141
142static bool is_force_recompile() {
143#ifdef RS_SERVER
144  return false;
145#else
146  char buf[PROPERTY_VALUE_MAX];
147
148  // Re-compile if floating point precision has been overridden.
149  property_get("debug.rs.precision", buf, "");
150  if (buf[0] != '\0') {
151    return true;
152  }
153
154  // Re-compile if debug.rs.forcerecompile is set.
155  property_get("debug.rs.forcerecompile", buf, "0");
156  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
157    return true;
158  } else {
159    return false;
160  }
161#endif  // RS_SERVER
162}
163
164static void setCompileArguments(std::vector<const char*>* args,
165                                const std::string& bcFileName,
166                                const char* cacheDir, const char* resName,
167                                const char* core_lib, bool useRSDebugContext,
168                                const char* bccPluginName) {
169    rsAssert(cacheDir && resName && core_lib);
170    args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
171    args->push_back("-unroll-runtime");
172    args->push_back("-scalarize-load-store");
173    args->push_back("-o");
174    args->push_back(resName);
175    args->push_back("-output_path");
176    args->push_back(cacheDir);
177    args->push_back("-bclib");
178    args->push_back(core_lib);
179    args->push_back("-mtriple");
180    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
181
182    // Enable workaround for A53 codegen by default.
183#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
184    args->push_back("-aarch64-fix-cortex-a53-835769");
185#endif
186
187    // Execute the bcc compiler.
188    if (useRSDebugContext) {
189        args->push_back("-rs-debug-ctx");
190    } else {
191        // Only load additional libraries for compiles that don't use
192        // the debug context.
193        if (bccPluginName && strlen(bccPluginName) > 0) {
194            args->push_back("-load");
195            args->push_back(bccPluginName);
196        }
197    }
198
199    args->push_back("-fPIC");
200    args->push_back("-embedRSInfo");
201
202    args->push_back(bcFileName.c_str());
203    args->push_back(nullptr);
204}
205
206static bool compileBitcode(const std::string &bcFileName,
207                           const char *bitcode,
208                           size_t bitcodeSize,
209                           const char **compileArguments,
210                           const std::string &compileCommandLine) {
211    rsAssert(bitcode && bitcodeSize);
212
213    FILE *bcfile = fopen(bcFileName.c_str(), "w");
214    if (!bcfile) {
215        ALOGE("Could not write to %s", bcFileName.c_str());
216        return false;
217    }
218    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
219    fclose(bcfile);
220    if (nwritten != bitcodeSize) {
221        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
222              bcFileName.c_str());
223        return false;
224    }
225
226    pid_t pid = fork();
227
228    switch (pid) {
229    case -1: {  // Error occurred (we attempt no recovery)
230        ALOGE("Couldn't fork for bcc compiler execution");
231        return false;
232    }
233    case 0: {  // Child process
234        ALOGV("Invoking BCC with: %s", compileCommandLine.c_str());
235        execv(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
236              (char* const*)compileArguments);
237
238        ALOGE("execv() failed: %s", strerror(errno));
239        abort();
240        return false;
241    }
242    default: {  // Parent process (actual driver)
243        // Wait on child process to finish compiling the source.
244        int status = 0;
245        pid_t w = waitpid(pid, &status, 0);
246        if (w == -1) {
247            ALOGE("Could not wait for bcc compiler");
248            return false;
249        }
250
251        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
252            return true;
253        }
254
255        ALOGE("bcc compiler terminated unexpectedly");
256        return false;
257    }
258    }
259}
260
261#endif  // !defined(RS_COMPATIBILITY_LIB)
262}  // namespace
263
264namespace android {
265namespace renderscript {
266
267const char* SharedLibraryUtils::LD_EXE_PATH = "/system/bin/ld.mc";
268const char* SharedLibraryUtils::RS_CACHE_DIR = "com.android.renderscript.cache";
269
270#ifndef RS_COMPATIBILITY_LIB
271
272bool SharedLibraryUtils::createSharedLibrary(const char *cacheDir, const char *resName) {
273    std::string sharedLibName = findSharedObjectName(cacheDir, resName);
274    std::string objFileName = cacheDir;
275    objFileName.append("/");
276    objFileName.append(resName);
277    objFileName.append(".o");
278
279    const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so";
280    std::vector<const char *> args = {
281        LD_EXE_PATH,
282        "-shared",
283        "-nostdlib",
284        compiler_rt,
285        "-mtriple", DEFAULT_TARGET_TRIPLE_STRING,
286        "-L", SYSLIBPATH,
287        "-lRSDriver", "-lm", "-lc",
288        objFileName.c_str(),
289        "-o", sharedLibName.c_str(),
290        nullptr
291    };
292
293    std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data());
294
295    pid_t pid = fork();
296
297    switch (pid) {
298    case -1: {  // Error occurred (we attempt no recovery)
299        ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH);
300        return false;
301    }
302    case 0: {  // Child process
303        ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str());
304        execv(LD_EXE_PATH, (char* const*) args.data());
305
306        ALOGE("execv() failed: %s", strerror(errno));
307        abort();
308        return false;
309    }
310    default: {  // Parent process (actual driver)
311        // Wait on child process to finish compiling the source.
312        int status = 0;
313        pid_t w = waitpid(pid, &status, 0);
314        if (w == -1) {
315            ALOGE("Could not wait for linker (%s)", LD_EXE_PATH);
316            return false;
317        }
318
319        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
320            return true;
321        }
322
323        ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH);
324        return false;
325    }
326    }
327}
328
329#endif  // RS_COMPATIBILITY_LIB
330
331void* SharedLibraryUtils::loadSharedLibrary(const char *cacheDir, const char *resName) {
332    void *loaded = nullptr;
333
334    std::string scriptSOName = findSharedObjectName(cacheDir, resName);
335
336    // We should check if we can load the library from the standard app
337    // location for shared libraries first.
338    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
339
340    if (loaded == nullptr) {
341        ALOGE("Unable to open shared library (%s): %s",
342              scriptSOName.c_str(), dlerror());
343
344#ifdef RS_COMPATIBILITY_LIB
345        // One final attempt to find the library in "/system/lib".
346        // We do this to allow bundled applications to use the compatibility
347        // library fallback path. Those applications don't have a private
348        // library path, so they need to install to the system directly.
349        // Note that this is really just a testing path.
350        std::string scriptSONameSystem("/system/lib/librs.");
351        scriptSONameSystem.append(resName);
352        scriptSONameSystem.append(".so");
353        loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
354                              resName);
355        if (loaded == nullptr) {
356            ALOGE("Unable to open system shared library (%s): %s",
357                  scriptSONameSystem.c_str(), dlerror());
358        }
359#endif
360    }
361
362    return loaded;
363}
364
365void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDir,
366                                       const char *resName) {
367    // Keep track of which .so libraries have been loaded. Once a library is
368    // in the set (per-process granularity), we must instead make a copy of
369    // the original shared object (randomly named .so file) and load that one
370    // instead. If we don't do this, we end up aliasing global data between
371    // the various Script instances (which are supposed to be completely
372    // independent).
373    static std::set<std::string> LoadedLibraries;
374
375    void *loaded = nullptr;
376
377    // Skip everything if we don't even have the original library available.
378    if (access(origName, F_OK) != 0) {
379        return nullptr;
380    }
381
382    // Common path is that we have not loaded this Script/library before.
383    if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
384        loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
385        if (loaded) {
386            LoadedLibraries.insert(origName);
387        }
388        return loaded;
389    }
390
391    std::string newName(cacheDir);
392
393    // Append RS_CACHE_DIR only if it is not found in cacheDir
394    // In driver mode, RS_CACHE_DIR is already appended to cacheDir.
395    if (newName.find(RS_CACHE_DIR) == std::string::npos) {
396        newName.append("/");
397        newName.append(RS_CACHE_DIR);
398        newName.append("/");
399    }
400
401    if (!ensureCacheDirExists(newName.c_str())) {
402        ALOGE("Could not verify or create cache dir: %s", cacheDir);
403        return nullptr;
404    }
405
406    // Construct an appropriately randomized filename for the copy.
407    newName.append("librs.");
408    newName.append(resName);
409    newName.append("#");
410    newName.append(getRandomString(6));  // 62^6 potential filename variants.
411    newName.append(".so");
412
413    int r = copyFile(newName.c_str(), origName);
414    if (r != 0) {
415        ALOGE("Could not create copy %s -> %s", origName, newName.c_str());
416        return nullptr;
417    }
418    loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
419    r = unlink(newName.c_str());
420    if (r != 0) {
421        ALOGE("Could not unlink copy %s", newName.c_str());
422    }
423    if (loaded) {
424        LoadedLibraries.insert(newName.c_str());
425    }
426
427    return loaded;
428}
429
430const char* RsdCpuScriptImpl::BCC_EXE_PATH = "/system/bin/bcc";
431
// Size, in bytes, of the line buffers used while parsing ".rs.info".
#define MAXLINE            500
// Two-step stringification so that MAKE_STR(MAXLINE) yields "500" rather
// than "MAXLINE"; used to build sscanf() field widths.
#define MAKE_STR_HELPER(S) #S
#define MAKE_STR(S)        MAKE_STR_HELPER(S)
// Header prefixes of the individual ".rs.info" sections.
#define EXPORT_VAR_STR     "exportVarCount: "
#define EXPORT_FUNC_STR    "exportFuncCount: "
#define EXPORT_FOREACH_STR "exportForEachCount: "
#define OBJECT_SLOT_STR    "objectSlotCount: "
#define PRAGMA_STR         "pragmaCount: "
#define THREADABLE_STR     "isThreadable: "
441
// Copy one line from *ppstr into s, advancing *ppstr past what was consumed.
//
// At most size - 1 characters are copied and s is always NUL-terminated. A
// newline ends the line and is kept in s. When the end of the input is
// reached, *ppstr is left pointing AT the terminating NUL (never beyond it),
// so the following call returns nullptr.
//
// Returns s when something was read, and nullptr when s, ppstr or *ppstr is
// null, when *ppstr is already at the terminating NUL, or when size < 1.
static char* strgets(char *s, int size, const char **ppstr) {
    if (!s || !ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
        return nullptr;
    }

    int i;
    for (i = 0; i < (size - 1); i++) {
        s[i] = **ppstr;
        if (s[i] == '\0') {
            // Do not step over the terminator: the next call must be able to
            // see it instead of reading past the end of the input.
            return s;
        }
        (*ppstr)++;
        if (s[i] == '\n') {
            s[i+1] = '\0';
            return s;
        }
    }

    // size has been exceeded.
    s[i] = '\0';

    return s;
}
466
467RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
468    mCtx = ctx;
469    mScript = s;
470
471    mScriptSO = nullptr;
472
473#ifndef RS_COMPATIBILITY_LIB
474    mCompilerDriver = nullptr;
475#endif
476
477
478    mRoot = nullptr;
479    mRootExpand = nullptr;
480    mInit = nullptr;
481    mFreeChildren = nullptr;
482    mScriptExec = nullptr;
483
484    mBoundAllocs = nullptr;
485    mIntrinsicData = nullptr;
486    mIsThreadable = true;
487}
488
489bool RsdCpuScriptImpl::storeRSInfoFromSO() {
490    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
491    if (mRoot) {
492        //ALOGE("Found root(): %p", mRoot);
493    }
494    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
495    if (mRootExpand) {
496        //ALOGE("Found root.expand(): %p", mRootExpand);
497    }
498    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
499    if (mInit) {
500        //ALOGE("Found init(): %p", mInit);
501    }
502    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
503    if (mFreeChildren) {
504        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
505    }
506
507    mScriptExec = ScriptExecutable::createFromSharedObject(
508            mCtx->getContext(), mScriptSO);
509
510    if (mScriptExec == nullptr) {
511        return false;
512    }
513
514    size_t varCount = mScriptExec->getExportedVariableCount();
515    if (varCount > 0) {
516        mBoundAllocs = new Allocation *[varCount];
517        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
518    }
519
520    mIsThreadable = mScriptExec->getThreadable();
521    //ALOGE("Script isThreadable? %d", mIsThreadable);
522
523    return true;
524}
525
526ScriptExecutable* ScriptExecutable::createFromSharedObject(
527    Context* RSContext, void* sharedObj) {
528    char line[MAXLINE];
529
530    size_t varCount = 0;
531    size_t funcCount = 0;
532    size_t forEachCount = 0;
533    size_t objectSlotCount = 0;
534    size_t pragmaCount = 0;
535    bool isThreadable = true;
536
537    void** fieldAddress = nullptr;
538    bool* fieldIsObject = nullptr;
539    InvokeFunc_t* invokeFunctions = nullptr;
540    ForEachFunc_t* forEachFunctions = nullptr;
541    uint32_t* forEachSignatures = nullptr;
542    const char ** pragmaKeys = nullptr;
543    const char ** pragmaValues = nullptr;
544
545    const char *rsInfo = (const char *) dlsym(sharedObj, ".rs.info");
546
547    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
548        return nullptr;
549    }
550    if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
551        ALOGE("Invalid export var count!: %s", line);
552        return nullptr;
553    }
554
555    fieldAddress = new void*[varCount];
556    if (fieldAddress == nullptr) {
557        return nullptr;
558    }
559
560    fieldIsObject = new bool[varCount];
561    if (fieldIsObject == nullptr) {
562        goto error;
563    }
564
565    for (size_t i = 0; i < varCount; ++i) {
566        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
567            goto error;
568        }
569        char *c = strrchr(line, '\n');
570        if (c) {
571            *c = '\0';
572        }
573        void* addr = dlsym(sharedObj, line);
574        if (addr == nullptr) {
575            ALOGE("Failed to find variable address for %s: %s",
576                  line, dlerror());
577            // Not a critical error if we don't find a global variable.
578        }
579        fieldAddress[i] = addr;
580        fieldIsObject[i] = false;
581    }
582
583    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
584        goto error;
585    }
586    if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
587        ALOGE("Invalid export func count!: %s", line);
588        goto error;
589    }
590
591    invokeFunctions = new InvokeFunc_t[funcCount];
592    if (invokeFunctions == nullptr) {
593        goto error;
594    }
595
596    for (size_t i = 0; i < funcCount; ++i) {
597        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
598            goto error;
599        }
600        char *c = strrchr(line, '\n');
601        if (c) {
602            *c = '\0';
603        }
604
605        invokeFunctions[i] = (InvokeFunc_t) dlsym(sharedObj, line);
606        if (invokeFunctions[i] == nullptr) {
607            ALOGE("Failed to get function address for %s(): %s",
608                  line, dlerror());
609            goto error;
610        }
611    }
612
613    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
614        goto error;
615    }
616    if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
617        ALOGE("Invalid export forEach count!: %s", line);
618        goto error;
619    }
620
621    forEachFunctions = new ForEachFunc_t[forEachCount];
622    if (forEachFunctions == nullptr) {
623        goto error;
624    }
625
626    forEachSignatures = new uint32_t[forEachCount];
627    if (forEachSignatures == nullptr) {
628        goto error;
629    }
630
631    for (size_t i = 0; i < forEachCount; ++i) {
632        unsigned int tmpSig = 0;
633        char tmpName[MAXLINE];
634
635        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
636            goto error;
637        }
638        if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
639                   &tmpSig, tmpName) != 2) {
640          ALOGE("Invalid export forEach!: %s", line);
641          goto error;
642        }
643
644        // Lookup the expanded ForEach kernel.
645        strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
646        forEachSignatures[i] = tmpSig;
647        forEachFunctions[i] =
648            (ForEachFunc_t) dlsym(sharedObj, tmpName);
649        if (i != 0 && forEachFunctions[i] == nullptr) {
650            // Ignore missing root.expand functions.
651            // root() is always specified at location 0.
652            ALOGE("Failed to find forEach function address for %s: %s",
653                  tmpName, dlerror());
654            goto error;
655        }
656    }
657
658    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
659        goto error;
660    }
661    if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
662        ALOGE("Invalid object slot count!: %s", line);
663        goto error;
664    }
665
666    for (size_t i = 0; i < objectSlotCount; ++i) {
667        uint32_t varNum = 0;
668        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
669            goto error;
670        }
671        if (sscanf(line, "%u", &varNum) != 1) {
672            ALOGE("Invalid object slot!: %s", line);
673            goto error;
674        }
675
676        if (varNum < varCount) {
677            fieldIsObject[varNum] = true;
678        }
679    }
680
681#ifndef RS_COMPATIBILITY_LIB
682    // Do not attempt to read pragmas or isThreadable flag in compat lib path.
683    // Neither is applicable for compat lib
684
685    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
686        goto error;
687    }
688
689    if (sscanf(line, PRAGMA_STR "%zu", &pragmaCount) != 1) {
690        ALOGE("Invalid pragma count!: %s", line);
691        goto error;
692    }
693
694    pragmaKeys = new const char*[pragmaCount];
695    if (pragmaKeys == nullptr) {
696        goto error;
697    }
698
699    pragmaValues = new const char*[pragmaCount];
700    if (pragmaValues == nullptr) {
701        goto error;
702    }
703
704    bzero(pragmaKeys, sizeof(char*) * pragmaCount);
705    bzero(pragmaValues, sizeof(char*) * pragmaCount);
706
707    for (size_t i = 0; i < pragmaCount; ++i) {
708        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
709            ALOGE("Unable to read pragma at index %zu!", i);
710            goto error;
711        }
712
713        char key[MAXLINE];
714        char value[MAXLINE] = ""; // initialize in case value is empty
715
716        // pragmas can just have a key and no value.  Only check to make sure
717        // that the key is not empty
718        if (sscanf(line, "%" MAKE_STR(MAXLINE) "s - %" MAKE_STR(MAXLINE) "s",
719                   key, value) == 0 ||
720            strlen(key) == 0)
721        {
722            ALOGE("Invalid pragma value!: %s", line);
723
724            goto error;
725        }
726
727        char *pKey = new char[strlen(key)+1];
728        strcpy(pKey, key);
729        pragmaKeys[i] = pKey;
730
731        char *pValue = new char[strlen(value)+1];
732        strcpy(pValue, value);
733        pragmaValues[i] = pValue;
734        //ALOGE("Pragma %zu: Key: '%s' Value: '%s'", i, pKey, pValue);
735    }
736
737    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
738        goto error;
739    }
740
741    char tmpFlag[4];
742    if (sscanf(line, THREADABLE_STR "%4s", tmpFlag) != 1) {
743        ALOGE("Invalid threadable flag!: %s", line);
744        goto error;
745    }
746    if (strcmp(tmpFlag, "yes") == 0) {
747        isThreadable = true;
748    } else if (strcmp(tmpFlag, "no") == 0) {
749        isThreadable = false;
750    } else {
751        ALOGE("Invalid threadable flag!: %s", tmpFlag);
752        goto error;
753    }
754
755#endif  // RS_COMPATIBILITY_LIB
756
757    return new ScriptExecutable(
758        RSContext, fieldAddress, fieldIsObject, varCount,
759        invokeFunctions, funcCount,
760        forEachFunctions, forEachSignatures, forEachCount,
761        pragmaKeys, pragmaValues, pragmaCount,
762        isThreadable);
763
764error:
765
766#ifndef RS_COMPATIBILITY_LIB
767    for (size_t idx = 0; idx < pragmaCount; ++idx) {
768        delete [] pragmaKeys[idx];
769        delete [] pragmaValues[idx];
770    }
771
772    delete[] pragmaValues;
773    delete[] pragmaKeys;
774#endif  // RS_COMPATIBILITY_LIB
775
776    delete[] forEachSignatures;
777    delete[] forEachFunctions;
778    delete[] invokeFunctions;
779    delete[] fieldIsObject;
780    delete[] fieldAddress;
781
782    return nullptr;
783}
784
785bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
786                            uint8_t const *bitcode, size_t bitcodeSize,
787                            uint32_t flags, char const *bccPluginName) {
788    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
789    // bitcode, bitcodeSize, flags, lookupFunc);
790    //ALOGE("rsdScriptInit %p %p", rsc, script);
791
792    mCtx->lockMutex();
793#ifndef RS_COMPATIBILITY_LIB
794    bool useRSDebugContext = false;
795
796    mCompilerDriver = nullptr;
797
798    mCompilerDriver = new bcc::RSCompilerDriver();
799    if (mCompilerDriver == nullptr) {
800        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
801        mCtx->unlockMutex();
802        return false;
803    }
804
805    // Run any compiler setup functions we have been provided with.
806    RSSetupCompilerCallback setupCompilerCallback =
807            mCtx->getSetupCompilerCallback();
808    if (setupCompilerCallback != nullptr) {
809        setupCompilerCallback(mCompilerDriver);
810    }
811
812    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
813    if (!bitcodeMetadata.extract()) {
814        ALOGE("Could not extract metadata from bitcode");
815        mCtx->unlockMutex();
816        return false;
817    }
818
819    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
820
821    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
822        mCompilerDriver->setDebugContext(true);
823        useRSDebugContext = true;
824    }
825
826    std::string bcFileName(cacheDir);
827    bcFileName.append("/");
828    bcFileName.append(resName);
829    bcFileName.append(".bc");
830
831    std::vector<const char*> compileArguments;
832    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
833                        useRSDebugContext, bccPluginName);
834    // The last argument of compileArguments ia a nullptr, so remove 1 from the size.
835    std::string compileCommandLine =
836                bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data());
837
838    if (!is_force_recompile() && !useRSDebugContext) {
839        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
840    }
841
842    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
843    // again.
844    if (mScriptSO == nullptr) {
845        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
846                            compileArguments.data(), compileCommandLine))
847        {
848            ALOGE("bcc: FAILS to compile '%s'", resName);
849            mCtx->unlockMutex();
850            return false;
851        }
852
853        if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
854            ALOGE("Linker: Failed to link object file '%s'", resName);
855            mCtx->unlockMutex();
856            return false;
857        }
858
859        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
860        if (mScriptSO == nullptr) {
861            ALOGE("Unable to load '%s'", resName);
862            mCtx->unlockMutex();
863            return false;
864        }
865    }
866
867    mBitcodeFilePath = bcFileName;
868
869    // Read RS symbol information from the .so.
870    if ( !mScriptSO) {
871        goto error;
872    }
873
874    if ( !storeRSInfoFromSO()) {
875      goto error;
876    }
877#else  // RS_COMPATIBILITY_LIB is defined
878
879    mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
880
881    if (!mScriptSO) {
882        goto error;
883    }
884
885    if (!storeRSInfoFromSO()) {
886        goto error;
887    }
888#endif
889    mCtx->unlockMutex();
890    return true;
891
892error:
893
894    mCtx->unlockMutex();
895    if (mScriptSO) {
896        dlclose(mScriptSO);
897    }
898    return false;
899}
900
901#ifndef RS_COMPATIBILITY_LIB
902
903const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
904                                          size_t bitcodeSize) {
905    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
906
907    // If we're debugging, use the debug library.
908    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
909        return SYSLIBPATH"/libclcore_debug.bc";
910    }
911
912    // If a callback has been registered to specify a library, use that.
913    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
914    if (selectRTCallback != nullptr) {
915        return selectRTCallback((const char*)bitcode, bitcodeSize);
916    }
917
918    // Check for a platform specific library
919#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
920    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
921    if (prec == bcinfo::RS_FP_Relaxed) {
922        // NEON-capable ARMv7a devices can use an accelerated math library
923        // for all reduced precision scripts.
924        // ARMv8 does not use NEON, as ASIMD can be used with all precision
925        // levels.
926        return SYSLIBPATH"/libclcore_neon.bc";
927    } else {
928        return defaultLib;
929    }
930#elif defined(__i386__) || defined(__x86_64__)
931    // x86 devices will use an optimized library.
932    return SYSLIBPATH"/libclcore_x86.bc";
933#else
934    return defaultLib;
935#endif
936}
937
938#endif
939
940void RsdCpuScriptImpl::populateScript(Script *script) {
941    // Copy info over to runtime
942    script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
943    script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
944    script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
945    script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
946    script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
947
948    // Bug, need to stash in metadata
949    if (mRootExpand) {
950        script->mHal.info.root = mRootExpand;
951    } else {
952        script->mHal.info.root = mRoot;
953    }
954}
955
956
// Pointer to a function taking three data pointers (const, mutable, const)
// followed by four 32-bit unsigned values, and returning nothing.
using rs_t = void (*)(const void *, void *, const void *,
                      uint32_t, uint32_t, uint32_t, uint32_t);
958
959bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
960                                        uint32_t inLen,
961                                        Allocation * aout,
962                                        const void * usr, uint32_t usrLen,
963                                        const RsScriptCall *sc,
964                                        MTLaunchStruct *mtls) {
965
966    memset(mtls, 0, sizeof(MTLaunchStruct));
967
968    for (int index = inLen; --index >= 0;) {
969        const Allocation* ain = ains[index];
970
971        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
972        if (ain != nullptr &&
973            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
974
975            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
976                                         "rsForEach called with null in allocations");
977            return false;
978        }
979    }
980
981    if (aout &&
982        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
983
984        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
985                                     "rsForEach called with null out allocations");
986        return false;
987    }
988
989    if (inLen > 0) {
990        const Allocation *ain0   = ains[0];
991        const Type       *inType = ain0->getType();
992
993        mtls->fep.dim.x = inType->getDimX();
994        mtls->fep.dim.y = inType->getDimY();
995        mtls->fep.dim.z = inType->getDimZ();
996
997        for (int Index = inLen; --Index >= 1;) {
998            if (!ain0->hasSameDims(ains[Index])) {
999                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1000                  "Failed to launch kernel; dimensions of input and output"
1001                  "allocations do not match.");
1002
1003                return false;
1004            }
1005        }
1006
1007    } else if (aout != nullptr) {
1008        const Type *outType = aout->getType();
1009
1010        mtls->fep.dim.x = outType->getDimX();
1011        mtls->fep.dim.y = outType->getDimY();
1012        mtls->fep.dim.z = outType->getDimZ();
1013
1014    } else {
1015        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1016                                     "rsForEach called with null allocations");
1017        return false;
1018    }
1019
1020    if (inLen > 0 && aout != nullptr) {
1021        if (!ains[0]->hasSameDims(aout)) {
1022            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1023              "Failed to launch kernel; dimensions of input and output allocations do not match.");
1024
1025            return false;
1026        }
1027    }
1028
1029    if (!sc || (sc->xEnd == 0)) {
1030        mtls->end.x = mtls->fep.dim.x;
1031    } else {
1032        mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
1033        mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
1034        if (mtls->start.x >= mtls->end.x) return false;
1035    }
1036
1037    if (!sc || (sc->yEnd == 0)) {
1038        mtls->end.y = mtls->fep.dim.y;
1039    } else {
1040        mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
1041        mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
1042        if (mtls->start.y >= mtls->end.y) return false;
1043    }
1044
1045    if (!sc || (sc->zEnd == 0)) {
1046        mtls->end.z = mtls->fep.dim.z;
1047    } else {
1048        mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
1049        mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
1050        if (mtls->start.z >= mtls->end.z) return false;
1051    }
1052
1053    if (!sc || (sc->arrayEnd == 0)) {
1054        mtls->end.array[0] = mtls->fep.dim.array[0];
1055    } else {
1056        mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
1057        mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
1058        if (mtls->start.array[0] >= mtls->end.array[0]) return false;
1059    }
1060
1061    if (!sc || (sc->array2End == 0)) {
1062        mtls->end.array[1] = mtls->fep.dim.array[1];
1063    } else {
1064        mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
1065        mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
1066        if (mtls->start.array[1] >= mtls->end.array[1]) return false;
1067    }
1068
1069    if (!sc || (sc->array3End == 0)) {
1070        mtls->end.array[2] = mtls->fep.dim.array[2];
1071    } else {
1072        mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
1073        mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
1074        if (mtls->start.array[2] >= mtls->end.array[2]) return false;
1075    }
1076
1077    if (!sc || (sc->array4End == 0)) {
1078        mtls->end.array[3] = mtls->fep.dim.array[3];
1079    } else {
1080        mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
1081        mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
1082        if (mtls->start.array[3] >= mtls->end.array[3]) return false;
1083    }
1084
1085
1086    // The X & Y walkers always want 0-1 min even if dim is not present
1087    mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
1088    mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
1089
1090    mtls->rsc        = mCtx;
1091    if (ains) {
1092        memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
1093    }
1094    mtls->aout[0]    = aout;
1095    mtls->fep.usr    = usr;
1096    mtls->fep.usrLen = usrLen;
1097    mtls->mSliceSize = 1;
1098    mtls->mSliceNum  = 0;
1099
1100    mtls->isThreadable  = mIsThreadable;
1101
1102    if (inLen > 0) {
1103        mtls->fep.inLen = inLen;
1104        for (int index = inLen; --index >= 0;) {
1105            mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
1106            mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
1107        }
1108    }
1109
1110    if (aout != nullptr) {
1111        mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
1112        mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
1113    }
1114
1115    // All validation passed, ok to launch threads
1116    return true;
1117}
1118
1119
1120void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
1121                                     const Allocation ** ains,
1122                                     uint32_t inLen,
1123                                     Allocation * aout,
1124                                     const void * usr,
1125                                     uint32_t usrLen,
1126                                     const RsScriptCall *sc) {
1127
1128    MTLaunchStruct mtls;
1129
1130    if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
1131        forEachKernelSetup(slot, &mtls);
1132
1133        RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1134        mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
1135        mCtx->setTLS(oldTLS);
1136    }
1137}
1138
1139void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
1140    mtls->script = this;
1141    mtls->fep.slot = slot;
1142    mtls->kernel = mScriptExec->getForEachFunction(slot);
1143    rsAssert(mtls->kernel != nullptr);
1144    mtls->sig = mScriptExec->getForEachSignature(slot);
1145}
1146
1147int RsdCpuScriptImpl::invokeRoot() {
1148    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1149    int ret = mRoot();
1150    mCtx->setTLS(oldTLS);
1151    return ret;
1152}
1153
1154void RsdCpuScriptImpl::invokeInit() {
1155    if (mInit) {
1156        mInit();
1157    }
1158}
1159
1160void RsdCpuScriptImpl::invokeFreeChildren() {
1161    if (mFreeChildren) {
1162        mFreeChildren();
1163    }
1164}
1165
1166void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
1167                                      size_t paramLength) {
1168    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
1169    void * ap = nullptr;
1170
1171#if defined(__x86_64__)
1172    // The invoked function could have input parameter of vector type for example float4 which
1173    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
1174    // So try to align void* params before passing them into RS exported function.
1175
1176    if ((uint8_t)(uint64_t)params & 0x0F) {
1177        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
1178            memcpy(ap, params, paramLength);
1179        } else {
1180            ALOGE("x86_64: invokeFunction memalign error, still use params which"
1181                  " is not 16 bytes aligned.");
1182        }
1183    }
1184#endif
1185
1186    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1187    reinterpret_cast<void (*)(const void *, uint32_t)>(
1188        mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
1189
1190    mCtx->setTLS(oldTLS);
1191}
1192
1193void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
1194    //rsAssert(!script->mFieldIsObject[slot]);
1195    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
1196
1197    //if (mIntrinsicID) {
1198        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
1199        //return;
1200    //}
1201
1202    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1203    if (!destPtr) {
1204        //ALOGV("Calling setVar on slot = %i which is null", slot);
1205        return;
1206    }
1207
1208    memcpy(destPtr, data, dataLength);
1209}
1210
1211void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
1212    //rsAssert(!script->mFieldIsObject[slot]);
1213    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
1214
1215    int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1216    if (!srcPtr) {
1217        //ALOGV("Calling setVar on slot = %i which is null", slot);
1218        return;
1219    }
1220    memcpy(data, srcPtr, dataLength);
1221}
1222
1223
1224void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
1225                                                const Element *elem,
1226                                                const uint32_t *dims, size_t dimLength) {
1227    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1228    if (!destPtr) {
1229        //ALOGV("Calling setVar on slot = %i which is null", slot);
1230        return;
1231    }
1232
1233    // We want to look at dimension in terms of integer components,
1234    // but dimLength is given in terms of bytes.
1235    dimLength /= sizeof(int);
1236
1237    // Only a single dimension is currently supported.
1238    rsAssert(dimLength == 1);
1239    if (dimLength == 1) {
1240        // First do the increment loop.
1241        size_t stride = elem->getSizeBytes();
1242        const char *cVal = reinterpret_cast<const char *>(data);
1243        for (uint32_t i = 0; i < dims[0]; i++) {
1244            elem->incRefs(cVal);
1245            cVal += stride;
1246        }
1247
1248        // Decrement loop comes after (to prevent race conditions).
1249        char *oldVal = reinterpret_cast<char *>(destPtr);
1250        for (uint32_t i = 0; i < dims[0]; i++) {
1251            elem->decRefs(oldVal);
1252            oldVal += stride;
1253        }
1254    }
1255
1256    memcpy(destPtr, data, dataLength);
1257}
1258
1259void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
1260
1261    //rsAssert(!script->mFieldIsObject[slot]);
1262    //ALOGE("setGlobalBind %i %p", slot, data);
1263
1264    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1265    if (!destPtr) {
1266        //ALOGV("Calling setVar on slot = %i which is null", slot);
1267        return;
1268    }
1269
1270    void *ptr = nullptr;
1271    mBoundAllocs[slot] = data;
1272    if (data) {
1273        ptr = data->mHal.drvState.lod[0].mallocPtr;
1274    }
1275    memcpy(destPtr, &ptr, sizeof(void *));
1276}
1277
1278void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
1279
1280    //rsAssert(script->mFieldIsObject[slot]);
1281    //ALOGE("setGlobalObj %i %p", slot, data);
1282
1283    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1284    if (!destPtr) {
1285        //ALOGV("Calling setVar on slot = %i which is null", slot);
1286        return;
1287    }
1288
1289    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
1290}
1291
1292RsdCpuScriptImpl::~RsdCpuScriptImpl() {
1293#ifndef RS_COMPATIBILITY_LIB
1294    if (mCompilerDriver) {
1295        delete mCompilerDriver;
1296    }
1297#endif
1298
1299    if (mScriptExec != nullptr) {
1300        delete mScriptExec;
1301    }
1302    if (mBoundAllocs) delete[] mBoundAllocs;
1303    if (mScriptSO) {
1304        dlclose(mScriptSO);
1305    }
1306}
1307
1308Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
1309    if (!ptr) {
1310        return nullptr;
1311    }
1312
1313    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
1314        Allocation *a = mBoundAllocs[ct];
1315        if (!a) continue;
1316        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
1317            return a;
1318        }
1319    }
1320    ALOGE("rsGetAllocation, failed to find %p", ptr);
1321    return nullptr;
1322}
1323
1324void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
1325                                 uint32_t inLen, Allocation * aout,
1326                                 const void * usr, uint32_t usrLen,
1327                                 const RsScriptCall *sc) {}
1328
1329void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
1330                                  uint32_t inLen, Allocation * aout,
1331                                  const void * usr, uint32_t usrLen,
1332                                  const RsScriptCall *sc) {}
1333
1334
1335}
1336}
1337