rsCpuScript.cpp revision e8f9fba78f0cb79fa8773373a635e30382113a75
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19
20#ifdef RS_COMPATIBILITY_LIB
21    #include <stdio.h>
22    #include <sys/stat.h>
23    #include <unistd.h>
24#else
25    #include <bcc/BCCContext.h>
26    #include <bcc/Config/Config.h>
27    #include <bcc/Renderscript/RSCompilerDriver.h>
28    #include <bcc/Renderscript/RSInfo.h>
29    #include <bcinfo/MetadataExtractor.h>
30    #include <cutils/properties.h>
31
32    #include <sys/types.h>
33    #include <sys/wait.h>
34    #include <unistd.h>
35
36    #include <string>
37    #include <vector>
38#endif
39
40#include <set>
41#include <string>
42#include <dlfcn.h>
43#include <stdlib.h>
44#include <string.h>
45#include <fstream>
46#include <iostream>
47
// Directory that holds the system libraries and RenderScript core bitcode
// referenced by this file (libcompiler_rt.so, the -L linker search path and
// the libclcore*.bc files). LP64 builds use the lib64 variant.
#ifdef __LP64__
#define SYSLIBPATH "/system/lib64"
#else
#define SYSLIBPATH "/system/lib"
#endif
53
54namespace {
55
56// Create a len length string containing random characters from [A-Za-z0-9].
57static std::string getRandomString(size_t len) {
58    char buf[len + 1];
59    for (size_t i = 0; i < len; i++) {
60        uint32_t r = arc4random() & 0xffff;
61        r %= 62;
62        if (r < 26) {
63            // lowercase
64            buf[i] = 'a' + r;
65        } else if (r < 52) {
66            // uppercase
67            buf[i] = 'A' + (r - 26);
68        } else {
69            // Use a number
70            buf[i] = '0' + (r - 52);
71        }
72    }
73    buf[len] = '\0';
74    return std::string(buf);
75}
76
// Report whether |path| can be used as a cache directory, creating it when
// necessary. A path that is already readable, writable and searchable is
// accepted as-is; otherwise a single mkdir() with owner-only permissions
// (0700) is attempted and its outcome becomes the result.
static bool ensureCacheDirExists(const char *path) {
    const bool alreadyUsable = (access(path, R_OK | W_OK | X_OK) == 0);
    // Short-circuit: mkdir() is only attempted when the access check failed.
    return alreadyUsable || (mkdir(path, 0700) == 0);
}
88
89// Copy the file named \p srcFile to \p dstFile.
90// Return 0 on success and -1 if anything wasn't copied.
91static int copyFile(const char *dstFile, const char *srcFile) {
92    std::ifstream srcStream(srcFile);
93    if (!srcStream) {
94        ALOGE("Could not verify or read source file: %s", srcFile);
95        return -1;
96    }
97    std::ofstream dstStream(dstFile);
98    if (!dstStream) {
99        ALOGE("Could not verify or write destination file: %s", dstFile);
100        return -1;
101    }
102    dstStream << srcStream.rdbuf();
103    if (!dstStream) {
104        ALOGE("Could not write destination file: %s", dstFile);
105        return -1;
106    }
107
108    srcStream.close();
109    dstStream.close();
110
111    return 0;
112}
113
// Build the path of the shared object that holds the compiled script named
// |resName|:
//   - RS_SERVER builds:            "lib<resName>.so" (cacheDir is not used)
//   - RS_COMPATIBILITY_LIB builds: cacheDir cut at its last "cache", followed
//                                  by "/lib/librs.<resName>.so"
//   - all other builds:            "<cacheDir>/librs.<resName>.so"
static std::string findSharedObjectName(const char *cacheDir,
                                        const char *resName) {
#ifdef RS_SERVER
    std::string soPath("lib");
#else
    std::string soPath(cacheDir);
#ifdef RS_COMPATIBILITY_LIB
    // Drop everything from the last "cache" onwards; if it is absent the
    // directory is used unchanged and the oddity is logged.
    const size_t cachePos = soPath.rfind("cache");
    if (cachePos == std::string::npos) {
        ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
    } else {
        soPath.erase(cachePos);
    }
    soPath += "/lib/librs.";
#else
    soPath += "/librs.";
#endif  // RS_COMPATIBILITY_LIB
#endif  // RS_SERVER

    soPath += resName;
    soPath += ".so";
    return soPath;
}
139
140#ifndef RS_COMPATIBILITY_LIB
141
142static bool is_force_recompile() {
143#ifdef RS_SERVER
144  return false;
145#else
146  char buf[PROPERTY_VALUE_MAX];
147
148  // Re-compile if floating point precision has been overridden.
149  property_get("debug.rs.precision", buf, "");
150  if (buf[0] != '\0') {
151    return true;
152  }
153
154  // Re-compile if debug.rs.forcerecompile is set.
155  property_get("debug.rs.forcerecompile", buf, "0");
156  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
157    return true;
158  } else {
159    return false;
160  }
161#endif  // RS_SERVER
162}
163
// Absolute path of the bcc compiler executable; used both as argv[0] and as
// the program passed to execv().
static const char *BCC_EXE_PATH = "/system/bin/bcc";
165
166static void setCompileArguments(std::vector<const char*>* args,
167                                const std::string& bcFileName,
168                                const char* cacheDir, const char* resName,
169                                const char* core_lib, bool useRSDebugContext,
170                                const char* bccPluginName) {
171    rsAssert(cacheDir && resName && core_lib);
172    args->push_back(BCC_EXE_PATH);
173    args->push_back("-unroll-runtime");
174    args->push_back("-scalarize-load-store");
175    args->push_back("-o");
176    args->push_back(resName);
177    args->push_back("-output_path");
178    args->push_back(cacheDir);
179    args->push_back("-bclib");
180    args->push_back(core_lib);
181    args->push_back("-mtriple");
182    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
183
184    // Enable workaround for A53 codegen by default.
185#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
186    args->push_back("-aarch64-fix-cortex-a53-835769");
187#endif
188
189    // Execute the bcc compiler.
190    if (useRSDebugContext) {
191        args->push_back("-rs-debug-ctx");
192    } else {
193        // Only load additional libraries for compiles that don't use
194        // the debug context.
195        if (bccPluginName && strlen(bccPluginName) > 0) {
196            args->push_back("-load");
197            args->push_back(bccPluginName);
198        }
199    }
200
201    args->push_back("-fPIC");
202    args->push_back("-embedRSInfo");
203
204    args->push_back(bcFileName.c_str());
205    args->push_back(nullptr);
206}
207
208static bool compileBitcode(const std::string &bcFileName,
209                           const char *bitcode,
210                           size_t bitcodeSize,
211                           const char **compileArguments,
212                           const std::string &compileCommandLine) {
213    rsAssert(bitcode && bitcodeSize);
214
215    FILE *bcfile = fopen(bcFileName.c_str(), "w");
216    if (!bcfile) {
217        ALOGE("Could not write to %s", bcFileName.c_str());
218        return false;
219    }
220    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
221    fclose(bcfile);
222    if (nwritten != bitcodeSize) {
223        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
224              bcFileName.c_str());
225        return false;
226    }
227
228    pid_t pid = fork();
229
230    switch (pid) {
231    case -1: {  // Error occurred (we attempt no recovery)
232        ALOGE("Couldn't fork for bcc compiler execution");
233        return false;
234    }
235    case 0: {  // Child process
236        ALOGV("Invoking BCC with: %s", compileCommandLine.c_str());
237        execv(BCC_EXE_PATH, (char* const*)compileArguments);
238
239        ALOGE("execv() failed: %s", strerror(errno));
240        abort();
241        return false;
242    }
243    default: {  // Parent process (actual driver)
244        // Wait on child process to finish compiling the source.
245        int status = 0;
246        pid_t w = waitpid(pid, &status, 0);
247        if (w == -1) {
248            ALOGE("Could not wait for bcc compiler");
249            return false;
250        }
251
252        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
253            return true;
254        }
255
256        ALOGE("bcc compiler terminated unexpectedly");
257        return false;
258    }
259    }
260}
261
262#endif  // !defined(RS_COMPATIBILITY_LIB)
263}  // namespace
264
265namespace android {
266namespace renderscript {
267
// Absolute path of the linker executable that createSharedLibrary() runs to
// turn a compiled object file into a shared library.
const char* SharedLibraryUtils::LD_EXE_PATH = "/system/bin/ld.mc";
// Name of the cache sub-directory in which loadSOHelper() creates the
// temporary, randomly named copies of already-loaded script libraries.
const char* SharedLibraryUtils::RS_CACHE_DIR = "com.android.renderscript.cache";
270
271#ifndef RS_COMPATIBILITY_LIB
272
273bool SharedLibraryUtils::createSharedLibrary(const char *cacheDir, const char *resName) {
274    std::string sharedLibName = findSharedObjectName(cacheDir, resName);
275    std::string objFileName = cacheDir;
276    objFileName.append("/");
277    objFileName.append(resName);
278    objFileName.append(".o");
279
280    const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so";
281    std::vector<const char *> args = {
282        LD_EXE_PATH,
283        "-shared",
284        "-nostdlib",
285        compiler_rt,
286        "-mtriple", DEFAULT_TARGET_TRIPLE_STRING,
287        "-L", SYSLIBPATH,
288        "-lRSDriver", "-lm", "-lc",
289        objFileName.c_str(),
290        "-o", sharedLibName.c_str(),
291        nullptr
292    };
293
294    std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data());
295
296    pid_t pid = fork();
297
298    switch (pid) {
299    case -1: {  // Error occurred (we attempt no recovery)
300        ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH);
301        return false;
302    }
303    case 0: {  // Child process
304        ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str());
305        execv(LD_EXE_PATH, (char* const*) args.data());
306
307        ALOGE("execv() failed: %s", strerror(errno));
308        abort();
309        return false;
310    }
311    default: {  // Parent process (actual driver)
312        // Wait on child process to finish compiling the source.
313        int status = 0;
314        pid_t w = waitpid(pid, &status, 0);
315        if (w == -1) {
316            ALOGE("Could not wait for linker (%s)", LD_EXE_PATH);
317            return false;
318        }
319
320        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
321            return true;
322        }
323
324        ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH);
325        return false;
326    }
327    }
328}
329
330#endif  // RS_COMPATIBILITY_LIB
331
332void* SharedLibraryUtils::loadSharedLibrary(const char *cacheDir, const char *resName) {
333    void *loaded = nullptr;
334
335    std::string scriptSOName = findSharedObjectName(cacheDir, resName);
336
337    // We should check if we can load the library from the standard app
338    // location for shared libraries first.
339    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
340
341    if (loaded == nullptr) {
342        ALOGE("Unable to open shared library (%s): %s",
343              scriptSOName.c_str(), dlerror());
344
345#ifdef RS_COMPATIBILITY_LIB
346        // One final attempt to find the library in "/system/lib".
347        // We do this to allow bundled applications to use the compatibility
348        // library fallback path. Those applications don't have a private
349        // library path, so they need to install to the system directly.
350        // Note that this is really just a testing path.
351        std::string scriptSONameSystem("/system/lib/librs.");
352        scriptSONameSystem.append(resName);
353        scriptSONameSystem.append(".so");
354        loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
355                              resName);
356        if (loaded == nullptr) {
357            ALOGE("Unable to open system shared library (%s): %s",
358                  scriptSONameSystem.c_str(), dlerror());
359        }
360#endif
361    }
362
363    return loaded;
364}
365
366void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDir,
367                                       const char *resName) {
368    // Keep track of which .so libraries have been loaded. Once a library is
369    // in the set (per-process granularity), we must instead make a copy of
370    // the original shared object (randomly named .so file) and load that one
371    // instead. If we don't do this, we end up aliasing global data between
372    // the various Script instances (which are supposed to be completely
373    // independent).
374    static std::set<std::string> LoadedLibraries;
375
376    void *loaded = nullptr;
377
378    // Skip everything if we don't even have the original library available.
379    if (access(origName, F_OK) != 0) {
380        return nullptr;
381    }
382
383    // Common path is that we have not loaded this Script/library before.
384    if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
385        loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
386        if (loaded) {
387            LoadedLibraries.insert(origName);
388        }
389        return loaded;
390    }
391
392    std::string newName(cacheDir);
393
394    // Append RS_CACHE_DIR only if it is not found in cacheDir
395    // In driver mode, RS_CACHE_DIR is already appended to cacheDir.
396    if (newName.find(RS_CACHE_DIR) == std::string::npos) {
397        newName.append("/");
398        newName.append(RS_CACHE_DIR);
399        newName.append("/");
400    }
401
402    if (!ensureCacheDirExists(newName.c_str())) {
403        ALOGE("Could not verify or create cache dir: %s", cacheDir);
404        return nullptr;
405    }
406
407    // Construct an appropriately randomized filename for the copy.
408    newName.append("librs.");
409    newName.append(resName);
410    newName.append("#");
411    newName.append(getRandomString(6));  // 62^6 potential filename variants.
412    newName.append(".so");
413
414    int r = copyFile(newName.c_str(), origName);
415    if (r != 0) {
416        ALOGE("Could not create copy %s -> %s", origName, newName.c_str());
417        return nullptr;
418    }
419    loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
420    r = unlink(newName.c_str());
421    if (r != 0) {
422        ALOGE("Could not unlink copy %s", newName.c_str());
423    }
424    if (loaded) {
425        LoadedLibraries.insert(newName.c_str());
426    }
427
428    return loaded;
429}
430
// Size of the line buffers used while parsing the ".rs.info" text.
#define MAXLINE 500
// Two-level stringification: MAKE_STR(MAXLINE) expands MAXLINE first and
// yields "500", which is spliced into sscanf() formats as a field width.
#define MAKE_STR_HELPER(S) #S
#define MAKE_STR(S) MAKE_STR_HELPER(S)
// Prefixes of the section header lines expected in the ".rs.info" text; each
// is followed by a count or flag value on the same line.
#define EXPORT_VAR_STR "exportVarCount: "
#define EXPORT_FUNC_STR "exportFuncCount: "
#define EXPORT_FOREACH_STR "exportForEachCount: "
#define OBJECT_SLOT_STR "objectSlotCount: "
#define PRAGMA_STR "pragmaCount: "
#define THREADABLE_STR "isThreadable: "
440
// fgets()-like reader over an in-memory string.
//
// Copies characters from *ppstr into s until a newline has been copied
// (it is kept in s), size - 1 characters have been copied, or the end of the
// string is reached; s is always NUL-terminated. *ppstr is advanced past the
// characters that were consumed.
//
// Returns s when at least the call itself succeeded, and nullptr when there
// is nothing left to read (*ppstr is at the terminating '\0') or when an
// argument is invalid (null s/ppstr/*ppstr, size < 1).
//
// *ppstr is never moved past the terminating '\0'. Stepping over it would
// make the next call inspect memory beyond the end of the string instead of
// returning nullptr.
static char* strgets(char *s, int size, const char **ppstr) {
    if (!s || !ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
        return nullptr;
    }

    const char *src = *ppstr;
    int copied = 0;
    while (copied < size - 1 && *src != '\0') {
        const char ch = *src++;
        s[copied++] = ch;
        if (ch == '\n') {
            break;
        }
    }
    s[copied] = '\0';

    *ppstr = src;
    return s;
}
465
466RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
467    mCtx = ctx;
468    mScript = s;
469
470    mScriptSO = nullptr;
471
472#ifndef RS_COMPATIBILITY_LIB
473    mCompilerDriver = nullptr;
474#endif
475
476
477    mRoot = nullptr;
478    mRootExpand = nullptr;
479    mInit = nullptr;
480    mFreeChildren = nullptr;
481    mScriptExec = nullptr;
482
483    mBoundAllocs = nullptr;
484    mIntrinsicData = nullptr;
485    mIsThreadable = true;
486}
487
488bool RsdCpuScriptImpl::storeRSInfoFromSO() {
489    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
490    if (mRoot) {
491        //ALOGE("Found root(): %p", mRoot);
492    }
493    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
494    if (mRootExpand) {
495        //ALOGE("Found root.expand(): %p", mRootExpand);
496    }
497    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
498    if (mInit) {
499        //ALOGE("Found init(): %p", mInit);
500    }
501    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
502    if (mFreeChildren) {
503        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
504    }
505
506    mScriptExec = ScriptExecutable::createFromSharedObject(
507            mCtx->getContext(), mScriptSO);
508
509    if (mScriptExec == nullptr) {
510        return false;
511    }
512
513    size_t varCount = mScriptExec->getExportedVariableCount();
514    if (varCount > 0) {
515        mBoundAllocs = new Allocation *[varCount];
516        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
517    }
518
519    mIsThreadable = mScriptExec->getThreadable();
520    //ALOGE("Script isThreadable? %d", mIsThreadable);
521
522    return true;
523}
524
525ScriptExecutable* ScriptExecutable::createFromSharedObject(
526    Context* RSContext, void* sharedObj) {
527    char line[MAXLINE];
528
529    size_t varCount = 0;
530    size_t funcCount = 0;
531    size_t forEachCount = 0;
532    size_t objectSlotCount = 0;
533    size_t pragmaCount = 0;
534    bool isThreadable = true;
535
536    void** fieldAddress = nullptr;
537    bool* fieldIsObject = nullptr;
538    InvokeFunc_t* invokeFunctions = nullptr;
539    ForEachFunc_t* forEachFunctions = nullptr;
540    uint32_t* forEachSignatures = nullptr;
541    const char ** pragmaKeys = nullptr;
542    const char ** pragmaValues = nullptr;
543
544    const char *rsInfo = (const char *) dlsym(sharedObj, ".rs.info");
545
546    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
547        return nullptr;
548    }
549    if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
550        ALOGE("Invalid export var count!: %s", line);
551        return nullptr;
552    }
553
554    fieldAddress = new void*[varCount];
555    if (fieldAddress == nullptr) {
556        return nullptr;
557    }
558
559    fieldIsObject = new bool[varCount];
560    if (fieldIsObject == nullptr) {
561        goto error;
562    }
563
564    for (size_t i = 0; i < varCount; ++i) {
565        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
566            goto error;
567        }
568        char *c = strrchr(line, '\n');
569        if (c) {
570            *c = '\0';
571        }
572        void* addr = dlsym(sharedObj, line);
573        if (addr == nullptr) {
574            ALOGE("Failed to find variable address for %s: %s",
575                  line, dlerror());
576            // Not a critical error if we don't find a global variable.
577        }
578        fieldAddress[i] = addr;
579        fieldIsObject[i] = false;
580    }
581
582    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
583        goto error;
584    }
585    if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
586        ALOGE("Invalid export func count!: %s", line);
587        goto error;
588    }
589
590    invokeFunctions = new InvokeFunc_t[funcCount];
591    if (invokeFunctions == nullptr) {
592        goto error;
593    }
594
595    for (size_t i = 0; i < funcCount; ++i) {
596        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
597            goto error;
598        }
599        char *c = strrchr(line, '\n');
600        if (c) {
601            *c = '\0';
602        }
603
604        invokeFunctions[i] = (InvokeFunc_t) dlsym(sharedObj, line);
605        if (invokeFunctions[i] == nullptr) {
606            ALOGE("Failed to get function address for %s(): %s",
607                  line, dlerror());
608            goto error;
609        }
610    }
611
612    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
613        goto error;
614    }
615    if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
616        ALOGE("Invalid export forEach count!: %s", line);
617        goto error;
618    }
619
620    forEachFunctions = new ForEachFunc_t[forEachCount];
621    if (forEachFunctions == nullptr) {
622        goto error;
623    }
624
625    forEachSignatures = new uint32_t[forEachCount];
626    if (forEachSignatures == nullptr) {
627        goto error;
628    }
629
630    for (size_t i = 0; i < forEachCount; ++i) {
631        unsigned int tmpSig = 0;
632        char tmpName[MAXLINE];
633
634        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
635            goto error;
636        }
637        if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
638                   &tmpSig, tmpName) != 2) {
639          ALOGE("Invalid export forEach!: %s", line);
640          goto error;
641        }
642
643        // Lookup the expanded ForEach kernel.
644        strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
645        forEachSignatures[i] = tmpSig;
646        forEachFunctions[i] =
647            (ForEachFunc_t) dlsym(sharedObj, tmpName);
648        if (i != 0 && forEachFunctions[i] == nullptr) {
649            // Ignore missing root.expand functions.
650            // root() is always specified at location 0.
651            ALOGE("Failed to find forEach function address for %s: %s",
652                  tmpName, dlerror());
653            goto error;
654        }
655    }
656
657    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
658        goto error;
659    }
660    if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
661        ALOGE("Invalid object slot count!: %s", line);
662        goto error;
663    }
664
665    for (size_t i = 0; i < objectSlotCount; ++i) {
666        uint32_t varNum = 0;
667        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
668            goto error;
669        }
670        if (sscanf(line, "%u", &varNum) != 1) {
671            ALOGE("Invalid object slot!: %s", line);
672            goto error;
673        }
674
675        if (varNum < varCount) {
676            fieldIsObject[varNum] = true;
677        }
678    }
679
680#ifndef RS_COMPATIBILITY_LIB
681    // Do not attempt to read pragmas or isThreadable flag in compat lib path.
682    // Neither is applicable for compat lib
683
684    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
685        goto error;
686    }
687
688    if (sscanf(line, PRAGMA_STR "%zu", &pragmaCount) != 1) {
689        ALOGE("Invalid pragma count!: %s", line);
690        goto error;
691    }
692
693    pragmaKeys = new const char*[pragmaCount];
694    if (pragmaKeys == nullptr) {
695        goto error;
696    }
697
698    pragmaValues = new const char*[pragmaCount];
699    if (pragmaValues == nullptr) {
700        goto error;
701    }
702
703    bzero(pragmaKeys, sizeof(char*) * pragmaCount);
704    bzero(pragmaValues, sizeof(char*) * pragmaCount);
705
706    for (size_t i = 0; i < pragmaCount; ++i) {
707        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
708            ALOGE("Unable to read pragma at index %zu!", i);
709            goto error;
710        }
711
712        char key[MAXLINE];
713        char value[MAXLINE] = ""; // initialize in case value is empty
714
715        // pragmas can just have a key and no value.  Only check to make sure
716        // that the key is not empty
717        if (sscanf(line, "%" MAKE_STR(MAXLINE) "s - %" MAKE_STR(MAXLINE) "s",
718                   key, value) == 0 ||
719            strlen(key) == 0)
720        {
721            ALOGE("Invalid pragma value!: %s", line);
722
723            goto error;
724        }
725
726        char *pKey = new char[strlen(key)+1];
727        strcpy(pKey, key);
728        pragmaKeys[i] = pKey;
729
730        char *pValue = new char[strlen(value)+1];
731        strcpy(pValue, value);
732        pragmaValues[i] = pValue;
733        //ALOGE("Pragma %zu: Key: '%s' Value: '%s'", i, pKey, pValue);
734    }
735
736    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
737        goto error;
738    }
739
740    char tmpFlag[4];
741    if (sscanf(line, THREADABLE_STR "%4s", tmpFlag) != 1) {
742        ALOGE("Invalid threadable flag!: %s", line);
743        goto error;
744    }
745    if (strcmp(tmpFlag, "yes") == 0) {
746        isThreadable = true;
747    } else if (strcmp(tmpFlag, "no") == 0) {
748        isThreadable = false;
749    } else {
750        ALOGE("Invalid threadable flag!: %s", tmpFlag);
751        goto error;
752    }
753
754#endif  // RS_COMPATIBILITY_LIB
755
756    return new ScriptExecutable(
757        RSContext, fieldAddress, fieldIsObject, varCount,
758        invokeFunctions, funcCount,
759        forEachFunctions, forEachSignatures, forEachCount,
760        pragmaKeys, pragmaValues, pragmaCount,
761        isThreadable);
762
763error:
764
765#ifndef RS_COMPATIBILITY_LIB
766    for (size_t idx = 0; idx < pragmaCount; ++idx) {
767        if (pragmaKeys[idx] != nullptr) {
768            delete [] pragmaKeys[idx];
769        }
770        if (pragmaValues[idx] != nullptr) {
771            delete [] pragmaValues[idx];
772        }
773    }
774
775    delete[] pragmaValues;
776    delete[] pragmaKeys;
777#endif  // RS_COMPATIBILITY_LIB
778
779    delete[] forEachSignatures;
780    delete[] forEachFunctions;
781    delete[] invokeFunctions;
782    delete[] fieldIsObject;
783    delete[] fieldAddress;
784
785    return nullptr;
786}
787
788bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
789                            uint8_t const *bitcode, size_t bitcodeSize,
790                            uint32_t flags, char const *bccPluginName) {
791    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
792    // bitcode, bitcodeSize, flags, lookupFunc);
793    //ALOGE("rsdScriptInit %p %p", rsc, script);
794
795    mCtx->lockMutex();
796#ifndef RS_COMPATIBILITY_LIB
797    bool useRSDebugContext = false;
798
799    mCompilerDriver = nullptr;
800
801    mCompilerDriver = new bcc::RSCompilerDriver();
802    if (mCompilerDriver == nullptr) {
803        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
804        mCtx->unlockMutex();
805        return false;
806    }
807
808    // Run any compiler setup functions we have been provided with.
809    RSSetupCompilerCallback setupCompilerCallback =
810            mCtx->getSetupCompilerCallback();
811    if (setupCompilerCallback != nullptr) {
812        setupCompilerCallback(mCompilerDriver);
813    }
814
815    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
816    if (!bitcodeMetadata.extract()) {
817        ALOGE("Could not extract metadata from bitcode");
818        mCtx->unlockMutex();
819        return false;
820    }
821
822    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
823
824    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
825        mCompilerDriver->setDebugContext(true);
826        useRSDebugContext = true;
827    }
828
829    std::string bcFileName(cacheDir);
830    bcFileName.append("/");
831    bcFileName.append(resName);
832    bcFileName.append(".bc");
833
834    std::vector<const char*> compileArguments;
835    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
836                        useRSDebugContext, bccPluginName);
837    // The last argument of compileArguments ia a nullptr, so remove 1 from the size.
838    std::string compileCommandLine =
839                bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data());
840
841    if (!is_force_recompile() && !useRSDebugContext) {
842        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
843    }
844
845    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
846    // again.
847    if (mScriptSO == nullptr) {
848        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
849                            compileArguments.data(), compileCommandLine))
850        {
851            ALOGE("bcc: FAILS to compile '%s'", resName);
852            mCtx->unlockMutex();
853            return false;
854        }
855
856        if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
857            ALOGE("Linker: Failed to link object file '%s'", resName);
858            mCtx->unlockMutex();
859            return false;
860        }
861
862        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
863        if (mScriptSO == nullptr) {
864            ALOGE("Unable to load '%s'", resName);
865            mCtx->unlockMutex();
866            return false;
867        }
868    }
869
870    // Read RS symbol information from the .so.
871    if ( !mScriptSO) {
872        goto error;
873    }
874
875    if ( !storeRSInfoFromSO()) {
876      goto error;
877    }
878#else  // RS_COMPATIBILITY_LIB is defined
879
880    mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
881
882    if (!mScriptSO) {
883        goto error;
884    }
885
886    if (!storeRSInfoFromSO()) {
887        goto error;
888    }
889#endif
890    mCtx->unlockMutex();
891    return true;
892
893error:
894
895    mCtx->unlockMutex();
896    if (mScriptSO) {
897        dlclose(mScriptSO);
898    }
899    return false;
900}
901
902#ifndef RS_COMPATIBILITY_LIB
903
904const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
905                                          size_t bitcodeSize) {
906    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
907
908    // If we're debugging, use the debug library.
909    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
910        return SYSLIBPATH"/libclcore_debug.bc";
911    }
912
913    // If a callback has been registered to specify a library, use that.
914    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
915    if (selectRTCallback != nullptr) {
916        return selectRTCallback((const char*)bitcode, bitcodeSize);
917    }
918
919    // Check for a platform specific library
920#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
921    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
922    if (prec == bcinfo::RS_FP_Relaxed) {
923        // NEON-capable ARMv7a devices can use an accelerated math library
924        // for all reduced precision scripts.
925        // ARMv8 does not use NEON, as ASIMD can be used with all precision
926        // levels.
927        return SYSLIBPATH"/libclcore_neon.bc";
928    } else {
929        return defaultLib;
930    }
931#elif defined(__i386__) || defined(__x86_64__)
932    // x86 devices will use an optimized library.
933    return SYSLIBPATH"/libclcore_x86.bc";
934#else
935    return defaultLib;
936#endif
937}
938
939#endif
940
941void RsdCpuScriptImpl::populateScript(Script *script) {
942    // Copy info over to runtime
943    script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
944    script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
945    script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
946    script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
947    script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
948
949    // Bug, need to stash in metadata
950    if (mRootExpand) {
951        script->mHal.info.root = mRootExpand;
952    } else {
953        script->mHal.info.root = mRoot;
954    }
955}
956
957
// Pointer to a function taking three untyped data pointers (const, mutable,
// const) followed by four 32-bit unsigned values, and returning nothing.
using rs_t = void (*)(const void *, void *, const void *,
                      uint32_t, uint32_t, uint32_t, uint32_t);
959
960bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
961                                        uint32_t inLen,
962                                        Allocation * aout,
963                                        const void * usr, uint32_t usrLen,
964                                        const RsScriptCall *sc,
965                                        MTLaunchStruct *mtls) {
966
967    memset(mtls, 0, sizeof(MTLaunchStruct));
968
969    for (int index = inLen; --index >= 0;) {
970        const Allocation* ain = ains[index];
971
972        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
973        if (ain != nullptr &&
974            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
975
976            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
977                                         "rsForEach called with null in allocations");
978            return false;
979        }
980    }
981
982    if (aout &&
983        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
984
985        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
986                                     "rsForEach called with null out allocations");
987        return false;
988    }
989
990    if (inLen > 0) {
991        const Allocation *ain0   = ains[0];
992        const Type       *inType = ain0->getType();
993
994        mtls->fep.dim.x = inType->getDimX();
995        mtls->fep.dim.y = inType->getDimY();
996        mtls->fep.dim.z = inType->getDimZ();
997
998        for (int Index = inLen; --Index >= 1;) {
999            if (!ain0->hasSameDims(ains[Index])) {
1000                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1001                  "Failed to launch kernel; dimensions of input and output"
1002                  "allocations do not match.");
1003
1004                return false;
1005            }
1006        }
1007
1008    } else if (aout != nullptr) {
1009        const Type *outType = aout->getType();
1010
1011        mtls->fep.dim.x = outType->getDimX();
1012        mtls->fep.dim.y = outType->getDimY();
1013        mtls->fep.dim.z = outType->getDimZ();
1014
1015    } else {
1016        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1017                                     "rsForEach called with null allocations");
1018        return false;
1019    }
1020
1021    if (inLen > 0 && aout != nullptr) {
1022        if (!ains[0]->hasSameDims(aout)) {
1023            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1024              "Failed to launch kernel; dimensions of input and output allocations do not match.");
1025
1026            return false;
1027        }
1028    }
1029
1030    if (!sc || (sc->xEnd == 0)) {
1031        mtls->end.x = mtls->fep.dim.x;
1032    } else {
1033        mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
1034        mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
1035        if (mtls->start.x >= mtls->end.x) return false;
1036    }
1037
1038    if (!sc || (sc->yEnd == 0)) {
1039        mtls->end.y = mtls->fep.dim.y;
1040    } else {
1041        mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
1042        mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
1043        if (mtls->start.y >= mtls->end.y) return false;
1044    }
1045
1046    if (!sc || (sc->zEnd == 0)) {
1047        mtls->end.z = mtls->fep.dim.z;
1048    } else {
1049        mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
1050        mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
1051        if (mtls->start.z >= mtls->end.z) return false;
1052    }
1053
1054    if (!sc || (sc->arrayEnd == 0)) {
1055        mtls->end.array[0] = mtls->fep.dim.array[0];
1056    } else {
1057        mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
1058        mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
1059        if (mtls->start.array[0] >= mtls->end.array[0]) return false;
1060    }
1061
1062    if (!sc || (sc->array2End == 0)) {
1063        mtls->end.array[1] = mtls->fep.dim.array[1];
1064    } else {
1065        mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
1066        mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
1067        if (mtls->start.array[1] >= mtls->end.array[1]) return false;
1068    }
1069
1070    if (!sc || (sc->array3End == 0)) {
1071        mtls->end.array[2] = mtls->fep.dim.array[2];
1072    } else {
1073        mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
1074        mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
1075        if (mtls->start.array[2] >= mtls->end.array[2]) return false;
1076    }
1077
1078    if (!sc || (sc->array4End == 0)) {
1079        mtls->end.array[3] = mtls->fep.dim.array[3];
1080    } else {
1081        mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
1082        mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
1083        if (mtls->start.array[3] >= mtls->end.array[3]) return false;
1084    }
1085
1086
1087    // The X & Y walkers always want 0-1 min even if dim is not present
1088    mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
1089    mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
1090
1091    mtls->rsc        = mCtx;
1092    if (ains) {
1093        memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
1094    }
1095    mtls->aout[0]    = aout;
1096    mtls->fep.usr    = usr;
1097    mtls->fep.usrLen = usrLen;
1098    mtls->mSliceSize = 1;
1099    mtls->mSliceNum  = 0;
1100
1101    mtls->isThreadable  = mIsThreadable;
1102
1103    if (inLen > 0) {
1104        mtls->fep.inLen = inLen;
1105        for (int index = inLen; --index >= 0;) {
1106            mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
1107            mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
1108        }
1109    }
1110
1111    if (aout != nullptr) {
1112        mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
1113        mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
1114    }
1115
1116    // All validation passed, ok to launch threads
1117    return true;
1118}
1119
1120
1121void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
1122                                     const Allocation ** ains,
1123                                     uint32_t inLen,
1124                                     Allocation * aout,
1125                                     const void * usr,
1126                                     uint32_t usrLen,
1127                                     const RsScriptCall *sc) {
1128
1129    MTLaunchStruct mtls;
1130
1131    if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
1132        forEachKernelSetup(slot, &mtls);
1133
1134        RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1135        mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
1136        mCtx->setTLS(oldTLS);
1137    }
1138}
1139
1140void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
1141    mtls->script = this;
1142    mtls->fep.slot = slot;
1143    mtls->kernel = mScriptExec->getForEachFunction(slot);
1144    rsAssert(mtls->kernel != nullptr);
1145    mtls->sig = mScriptExec->getForEachSignature(slot);
1146}
1147
1148int RsdCpuScriptImpl::invokeRoot() {
1149    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1150    int ret = mRoot();
1151    mCtx->setTLS(oldTLS);
1152    return ret;
1153}
1154
1155void RsdCpuScriptImpl::invokeInit() {
1156    if (mInit) {
1157        mInit();
1158    }
1159}
1160
1161void RsdCpuScriptImpl::invokeFreeChildren() {
1162    if (mFreeChildren) {
1163        mFreeChildren();
1164    }
1165}
1166
1167void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
1168                                      size_t paramLength) {
1169    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
1170    void * ap = nullptr;
1171
1172#if defined(__x86_64__)
1173    // The invoked function could have input parameter of vector type for example float4 which
1174    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
1175    // So try to align void* params before passing them into RS exported function.
1176
1177    if ((uint8_t)(uint64_t)params & 0x0F) {
1178        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
1179            memcpy(ap, params, paramLength);
1180        } else {
1181            ALOGE("x86_64: invokeFunction memalign error, still use params which"
1182                  " is not 16 bytes aligned.");
1183        }
1184    }
1185#endif
1186
1187    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1188    reinterpret_cast<void (*)(const void *, uint32_t)>(
1189        mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
1190
1191    mCtx->setTLS(oldTLS);
1192}
1193
1194void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
1195    //rsAssert(!script->mFieldIsObject[slot]);
1196    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
1197
1198    //if (mIntrinsicID) {
1199        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
1200        //return;
1201    //}
1202
1203    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1204    if (!destPtr) {
1205        //ALOGV("Calling setVar on slot = %i which is null", slot);
1206        return;
1207    }
1208
1209    memcpy(destPtr, data, dataLength);
1210}
1211
1212void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
1213    //rsAssert(!script->mFieldIsObject[slot]);
1214    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
1215
1216    int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1217    if (!srcPtr) {
1218        //ALOGV("Calling setVar on slot = %i which is null", slot);
1219        return;
1220    }
1221    memcpy(data, srcPtr, dataLength);
1222}
1223
1224
1225void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
1226                                                const Element *elem,
1227                                                const uint32_t *dims, size_t dimLength) {
1228    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1229    if (!destPtr) {
1230        //ALOGV("Calling setVar on slot = %i which is null", slot);
1231        return;
1232    }
1233
1234    // We want to look at dimension in terms of integer components,
1235    // but dimLength is given in terms of bytes.
1236    dimLength /= sizeof(int);
1237
1238    // Only a single dimension is currently supported.
1239    rsAssert(dimLength == 1);
1240    if (dimLength == 1) {
1241        // First do the increment loop.
1242        size_t stride = elem->getSizeBytes();
1243        const char *cVal = reinterpret_cast<const char *>(data);
1244        for (uint32_t i = 0; i < dims[0]; i++) {
1245            elem->incRefs(cVal);
1246            cVal += stride;
1247        }
1248
1249        // Decrement loop comes after (to prevent race conditions).
1250        char *oldVal = reinterpret_cast<char *>(destPtr);
1251        for (uint32_t i = 0; i < dims[0]; i++) {
1252            elem->decRefs(oldVal);
1253            oldVal += stride;
1254        }
1255    }
1256
1257    memcpy(destPtr, data, dataLength);
1258}
1259
1260void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
1261
1262    //rsAssert(!script->mFieldIsObject[slot]);
1263    //ALOGE("setGlobalBind %i %p", slot, data);
1264
1265    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1266    if (!destPtr) {
1267        //ALOGV("Calling setVar on slot = %i which is null", slot);
1268        return;
1269    }
1270
1271    void *ptr = nullptr;
1272    mBoundAllocs[slot] = data;
1273    if (data) {
1274        ptr = data->mHal.drvState.lod[0].mallocPtr;
1275    }
1276    memcpy(destPtr, &ptr, sizeof(void *));
1277}
1278
1279void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
1280
1281    //rsAssert(script->mFieldIsObject[slot]);
1282    //ALOGE("setGlobalObj %i %p", slot, data);
1283
1284    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1285    if (!destPtr) {
1286        //ALOGV("Calling setVar on slot = %i which is null", slot);
1287        return;
1288    }
1289
1290    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
1291}
1292
1293RsdCpuScriptImpl::~RsdCpuScriptImpl() {
1294#ifndef RS_COMPATIBILITY_LIB
1295    if (mCompilerDriver) {
1296        delete mCompilerDriver;
1297    }
1298#endif
1299
1300    if (mScriptExec != nullptr) {
1301        delete mScriptExec;
1302    }
1303    if (mBoundAllocs) delete[] mBoundAllocs;
1304    if (mScriptSO) {
1305        dlclose(mScriptSO);
1306    }
1307}
1308
1309Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
1310    if (!ptr) {
1311        return nullptr;
1312    }
1313
1314    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
1315        Allocation *a = mBoundAllocs[ct];
1316        if (!a) continue;
1317        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
1318            return a;
1319        }
1320    }
1321    ALOGE("rsGetAllocation, failed to find %p", ptr);
1322    return nullptr;
1323}
1324
1325void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
1326                                 uint32_t inLen, Allocation * aout,
1327                                 const void * usr, uint32_t usrLen,
1328                                 const RsScriptCall *sc) {}
1329
1330void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
1331                                  uint32_t inLen, Allocation * aout,
1332                                  const void * usr, uint32_t usrLen,
1333                                  const RsScriptCall *sc) {}
1334
1335
1336}
1337}
1338