rsCpuScript.cpp revision d9bae689c1b8c3f2ed1a5f2b374dc9393584b8dd
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19
20#ifdef RS_COMPATIBILITY_LIB
21    #include <stdio.h>
22    #include <sys/stat.h>
23    #include <unistd.h>
24#else
25    #include <bcc/BCCContext.h>
26    #include <bcc/Config/Config.h>
27    #include <bcc/Renderscript/RSCompilerDriver.h>
28    #include <bcc/Renderscript/RSInfo.h>
29    #include <bcinfo/MetadataExtractor.h>
30    #include <cutils/properties.h>
31
32    #include <sys/types.h>
33    #include <sys/wait.h>
34    #include <unistd.h>
35
36    #include <string>
37    #include <vector>
38#endif
39
40#include <set>
41#include <string>
42#include <dlfcn.h>
43#include <stdlib.h>
44#include <string.h>
45#include <fstream>
46#include <iostream>
47
48#ifdef __LP64__
49#define SYSLIBPATH "/system/lib64"
50#else
51#define SYSLIBPATH "/system/lib"
52#endif
53
54namespace {
55
56// Create a len length string containing random characters from [A-Za-z0-9].
57static std::string getRandomString(size_t len) {
58    char buf[len + 1];
59    for (size_t i = 0; i < len; i++) {
60        uint32_t r = arc4random() & 0xffff;
61        r %= 62;
62        if (r < 26) {
63            // lowercase
64            buf[i] = 'a' + r;
65        } else if (r < 52) {
66            // uppercase
67            buf[i] = 'A' + (r - 26);
68        } else {
69            // Use a number
70            buf[i] = '0' + (r - 52);
71        }
72    }
73    buf[len] = '\0';
74    return std::string(buf);
75}
76
// Make sure the directory at path can be used, creating it (mode 0700) when
// it is not already readable, writable and searchable by this process.
// Returns true if the path is accessible or was created, false otherwise.
static bool ensureCacheDirExists(const char *path) {
    // Nothing to do when we already have full rwx access.
    const bool alreadyUsable = (access(path, R_OK | W_OK | X_OK) == 0);

    // Otherwise the only remedy attempted is creating the directory; the
    // short-circuit keeps mkdir() from running when access() succeeded.
    return alreadyUsable || (mkdir(path, 0700) == 0);
}
88
89// Copy the file named \p srcFile to \p dstFile.
90// Return 0 on success and -1 if anything wasn't copied.
91static int copyFile(const char *dstFile, const char *srcFile) {
92    std::ifstream srcStream(srcFile);
93    if (!srcStream) {
94        ALOGE("Could not verify or read source file: %s", srcFile);
95        return -1;
96    }
97    std::ofstream dstStream(dstFile);
98    if (!dstStream) {
99        ALOGE("Could not verify or write destination file: %s", dstFile);
100        return -1;
101    }
102    dstStream << srcStream.rdbuf();
103    if (!dstStream) {
104        ALOGE("Could not write destination file: %s", dstFile);
105        return -1;
106    }
107
108    srcStream.close();
109    dstStream.close();
110
111    return 0;
112}
113
114#define RS_CACHE_DIR "com.android.renderscript.cache"
115
116// Attempt to load the shared library from origName, but then fall back to
117// creating a copy of the shared library if necessary (to ensure instancing).
118// This function returns the dlopen()-ed handle if successful.
119static void *loadSOHelper(const char *origName, const char *cacheDir,
120                          const char *resName) {
121    // Keep track of which .so libraries have been loaded. Once a library is
122    // in the set (per-process granularity), we must instead make a copy of
123    // the original shared object (randomly named .so file) and load that one
124    // instead. If we don't do this, we end up aliasing global data between
125    // the various Script instances (which are supposed to be completely
126    // independent).
127    static std::set<std::string> LoadedLibraries;
128
129    void *loaded = nullptr;
130
131    // Skip everything if we don't even have the original library available.
132    if (access(origName, F_OK) != 0) {
133        return nullptr;
134    }
135
136    // Common path is that we have not loaded this Script/library before.
137    if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
138        loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
139        if (loaded) {
140            LoadedLibraries.insert(origName);
141        }
142        return loaded;
143    }
144
145    std::string newName(cacheDir);
146
147    // Append RS_CACHE_DIR only if it is not found in cacheDir
148    // In driver mode, RS_CACHE_DIR is already appended to cacheDir.
149    if (newName.find(RS_CACHE_DIR) == std::string::npos) {
150        newName.append("/" RS_CACHE_DIR "/");
151    }
152
153    if (!ensureCacheDirExists(newName.c_str())) {
154        ALOGE("Could not verify or create cache dir: %s", cacheDir);
155        return nullptr;
156    }
157
158    // Construct an appropriately randomized filename for the copy.
159    newName.append("librs.");
160    newName.append(resName);
161    newName.append("#");
162    newName.append(getRandomString(6));  // 62^6 potential filename variants.
163    newName.append(".so");
164
165    int r = copyFile(newName.c_str(), origName);
166    if (r != 0) {
167        ALOGE("Could not create copy %s -> %s", origName, newName.c_str());
168        return nullptr;
169    }
170    loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
171    r = unlink(newName.c_str());
172    if (r != 0) {
173        ALOGE("Could not unlink copy %s", newName.c_str());
174    }
175    if (loaded) {
176        LoadedLibraries.insert(newName.c_str());
177    }
178
179    return loaded;
180}
181
// Compute the file name of the script's shared object for resName.
//   RS_SERVER builds:           "lib<resName>.so" (cacheDir is not used)
//   RS_COMPATIBILITY_LIB builds: cacheDir cut at its last "cache", followed
//                                by "/lib/librs.<resName>.so"
//   otherwise:                   "<cacheDir>/librs.<resName>.so"
static std::string findSharedObjectName(const char *cacheDir,
                                        const char *resName) {
#ifdef RS_SERVER
    std::string soPath("lib");
#else
    std::string soPath(cacheDir);
#ifdef RS_COMPATIBILITY_LIB
    // Drop everything from the last occurrence of "cache" onwards; a
    // cacheDir without that component is logged and used unmodified.
    const size_t cachePos = soPath.rfind("cache");
    if (cachePos == std::string::npos) {
        ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
    } else {
        soPath.erase(cachePos);
    }
    soPath += "/lib/librs.";
#else
    soPath += "/librs.";
#endif  // RS_COMPATIBILITY_LIB
#endif  // RS_SERVER

    soPath += resName;
    soPath += ".so";
    return soPath;
}
207
208// Load the shared library referred to by cacheDir and resName. If we have
209// already loaded this library, we instead create a new copy (in the
210// cache dir) and then load that. We then immediately destroy the copy.
211// This is required behavior to implement script instancing for the support
212// library, since shared objects are loaded and de-duped by name only.
213static void *loadSharedLibrary(const char *cacheDir, const char *resName) {
214    void *loaded = nullptr;
215
216    std::string scriptSOName = findSharedObjectName(cacheDir, resName);
217
218    // We should check if we can load the library from the standard app
219    // location for shared libraries first.
220    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
221
222    if (loaded == nullptr) {
223        ALOGE("Unable to open shared library (%s): %s",
224              scriptSOName.c_str(), dlerror());
225
226#ifdef RS_COMPATIBILITY_LIB
227        // One final attempt to find the library in "/system/lib".
228        // We do this to allow bundled applications to use the compatibility
229        // library fallback path. Those applications don't have a private
230        // library path, so they need to install to the system directly.
231        // Note that this is really just a testing path.
232        std::string scriptSONameSystem("/system/lib/librs.");
233        scriptSONameSystem.append(resName);
234        scriptSONameSystem.append(".so");
235        loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
236                              resName);
237        if (loaded == nullptr) {
238            ALOGE("Unable to open system shared library (%s): %s",
239                  scriptSONameSystem.c_str(), dlerror());
240        }
241#endif
242    }
243
244    return loaded;
245}
246
247#ifndef RS_COMPATIBILITY_LIB
248
249static bool is_force_recompile() {
250#ifdef RS_SERVER
251  return false;
252#else
253  char buf[PROPERTY_VALUE_MAX];
254
255  // Re-compile if floating point precision has been overridden.
256  property_get("debug.rs.precision", buf, "");
257  if (buf[0] != '\0') {
258    return true;
259  }
260
261  // Re-compile if debug.rs.forcerecompile is set.
262  property_get("debug.rs.forcerecompile", buf, "0");
263  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
264    return true;
265  } else {
266    return false;
267  }
268#endif  // RS_SERVER
269}
270
271const static char *BCC_EXE_PATH = "/system/bin/bcc";
272
273static void setCompileArguments(std::vector<const char*>* args,
274                                const std::string& bcFileName,
275                                const char* cacheDir, const char* resName,
276                                const char* core_lib, bool useRSDebugContext,
277                                const char* bccPluginName) {
278    rsAssert(cacheDir && resName && core_lib);
279    args->push_back(BCC_EXE_PATH);
280    args->push_back("-unroll-runtime");
281    args->push_back("-scalarize-load-store");
282    args->push_back("-o");
283    args->push_back(resName);
284    args->push_back("-output_path");
285    args->push_back(cacheDir);
286    args->push_back("-bclib");
287    args->push_back(core_lib);
288    args->push_back("-mtriple");
289    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
290
291    // Enable workaround for A53 codegen by default.
292#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
293    args->push_back("-aarch64-fix-cortex-a53-835769");
294#endif
295
296    // Execute the bcc compiler.
297    if (useRSDebugContext) {
298        args->push_back("-rs-debug-ctx");
299    } else {
300        // Only load additional libraries for compiles that don't use
301        // the debug context.
302        if (bccPluginName && strlen(bccPluginName) > 0) {
303            args->push_back("-load");
304            args->push_back(bccPluginName);
305        }
306    }
307
308    args->push_back("-fPIC");
309    args->push_back("-embedRSInfo");
310
311    args->push_back(bcFileName.c_str());
312    args->push_back(nullptr);
313}
314
315static bool compileBitcode(const std::string &bcFileName,
316                           const char *bitcode,
317                           size_t bitcodeSize,
318                           const char **compileArguments,
319                           const std::string &compileCommandLine) {
320    rsAssert(bitcode && bitcodeSize);
321
322    FILE *bcfile = fopen(bcFileName.c_str(), "w");
323    if (!bcfile) {
324        ALOGE("Could not write to %s", bcFileName.c_str());
325        return false;
326    }
327    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
328    fclose(bcfile);
329    if (nwritten != bitcodeSize) {
330        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
331              bcFileName.c_str());
332        return false;
333    }
334
335    pid_t pid = fork();
336
337    switch (pid) {
338    case -1: {  // Error occurred (we attempt no recovery)
339        ALOGE("Couldn't fork for bcc compiler execution");
340        return false;
341    }
342    case 0: {  // Child process
343        ALOGV("Invoking BCC with: %s", compileCommandLine.c_str());
344        execv(BCC_EXE_PATH, (char* const*)compileArguments);
345
346        ALOGE("execv() failed: %s", strerror(errno));
347        abort();
348        return false;
349    }
350    default: {  // Parent process (actual driver)
351        // Wait on child process to finish compiling the source.
352        int status = 0;
353        pid_t w = waitpid(pid, &status, 0);
354        if (w == -1) {
355            ALOGE("Could not wait for bcc compiler");
356            return false;
357        }
358
359        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
360            return true;
361        }
362
363        ALOGE("bcc compiler terminated unexpectedly");
364        return false;
365    }
366    }
367}
368
369const static char *LD_EXE_PATH = "/system/bin/ld.mc";
370
371static bool createSharedLib(const char *cacheDir, const char *resName) {
372    std::string sharedLibName = findSharedObjectName(cacheDir, resName);
373    std::string objFileName = cacheDir;
374    objFileName.append("/");
375    objFileName.append(resName);
376    objFileName.append(".o");
377
378    const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so";
379    std::vector<const char *> args = {
380        LD_EXE_PATH,
381        "-shared",
382        "-nostdlib",
383        compiler_rt,
384        "-mtriple", DEFAULT_TARGET_TRIPLE_STRING,
385        "-L", SYSLIBPATH,
386        "-lRSDriver", "-lm", "-lc",
387        objFileName.c_str(),
388        "-o", sharedLibName.c_str(),
389        nullptr
390    };
391
392    std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data());
393
394    pid_t pid = fork();
395
396    switch (pid) {
397    case -1: {  // Error occurred (we attempt no recovery)
398        ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH);
399        return false;
400    }
401    case 0: {  // Child process
402        ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str());
403        execv(LD_EXE_PATH, (char* const*) args.data());
404
405        ALOGE("execv() failed: %s", strerror(errno));
406        abort();
407        return false;
408    }
409    default: {  // Parent process (actual driver)
410        // Wait on child process to finish compiling the source.
411        int status = 0;
412        pid_t w = waitpid(pid, &status, 0);
413        if (w == -1) {
414            ALOGE("Could not wait for linker (%s)", LD_EXE_PATH);
415            return false;
416        }
417
418        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
419            return true;
420        }
421
422        ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH);
423        return false;
424    }
425    }
426}
427#endif  // !defined(RS_COMPATIBILITY_LIB)
428}  // namespace
429
430namespace android {
431namespace renderscript {
432
433#define MAXLINE 500
434#define MAKE_STR_HELPER(S) #S
435#define MAKE_STR(S) MAKE_STR_HELPER(S)
436#define EXPORT_VAR_STR "exportVarCount: "
437#define EXPORT_FUNC_STR "exportFuncCount: "
438#define EXPORT_FOREACH_STR "exportForEachCount: "
439#define OBJECT_SLOT_STR "objectSlotCount: "
440
// fgets()-style reader over an in-memory string.
//
// Copies characters from *ppstr into s until a newline has been copied (it
// is kept in s), the end of the string is reached, or size - 1 characters
// have been copied. s is always NUL-terminated on success, and *ppstr is
// advanced past the characters consumed.
//
// Returns s when something was read, and nullptr when ppstr or *ppstr is
// null, size < 1, or *ppstr already points at the terminating '\0'.
static char* strgets(char *s, int size, const char **ppstr) {
    if (!ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
        return nullptr;
    }

    int i;
    for (i = 0; i < (size - 1); i++) {
        const char c = **ppstr;
        s[i] = c;
        if (c == '\0') {
            // Leave *ppstr parked on the terminator. Stepping past it would
            // make the next call inspect memory beyond the end of the
            // string instead of reporting end-of-input.
            return s;
        }
        (*ppstr)++;
        if (c == '\n') {
            s[i+1] = '\0';
            return s;
        }
    }

    // size has been exceeded.
    s[i] = '\0';

    return s;
}
465
466RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
467    mCtx = ctx;
468    mScript = s;
469
470    mScriptSO = nullptr;
471
472#ifndef RS_COMPATIBILITY_LIB
473    mCompilerDriver = nullptr;
474#endif
475
476
477    mRoot = nullptr;
478    mRootExpand = nullptr;
479    mInit = nullptr;
480    mFreeChildren = nullptr;
481    mScriptExec = nullptr;
482
483    mBoundAllocs = nullptr;
484    mIntrinsicData = nullptr;
485    mIsThreadable = true;
486}
487
488bool RsdCpuScriptImpl::storeRSInfoFromSO() {
489    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
490    if (mRoot) {
491        //ALOGE("Found root(): %p", mRoot);
492    }
493    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
494    if (mRootExpand) {
495        //ALOGE("Found root.expand(): %p", mRootExpand);
496    }
497    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
498    if (mInit) {
499        //ALOGE("Found init(): %p", mInit);
500    }
501    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
502    if (mFreeChildren) {
503        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
504    }
505
506    mScriptExec = ScriptExecutable::createFromSharedObject(
507            mCtx->getContext(), mScriptSO);
508
509    if (mScriptExec == nullptr) {
510        return false;
511    }
512
513    size_t varCount = mScriptExec->getExportedVariableCount();
514    if (varCount > 0) {
515        mBoundAllocs = new Allocation *[varCount];
516        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
517    }
518
519    return true;
520}
521
522ScriptExecutable* ScriptExecutable::createFromSharedObject(
523    Context* RSContext, void* sharedObj) {
524    char line[MAXLINE];
525
526    size_t varCount = 0;
527    size_t funcCount = 0;
528    size_t forEachCount = 0;
529    size_t objectSlotCount = 0;
530
531    const char *rsInfo = (const char *) dlsym(sharedObj, ".rs.info");
532
533    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
534        return nullptr;
535    }
536    if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
537        ALOGE("Invalid export var count!: %s", line);
538        return nullptr;
539    }
540
541    std::vector<void*> fieldAddress;
542
543    for (size_t i = 0; i < varCount; ++i) {
544        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
545            return nullptr;
546        }
547        char *c = strrchr(line, '\n');
548        if (c) {
549            *c = '\0';
550        }
551        void* addr = dlsym(sharedObj, line);
552        if (addr == nullptr) {
553            ALOGE("Failed to find variable address for %s: %s",
554                  line, dlerror());
555            // Not a critical error if we don't find a global variable.
556        }
557        fieldAddress.push_back(addr);
558    }
559
560    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
561        return nullptr;
562    }
563    if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
564        ALOGE("Invalid export func count!: %s", line);
565        return nullptr;
566    }
567
568    std::vector<InvokeFunc_t> invokeFunctions(funcCount);
569
570    for (size_t i = 0; i < funcCount; ++i) {
571        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
572            return nullptr ;
573        }
574        char *c = strrchr(line, '\n');
575        if (c) {
576            *c = '\0';
577        }
578
579        invokeFunctions[i] = (InvokeFunc_t) dlsym(sharedObj, line);
580        if (invokeFunctions[i] == nullptr) {
581            ALOGE("Failed to get function address for %s(): %s",
582                  line, dlerror());
583            return nullptr;
584        }
585    }
586
587    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
588        return nullptr;
589    }
590    if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
591        ALOGE("Invalid export forEach count!: %s", line);
592        return nullptr;
593    }
594
595    std::vector<ForEachFunc_t> forEachFunctions(forEachCount);
596    std::vector<uint32_t> forEachSignatures(forEachCount);
597
598    for (size_t i = 0; i < forEachCount; ++i) {
599        unsigned int tmpSig = 0;
600        char tmpName[MAXLINE];
601
602        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
603            return nullptr;
604        }
605        if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
606                   &tmpSig, tmpName) != 2) {
607          ALOGE("Invalid export forEach!: %s", line);
608          return nullptr;
609        }
610
611        // Lookup the expanded ForEach kernel.
612        strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
613        forEachSignatures[i] = tmpSig;
614        forEachFunctions[i] =
615            (ForEachFunc_t) dlsym(sharedObj, tmpName);
616        if (i != 0 && forEachFunctions[i] == nullptr) {
617            // Ignore missing root.expand functions.
618            // root() is always specified at location 0.
619            ALOGE("Failed to find forEach function address for %s: %s",
620                  tmpName, dlerror());
621            return nullptr;
622        }
623    }
624
625    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
626        return nullptr;
627    }
628    if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
629        ALOGE("Invalid object slot count!: %s", line);
630        return nullptr;
631    }
632
633    std::vector<bool> fieldIsObject(varCount, false);
634
635    rsAssert(varCount > 0);
636    for (size_t i = 0; i < objectSlotCount; ++i) {
637        uint32_t varNum = 0;
638        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
639            return nullptr;
640        }
641        if (sscanf(line, "%u", &varNum) != 1) {
642            ALOGE("Invalid object slot!: %s", line);
643            return nullptr;
644        }
645
646        if (varNum < varCount) {
647            fieldIsObject[varNum] = true;
648        }
649    }
650
651    return new ScriptExecutable(
652        RSContext, fieldAddress, fieldIsObject, invokeFunctions,
653        forEachFunctions, forEachSignatures);
654}
655
656bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
657                            uint8_t const *bitcode, size_t bitcodeSize,
658                            uint32_t flags, char const *bccPluginName) {
659    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
660    //ALOGE("rsdScriptInit %p %p", rsc, script);
661
662    mCtx->lockMutex();
663#ifndef RS_COMPATIBILITY_LIB
664    bool useRSDebugContext = false;
665
666    mCompilerDriver = nullptr;
667
668    mCompilerDriver = new bcc::RSCompilerDriver();
669    if (mCompilerDriver == nullptr) {
670        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
671        mCtx->unlockMutex();
672        return false;
673    }
674
675    // Run any compiler setup functions we have been provided with.
676    RSSetupCompilerCallback setupCompilerCallback =
677            mCtx->getSetupCompilerCallback();
678    if (setupCompilerCallback != nullptr) {
679        setupCompilerCallback(mCompilerDriver);
680    }
681
682    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
683    if (!bitcodeMetadata.extract()) {
684        ALOGE("Could not extract metadata from bitcode");
685        mCtx->unlockMutex();
686        return false;
687    }
688
689    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
690
691    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
692        mCompilerDriver->setDebugContext(true);
693        useRSDebugContext = true;
694    }
695
696    std::string bcFileName(cacheDir);
697    bcFileName.append("/");
698    bcFileName.append(resName);
699    bcFileName.append(".bc");
700
701    std::vector<const char*> compileArguments;
702    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
703                        useRSDebugContext, bccPluginName);
704    // The last argument of compileArguments ia a nullptr, so remove 1 from the size.
705    std::string compileCommandLine =
706                bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data());
707
708    if (!is_force_recompile()) {
709        mScriptSO = loadSharedLibrary(cacheDir, resName);
710    }
711
712    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
713    // again.
714    if (mScriptSO == nullptr) {
715        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
716                            compileArguments.data(), compileCommandLine))
717        {
718            ALOGE("bcc: FAILS to compile '%s'", resName);
719            mCtx->unlockMutex();
720            return false;
721        }
722
723        if (!createSharedLib(cacheDir, resName)) {
724            ALOGE("Linker: Failed to link object file '%s'", resName);
725            mCtx->unlockMutex();
726            return false;
727        }
728
729        mScriptSO = loadSharedLibrary(cacheDir, resName);
730        if (mScriptSO == nullptr) {
731            ALOGE("Unable to load '%s'", resName);
732            mCtx->unlockMutex();
733            return false;
734        }
735    }
736
737    // Read RS symbol information from the .so.
738    if ( !mScriptSO) {
739        goto error;
740    }
741
742    if ( !storeRSInfoFromSO()) {
743      goto error;
744    }
745#else  // RS_COMPATIBILITY_LIB is defined
746
747    mScriptSO = loadSharedLibrary(cacheDir, resName);
748
749    if (!mScriptSO) {
750        goto error;
751    }
752
753    if (!storeRSInfoFromSO()) {
754        goto error;
755    }
756#endif
757    mCtx->unlockMutex();
758    return true;
759
760error:
761
762    mCtx->unlockMutex();
763    if (mScriptSO) {
764        dlclose(mScriptSO);
765    }
766    return false;
767}
768
769#ifndef RS_COMPATIBILITY_LIB
770
771const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
772                                          size_t bitcodeSize) {
773    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
774
775    // If we're debugging, use the debug library.
776    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
777        return SYSLIBPATH"/libclcore_debug.bc";
778    }
779
780    // If a callback has been registered to specify a library, use that.
781    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
782    if (selectRTCallback != nullptr) {
783        return selectRTCallback((const char*)bitcode, bitcodeSize);
784    }
785
786    // Check for a platform specific library
787#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
788    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
789    if (prec == bcinfo::RS_FP_Relaxed) {
790        // NEON-capable ARMv7a devices can use an accelerated math library
791        // for all reduced precision scripts.
792        // ARMv8 does not use NEON, as ASIMD can be used with all precision
793        // levels.
794        return SYSLIBPATH"/libclcore_neon.bc";
795    } else {
796        return defaultLib;
797    }
798#elif defined(__i386__) || defined(__x86_64__)
799    // x86 devices will use an optimized library.
800    return SYSLIBPATH"/libclcore_x86.bc";
801#else
802    return defaultLib;
803#endif
804}
805
806#endif
807
808void RsdCpuScriptImpl::populateScript(Script *script) {
809    // Copy info over to runtime
810    script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
811    script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
812    script->mHal.info.exportedPragmaCount = 0;
813    script->mHal.info.exportedPragmaKeyList = 0;
814    script->mHal.info.exportedPragmaValueList = 0;
815
816    // Bug, need to stash in metadata
817    if (mRootExpand) {
818        script->mHal.info.root = mRootExpand;
819    } else {
820        script->mHal.info.root = mRoot;
821    }
822}
823
824
825typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
826
827void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
828                                        uint32_t inLen,
829                                        Allocation * aout,
830                                        const void * usr, uint32_t usrLen,
831                                        const RsScriptCall *sc,
832                                        MTLaunchStruct *mtls) {
833
834    memset(mtls, 0, sizeof(MTLaunchStruct));
835
836    for (int index = inLen; --index >= 0;) {
837        const Allocation* ain = ains[index];
838
839        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
840        if (ain != nullptr &&
841            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
842
843            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
844                                         "rsForEach called with null in allocations");
845            return;
846        }
847    }
848
849    if (aout &&
850        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
851
852        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
853                                     "rsForEach called with null out allocations");
854        return;
855    }
856
857    if (inLen > 0) {
858        const Allocation *ain0   = ains[0];
859        const Type       *inType = ain0->getType();
860
861        mtls->fep.dim.x = inType->getDimX();
862        mtls->fep.dim.y = inType->getDimY();
863        mtls->fep.dim.z = inType->getDimZ();
864
865        for (int Index = inLen; --Index >= 1;) {
866            if (!ain0->hasSameDims(ains[Index])) {
867                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
868                  "Failed to launch kernel; dimensions of input and output allocations do not match.");
869
870                return;
871            }
872        }
873
874    } else if (aout != nullptr) {
875        const Type *outType = aout->getType();
876
877        mtls->fep.dim.x = outType->getDimX();
878        mtls->fep.dim.y = outType->getDimY();
879        mtls->fep.dim.z = outType->getDimZ();
880
881    } else {
882        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
883                                     "rsForEach called with null allocations");
884        return;
885    }
886
887    if (inLen > 0 && aout != nullptr) {
888        if (!ains[0]->hasSameDims(aout)) {
889            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
890              "Failed to launch kernel; dimensions of input and output allocations do not match.");
891
892            return;
893        }
894    }
895
896    if (!sc || (sc->xEnd == 0)) {
897        mtls->xEnd = mtls->fep.dim.x;
898    } else {
899        rsAssert(sc->xStart < mtls->fep.dim.x);
900        rsAssert(sc->xEnd <= mtls->fep.dim.x);
901        rsAssert(sc->xStart < sc->xEnd);
902        mtls->xStart = rsMin(mtls->fep.dim.x, sc->xStart);
903        mtls->xEnd = rsMin(mtls->fep.dim.x, sc->xEnd);
904        if (mtls->xStart >= mtls->xEnd) return;
905    }
906
907    if (!sc || (sc->yEnd == 0)) {
908        mtls->yEnd = mtls->fep.dim.y;
909    } else {
910        rsAssert(sc->yStart < mtls->fep.dim.y);
911        rsAssert(sc->yEnd <= mtls->fep.dim.y);
912        rsAssert(sc->yStart < sc->yEnd);
913        mtls->yStart = rsMin(mtls->fep.dim.y, sc->yStart);
914        mtls->yEnd = rsMin(mtls->fep.dim.y, sc->yEnd);
915        if (mtls->yStart >= mtls->yEnd) return;
916    }
917
918    if (!sc || (sc->zEnd == 0)) {
919        mtls->zEnd = mtls->fep.dim.z;
920    } else {
921        rsAssert(sc->zStart < mtls->fep.dim.z);
922        rsAssert(sc->zEnd <= mtls->fep.dim.z);
923        rsAssert(sc->zStart < sc->zEnd);
924        mtls->zStart = rsMin(mtls->fep.dim.z, sc->zStart);
925        mtls->zEnd = rsMin(mtls->fep.dim.z, sc->zEnd);
926        if (mtls->zStart >= mtls->zEnd) return;
927    }
928
929    mtls->xEnd     = rsMax((uint32_t)1, mtls->xEnd);
930    mtls->yEnd     = rsMax((uint32_t)1, mtls->yEnd);
931    mtls->zEnd     = rsMax((uint32_t)1, mtls->zEnd);
932    mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
933
934    rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0));
935
936    mtls->rsc        = mCtx;
937    if (ains) {
938        memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
939    }
940    mtls->aout[0]    = aout;
941    mtls->fep.usr    = usr;
942    mtls->fep.usrLen = usrLen;
943    mtls->mSliceSize = 1;
944    mtls->mSliceNum  = 0;
945
946    mtls->isThreadable  = mIsThreadable;
947
948    if (inLen > 0) {
949        mtls->fep.inLen = inLen;
950        for (int index = inLen; --index >= 0;) {
951            mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
952            mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
953        }
954    }
955
956    if (aout != nullptr) {
957        mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
958        mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
959    }
960}
961
962
963void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
964                                     const Allocation ** ains,
965                                     uint32_t inLen,
966                                     Allocation * aout,
967                                     const void * usr,
968                                     uint32_t usrLen,
969                                     const RsScriptCall *sc) {
970
971    MTLaunchStruct mtls;
972
973    forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
974    forEachKernelSetup(slot, &mtls);
975
976    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
977    mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
978    mCtx->setTLS(oldTLS);
979}
980
981void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
982    mtls->script = this;
983    mtls->fep.slot = slot;
984    mtls->kernel = mScriptExec->getForEachFunction(slot);
985    rsAssert(mtls->kernel != nullptr);
986    mtls->sig = mScriptExec->getForEachSignature(slot);
987}
988
989int RsdCpuScriptImpl::invokeRoot() {
990    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
991    int ret = mRoot();
992    mCtx->setTLS(oldTLS);
993    return ret;
994}
995
996void RsdCpuScriptImpl::invokeInit() {
997    if (mInit) {
998        mInit();
999    }
1000}
1001
1002void RsdCpuScriptImpl::invokeFreeChildren() {
1003    if (mFreeChildren) {
1004        mFreeChildren();
1005    }
1006}
1007
1008void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
1009                                      size_t paramLength) {
1010    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
1011    void * ap = nullptr;
1012
1013#if defined(__x86_64__)
1014    // The invoked function could have input parameter of vector type for example float4 which
1015    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
1016    // So try to align void* params before passing them into RS exported function.
1017
1018    if ((uint8_t)(uint64_t)params & 0x0F) {
1019        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
1020            memcpy(ap, params, paramLength);
1021        } else {
1022            ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned.");
1023        }
1024    }
1025#endif
1026
1027    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1028    reinterpret_cast<void (*)(const void *, uint32_t)>(
1029        mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
1030
1031    mCtx->setTLS(oldTLS);
1032}
1033
1034void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
1035    //rsAssert(!script->mFieldIsObject[slot]);
1036    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
1037
1038    //if (mIntrinsicID) {
1039        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
1040        //return;
1041    //}
1042
1043    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1044    if (!destPtr) {
1045        //ALOGV("Calling setVar on slot = %i which is null", slot);
1046        return;
1047    }
1048
1049    memcpy(destPtr, data, dataLength);
1050}
1051
1052void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
1053    //rsAssert(!script->mFieldIsObject[slot]);
1054    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
1055
1056    int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1057    if (!srcPtr) {
1058        //ALOGV("Calling setVar on slot = %i which is null", slot);
1059        return;
1060    }
1061    memcpy(data, srcPtr, dataLength);
1062}
1063
1064
1065void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
1066                                                const Element *elem,
1067                                                const uint32_t *dims, size_t dimLength) {
1068    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1069    if (!destPtr) {
1070        //ALOGV("Calling setVar on slot = %i which is null", slot);
1071        return;
1072    }
1073
1074    // We want to look at dimension in terms of integer components,
1075    // but dimLength is given in terms of bytes.
1076    dimLength /= sizeof(int);
1077
1078    // Only a single dimension is currently supported.
1079    rsAssert(dimLength == 1);
1080    if (dimLength == 1) {
1081        // First do the increment loop.
1082        size_t stride = elem->getSizeBytes();
1083        const char *cVal = reinterpret_cast<const char *>(data);
1084        for (uint32_t i = 0; i < dims[0]; i++) {
1085            elem->incRefs(cVal);
1086            cVal += stride;
1087        }
1088
1089        // Decrement loop comes after (to prevent race conditions).
1090        char *oldVal = reinterpret_cast<char *>(destPtr);
1091        for (uint32_t i = 0; i < dims[0]; i++) {
1092            elem->decRefs(oldVal);
1093            oldVal += stride;
1094        }
1095    }
1096
1097    memcpy(destPtr, data, dataLength);
1098}
1099
1100void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
1101
1102    //rsAssert(!script->mFieldIsObject[slot]);
1103    //ALOGE("setGlobalBind %i %p", slot, data);
1104
1105    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1106    if (!destPtr) {
1107        //ALOGV("Calling setVar on slot = %i which is null", slot);
1108        return;
1109    }
1110
1111    void *ptr = nullptr;
1112    mBoundAllocs[slot] = data;
1113    if (data) {
1114        ptr = data->mHal.drvState.lod[0].mallocPtr;
1115    }
1116    memcpy(destPtr, &ptr, sizeof(void *));
1117}
1118
1119void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
1120
1121    //rsAssert(script->mFieldIsObject[slot]);
1122    //ALOGE("setGlobalObj %i %p", slot, data);
1123
1124    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1125    if (!destPtr) {
1126        //ALOGV("Calling setVar on slot = %i which is null", slot);
1127        return;
1128    }
1129
1130    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
1131}
1132
1133RsdCpuScriptImpl::~RsdCpuScriptImpl() {
1134#ifndef RS_COMPATIBILITY_LIB
1135    if (mCompilerDriver) {
1136        delete mCompilerDriver;
1137    }
1138#endif
1139
1140    if (mScriptExec != nullptr) {
1141        delete mScriptExec;
1142    }
1143    if (mBoundAllocs) delete[] mBoundAllocs;
1144    if (mScriptSO) {
1145        dlclose(mScriptSO);
1146    }
1147}
1148
1149Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
1150    if (!ptr) {
1151        return nullptr;
1152    }
1153
1154    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
1155        Allocation *a = mBoundAllocs[ct];
1156        if (!a) continue;
1157        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
1158            return a;
1159        }
1160    }
1161    ALOGE("rsGetAllocation, failed to find %p", ptr);
1162    return nullptr;
1163}
1164
1165void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
1166                                 uint32_t inLen, Allocation * aout,
1167                                 const void * usr, uint32_t usrLen,
1168                                 const RsScriptCall *sc) {}
1169
1170void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
1171                                  uint32_t inLen, Allocation * aout,
1172                                  const void * usr, uint32_t usrLen,
1173                                  const RsScriptCall *sc) {}
1174
1175
1176}
1177}
1178