rsCpuScript.cpp revision eb9aa675754c49f613c6ad71d41472b30f38b007
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19
20#ifdef RS_COMPATIBILITY_LIB
21    #include <stdio.h>
22    #include <sys/stat.h>
23    #include <unistd.h>
24#else
25    #include <bcc/BCCContext.h>
26    #include <bcc/Config/Config.h>
27    #include <bcc/Renderscript/RSCompilerDriver.h>
28    #include <bcc/Renderscript/RSInfo.h>
29    #include <bcinfo/MetadataExtractor.h>
30    #include <cutils/properties.h>
31
32    #include <sys/types.h>
33    #include <sys/wait.h>
34    #include <unistd.h>
35
36    #include <string>
37    #include <vector>
38#endif
39
40#include <set>
41#include <string>
42#include <dlfcn.h>
43#include <stdlib.h>
44#include <string.h>
45#include <fstream>
46#include <iostream>
47
48#ifdef __LP64__
49#define SYSLIBPATH "/system/lib64"
50#else
51#define SYSLIBPATH "/system/lib"
52#endif
53
54namespace {
55
56// Create a len length string containing random characters from [A-Za-z0-9].
57static std::string getRandomString(size_t len) {
58    char buf[len + 1];
59    for (size_t i = 0; i < len; i++) {
60        uint32_t r = arc4random() & 0xffff;
61        r %= 62;
62        if (r < 26) {
63            // lowercase
64            buf[i] = 'a' + r;
65        } else if (r < 52) {
66            // uppercase
67            buf[i] = 'A' + (r - 26);
68        } else {
69            // Use a number
70            buf[i] = '0' + (r - 52);
71        }
72    }
73    buf[len] = '\0';
74    return std::string(buf);
75}
76
// Report whether \p path can be used as a cache directory: either it is
// already readable, writable and searchable by this process, or it could be
// created with mode 0700.
static bool ensureCacheDirExists(const char *path) {
    const bool alreadyUsable = (access(path, R_OK | W_OK | X_OK) == 0);
    // mkdir() is only attempted when the rwx check above did not succeed.
    return alreadyUsable || (mkdir(path, 0700) == 0);
}
88
89// Copy the file named \p srcFile to \p dstFile.
90// Return 0 on success and -1 if anything wasn't copied.
91static int copyFile(const char *dstFile, const char *srcFile) {
92    std::ifstream srcStream(srcFile);
93    if (!srcStream) {
94        ALOGE("Could not verify or read source file: %s", srcFile);
95        return -1;
96    }
97    std::ofstream dstStream(dstFile);
98    if (!dstStream) {
99        ALOGE("Could not verify or write destination file: %s", dstFile);
100        return -1;
101    }
102    dstStream << srcStream.rdbuf();
103    if (!dstStream) {
104        ALOGE("Could not write destination file: %s", dstFile);
105        return -1;
106    }
107
108    srcStream.close();
109    dstStream.close();
110
111    return 0;
112}
113
// Compose the file name of the shared object that holds script \p resName.
//
// - RS_SERVER builds:                    "lib<resName>.so"
// - 32-bit RS_COMPATIBILITY_LIB builds:  \p cacheDir truncated at its last
//   "cache" occurrence, followed by "/lib/librs.<resName>.so"
// - all other builds:                    "<cacheDir>/librs.<resName>.so"
static std::string findSharedObjectName(const char *cacheDir,
                                        const char *resName) {
#ifdef RS_SERVER
    std::string soPath("lib");
#else
    std::string soPath(cacheDir);
#if defined(RS_COMPATIBILITY_LIB) && !defined(__LP64__)
    const size_t cachePos = soPath.rfind("cache");
    if (cachePos == std::string::npos) {
        ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
    } else {
        // Drop the trailing "cache..." component of the path.
        soPath.erase(cachePos);
    }
    soPath += "/lib/librs.";
#else
    soPath += "/librs.";
#endif  // RS_COMPATIBILITY_LIB && !__LP64__
#endif  // RS_SERVER

    soPath += resName;
    soPath += ".so";
    return soPath;
}
138
139#ifndef RS_COMPATIBILITY_LIB
140
141static bool is_force_recompile() {
142#ifdef RS_SERVER
143  return false;
144#else
145  char buf[PROPERTY_VALUE_MAX];
146
147  // Re-compile if floating point precision has been overridden.
148  property_get("debug.rs.precision", buf, "");
149  if (buf[0] != '\0') {
150    return true;
151  }
152
153  // Re-compile if debug.rs.forcerecompile is set.
154  property_get("debug.rs.forcerecompile", buf, "0");
155  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
156    return true;
157  } else {
158    return false;
159  }
160#endif  // RS_SERVER
161}
162
163static void setCompileArguments(std::vector<const char*>* args,
164                                const std::string& bcFileName,
165                                const char* cacheDir, const char* resName,
166                                const char* core_lib, bool useRSDebugContext,
167                                const char* bccPluginName) {
168    rsAssert(cacheDir && resName && core_lib);
169    args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
170    args->push_back("-unroll-runtime");
171    args->push_back("-scalarize-load-store");
172    args->push_back("-o");
173    args->push_back(resName);
174    args->push_back("-output_path");
175    args->push_back(cacheDir);
176    args->push_back("-bclib");
177    args->push_back(core_lib);
178    args->push_back("-mtriple");
179    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
180
181    // Enable workaround for A53 codegen by default.
182#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
183    args->push_back("-aarch64-fix-cortex-a53-835769");
184#endif
185
186    // Execute the bcc compiler.
187    if (useRSDebugContext) {
188        args->push_back("-rs-debug-ctx");
189    } else {
190        // Only load additional libraries for compiles that don't use
191        // the debug context.
192        if (bccPluginName && strlen(bccPluginName) > 0) {
193            args->push_back("-load");
194            args->push_back(bccPluginName);
195        }
196    }
197
198    args->push_back("-fPIC");
199    args->push_back("-embedRSInfo");
200
201    args->push_back(bcFileName.c_str());
202    args->push_back(nullptr);
203}
204
205static bool compileBitcode(const std::string &bcFileName,
206                           const char *bitcode,
207                           size_t bitcodeSize,
208                           const char **compileArguments,
209                           const std::string &compileCommandLine) {
210    rsAssert(bitcode && bitcodeSize);
211
212    FILE *bcfile = fopen(bcFileName.c_str(), "w");
213    if (!bcfile) {
214        ALOGE("Could not write to %s", bcFileName.c_str());
215        return false;
216    }
217    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
218    fclose(bcfile);
219    if (nwritten != bitcodeSize) {
220        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
221              bcFileName.c_str());
222        return false;
223    }
224
225    pid_t pid = fork();
226
227    switch (pid) {
228    case -1: {  // Error occurred (we attempt no recovery)
229        ALOGE("Couldn't fork for bcc compiler execution");
230        return false;
231    }
232    case 0: {  // Child process
233        ALOGV("Invoking BCC with: %s", compileCommandLine.c_str());
234        execv(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
235              (char* const*)compileArguments);
236
237        ALOGE("execv() failed: %s", strerror(errno));
238        abort();
239        return false;
240    }
241    default: {  // Parent process (actual driver)
242        // Wait on child process to finish compiling the source.
243        int status = 0;
244        pid_t w = waitpid(pid, &status, 0);
245        if (w == -1) {
246            ALOGE("Could not wait for bcc compiler");
247            return false;
248        }
249
250        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
251            return true;
252        }
253
254        ALOGE("bcc compiler terminated unexpectedly");
255        return false;
256    }
257    }
258}
259
260#endif  // !defined(RS_COMPATIBILITY_LIB)
261}  // namespace
262
263namespace android {
264namespace renderscript {
265
266const char* SharedLibraryUtils::LD_EXE_PATH = "/system/bin/ld.mc";
267const char* SharedLibraryUtils::RS_CACHE_DIR = "com.android.renderscript.cache";
268
269#ifndef RS_COMPATIBILITY_LIB
270
271bool SharedLibraryUtils::createSharedLibrary(const char *cacheDir, const char *resName) {
272    std::string sharedLibName = findSharedObjectName(cacheDir, resName);
273    std::string objFileName = cacheDir;
274    objFileName.append("/");
275    objFileName.append(resName);
276    objFileName.append(".o");
277
278    const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so";
279    std::vector<const char *> args = {
280        LD_EXE_PATH,
281        "-shared",
282        "-nostdlib",
283        compiler_rt,
284        "-mtriple", DEFAULT_TARGET_TRIPLE_STRING,
285        "-L", SYSLIBPATH,
286        "-lRSDriver", "-lm", "-lc",
287        objFileName.c_str(),
288        "-o", sharedLibName.c_str(),
289        nullptr
290    };
291
292    std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data());
293
294    pid_t pid = fork();
295
296    switch (pid) {
297    case -1: {  // Error occurred (we attempt no recovery)
298        ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH);
299        return false;
300    }
301    case 0: {  // Child process
302        ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str());
303        execv(LD_EXE_PATH, (char* const*) args.data());
304
305        ALOGE("execv() failed: %s", strerror(errno));
306        abort();
307        return false;
308    }
309    default: {  // Parent process (actual driver)
310        // Wait on child process to finish compiling the source.
311        int status = 0;
312        pid_t w = waitpid(pid, &status, 0);
313        if (w == -1) {
314            ALOGE("Could not wait for linker (%s)", LD_EXE_PATH);
315            return false;
316        }
317
318        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
319            return true;
320        }
321
322        ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH);
323        return false;
324    }
325    }
326}
327
328#endif  // RS_COMPATIBILITY_LIB
329
330
331void* SharedLibraryUtils::loadSharedLibrary(const char *cacheDir, const char *resName, const char *nativeLibDir) {
332    void *loaded = nullptr;
333
334#if defined(RS_COMPATIBILITY_LIB) && defined(__LP64__)
335    std::string scriptSOName = findSharedObjectName(nativeLibDir, resName);
336#else
337    std::string scriptSOName = findSharedObjectName(cacheDir, resName);
338#endif
339
340    // We should check if we can load the library from the standard app
341    // location for shared libraries first.
342    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
343
344    if (loaded == nullptr) {
345        ALOGE("Unable to open shared library (%s): %s",
346              scriptSOName.c_str(), dlerror());
347
348#ifdef RS_COMPATIBILITY_LIB
349        // One final attempt to find the library in "/system/lib".
350        // We do this to allow bundled applications to use the compatibility
351        // library fallback path. Those applications don't have a private
352        // library path, so they need to install to the system directly.
353        // Note that this is really just a testing path.
354        std::string scriptSONameSystem("/system/lib/librs.");
355        scriptSONameSystem.append(resName);
356        scriptSONameSystem.append(".so");
357        loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
358                              resName);
359        if (loaded == nullptr) {
360            ALOGE("Unable to open system shared library (%s): %s",
361                  scriptSONameSystem.c_str(), dlerror());
362        }
363#endif
364    }
365
366    return loaded;
367}
368
369void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDir,
370                                       const char *resName) {
371    // Keep track of which .so libraries have been loaded. Once a library is
372    // in the set (per-process granularity), we must instead make a copy of
373    // the original shared object (randomly named .so file) and load that one
374    // instead. If we don't do this, we end up aliasing global data between
375    // the various Script instances (which are supposed to be completely
376    // independent).
377    static std::set<std::string> LoadedLibraries;
378
379    void *loaded = nullptr;
380
381    // Skip everything if we don't even have the original library available.
382    if (access(origName, F_OK) != 0) {
383        return nullptr;
384    }
385
386    // Common path is that we have not loaded this Script/library before.
387    if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
388        loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
389        if (loaded) {
390            LoadedLibraries.insert(origName);
391        }
392        return loaded;
393    }
394
395    std::string newName(cacheDir);
396
397    // Append RS_CACHE_DIR only if it is not found in cacheDir
398    // In driver mode, RS_CACHE_DIR is already appended to cacheDir.
399    if (newName.find(RS_CACHE_DIR) == std::string::npos) {
400        newName.append("/");
401        newName.append(RS_CACHE_DIR);
402        newName.append("/");
403    }
404
405    if (!ensureCacheDirExists(newName.c_str())) {
406        ALOGE("Could not verify or create cache dir: %s", cacheDir);
407        return nullptr;
408    }
409
410    // Construct an appropriately randomized filename for the copy.
411    newName.append("librs.");
412    newName.append(resName);
413    newName.append("#");
414    newName.append(getRandomString(6));  // 62^6 potential filename variants.
415    newName.append(".so");
416
417    int r = copyFile(newName.c_str(), origName);
418    if (r != 0) {
419        ALOGE("Could not create copy %s -> %s", origName, newName.c_str());
420        return nullptr;
421    }
422    loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
423    r = unlink(newName.c_str());
424    if (r != 0) {
425        ALOGE("Could not unlink copy %s", newName.c_str());
426    }
427    if (loaded) {
428        LoadedLibraries.insert(newName.c_str());
429    }
430
431    return loaded;
432}
433
434const char* RsdCpuScriptImpl::BCC_EXE_PATH = "/system/bin/bcc";
435
436#define MAXLINE 500
437#define MAKE_STR_HELPER(S) #S
438#define MAKE_STR(S) MAKE_STR_HELPER(S)
439#define EXPORT_VAR_STR "exportVarCount: "
440#define EXPORT_FUNC_STR "exportFuncCount: "
441#define EXPORT_FOREACH_STR "exportForEachCount: "
442#define OBJECT_SLOT_STR "objectSlotCount: "
443#define PRAGMA_STR "pragmaCount: "
444#define THREADABLE_STR "isThreadable: "
445
// Copy one line from the string cursor *ppstr into s, fgets()-style.
//
// Characters are copied up to and including the next '\n', until size - 1
// characters have been stored, or until the terminating '\0' of the source
// is reached, whichever comes first. s is always NUL-terminated on success
// and *ppstr is advanced past the characters that were consumed. The cursor
// is never moved past the source's terminating '\0', so a later call reports
// the end of input instead of reading beyond the string.
//
// \param s     Destination buffer of at least \p size bytes.
// \param size  Capacity of \p s in bytes.
// \param ppstr In/out cursor into the NUL-terminated source string.
// \return s when something was read; nullptr when an argument is null,
//         size < 1, or the cursor already sits on the terminating '\0'.
static char* strgets(char *s, int size, const char **ppstr) {
    if (!s || !ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
        return nullptr;
    }

    int i;
    for (i = 0; i < (size - 1); i++) {
        const char c = **ppstr;
        if (c == '\0') {
            // End of input: leave the cursor on the terminator.
            s[i] = '\0';
            return s;
        }
        s[i] = c;
        (*ppstr)++;
        if (c == '\n') {
            s[i+1] = '\0';
            return s;
        }
    }

    // size has been exceeded.
    s[i] = '\0';

    return s;
}
470
471RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
472    mCtx = ctx;
473    mScript = s;
474
475    mScriptSO = nullptr;
476
477#ifndef RS_COMPATIBILITY_LIB
478    mCompilerDriver = nullptr;
479#endif
480
481
482    mRoot = nullptr;
483    mRootExpand = nullptr;
484    mInit = nullptr;
485    mFreeChildren = nullptr;
486    mScriptExec = nullptr;
487
488    mBoundAllocs = nullptr;
489    mIntrinsicData = nullptr;
490    mIsThreadable = true;
491}
492
493bool RsdCpuScriptImpl::storeRSInfoFromSO() {
494    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
495    if (mRoot) {
496        //ALOGE("Found root(): %p", mRoot);
497    }
498    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
499    if (mRootExpand) {
500        //ALOGE("Found root.expand(): %p", mRootExpand);
501    }
502    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
503    if (mInit) {
504        //ALOGE("Found init(): %p", mInit);
505    }
506    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
507    if (mFreeChildren) {
508        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
509    }
510
511    mScriptExec = ScriptExecutable::createFromSharedObject(
512            mCtx->getContext(), mScriptSO);
513
514    if (mScriptExec == nullptr) {
515        return false;
516    }
517
518    size_t varCount = mScriptExec->getExportedVariableCount();
519    if (varCount > 0) {
520        mBoundAllocs = new Allocation *[varCount];
521        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
522    }
523
524    mIsThreadable = mScriptExec->getThreadable();
525    //ALOGE("Script isThreadable? %d", mIsThreadable);
526
527    return true;
528}
529
530ScriptExecutable* ScriptExecutable::createFromSharedObject(
531    Context* RSContext, void* sharedObj) {
532    char line[MAXLINE];
533
534    size_t varCount = 0;
535    size_t funcCount = 0;
536    size_t forEachCount = 0;
537    size_t objectSlotCount = 0;
538    size_t pragmaCount = 0;
539    bool isThreadable = true;
540
541    void** fieldAddress = nullptr;
542    bool* fieldIsObject = nullptr;
543    InvokeFunc_t* invokeFunctions = nullptr;
544    ForEachFunc_t* forEachFunctions = nullptr;
545    uint32_t* forEachSignatures = nullptr;
546    const char ** pragmaKeys = nullptr;
547    const char ** pragmaValues = nullptr;
548
549    const char *rsInfo = (const char *) dlsym(sharedObj, ".rs.info");
550
551    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
552        return nullptr;
553    }
554    if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
555        ALOGE("Invalid export var count!: %s", line);
556        return nullptr;
557    }
558
559    fieldAddress = new void*[varCount];
560    if (fieldAddress == nullptr) {
561        return nullptr;
562    }
563
564    fieldIsObject = new bool[varCount];
565    if (fieldIsObject == nullptr) {
566        goto error;
567    }
568
569    for (size_t i = 0; i < varCount; ++i) {
570        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
571            goto error;
572        }
573        char *c = strrchr(line, '\n');
574        if (c) {
575            *c = '\0';
576        }
577        void* addr = dlsym(sharedObj, line);
578        if (addr == nullptr) {
579            ALOGE("Failed to find variable address for %s: %s",
580                  line, dlerror());
581            // Not a critical error if we don't find a global variable.
582        }
583        fieldAddress[i] = addr;
584        fieldIsObject[i] = false;
585    }
586
587    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
588        goto error;
589    }
590    if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
591        ALOGE("Invalid export func count!: %s", line);
592        goto error;
593    }
594
595    invokeFunctions = new InvokeFunc_t[funcCount];
596    if (invokeFunctions == nullptr) {
597        goto error;
598    }
599
600    for (size_t i = 0; i < funcCount; ++i) {
601        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
602            goto error;
603        }
604        char *c = strrchr(line, '\n');
605        if (c) {
606            *c = '\0';
607        }
608
609        invokeFunctions[i] = (InvokeFunc_t) dlsym(sharedObj, line);
610        if (invokeFunctions[i] == nullptr) {
611            ALOGE("Failed to get function address for %s(): %s",
612                  line, dlerror());
613            goto error;
614        }
615    }
616
617    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
618        goto error;
619    }
620    if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
621        ALOGE("Invalid export forEach count!: %s", line);
622        goto error;
623    }
624
625    forEachFunctions = new ForEachFunc_t[forEachCount];
626    if (forEachFunctions == nullptr) {
627        goto error;
628    }
629
630    forEachSignatures = new uint32_t[forEachCount];
631    if (forEachSignatures == nullptr) {
632        goto error;
633    }
634
635    for (size_t i = 0; i < forEachCount; ++i) {
636        unsigned int tmpSig = 0;
637        char tmpName[MAXLINE];
638
639        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
640            goto error;
641        }
642        if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
643                   &tmpSig, tmpName) != 2) {
644          ALOGE("Invalid export forEach!: %s", line);
645          goto error;
646        }
647
648        // Lookup the expanded ForEach kernel.
649        strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
650        forEachSignatures[i] = tmpSig;
651        forEachFunctions[i] =
652            (ForEachFunc_t) dlsym(sharedObj, tmpName);
653        if (i != 0 && forEachFunctions[i] == nullptr) {
654            // Ignore missing root.expand functions.
655            // root() is always specified at location 0.
656            ALOGE("Failed to find forEach function address for %s: %s",
657                  tmpName, dlerror());
658            goto error;
659        }
660    }
661
662    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
663        goto error;
664    }
665    if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
666        ALOGE("Invalid object slot count!: %s", line);
667        goto error;
668    }
669
670    for (size_t i = 0; i < objectSlotCount; ++i) {
671        uint32_t varNum = 0;
672        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
673            goto error;
674        }
675        if (sscanf(line, "%u", &varNum) != 1) {
676            ALOGE("Invalid object slot!: %s", line);
677            goto error;
678        }
679
680        if (varNum < varCount) {
681            fieldIsObject[varNum] = true;
682        }
683    }
684
685#ifndef RS_COMPATIBILITY_LIB
686    // Do not attempt to read pragmas or isThreadable flag in compat lib path.
687    // Neither is applicable for compat lib
688
689    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
690        goto error;
691    }
692
693    if (sscanf(line, PRAGMA_STR "%zu", &pragmaCount) != 1) {
694        ALOGE("Invalid pragma count!: %s", line);
695        goto error;
696    }
697
698    pragmaKeys = new const char*[pragmaCount];
699    if (pragmaKeys == nullptr) {
700        goto error;
701    }
702
703    pragmaValues = new const char*[pragmaCount];
704    if (pragmaValues == nullptr) {
705        goto error;
706    }
707
708    bzero(pragmaKeys, sizeof(char*) * pragmaCount);
709    bzero(pragmaValues, sizeof(char*) * pragmaCount);
710
711    for (size_t i = 0; i < pragmaCount; ++i) {
712        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
713            ALOGE("Unable to read pragma at index %zu!", i);
714            goto error;
715        }
716
717        char key[MAXLINE];
718        char value[MAXLINE] = ""; // initialize in case value is empty
719
720        // pragmas can just have a key and no value.  Only check to make sure
721        // that the key is not empty
722        if (sscanf(line, "%" MAKE_STR(MAXLINE) "s - %" MAKE_STR(MAXLINE) "s",
723                   key, value) == 0 ||
724            strlen(key) == 0)
725        {
726            ALOGE("Invalid pragma value!: %s", line);
727
728            goto error;
729        }
730
731        char *pKey = new char[strlen(key)+1];
732        strcpy(pKey, key);
733        pragmaKeys[i] = pKey;
734
735        char *pValue = new char[strlen(value)+1];
736        strcpy(pValue, value);
737        pragmaValues[i] = pValue;
738        //ALOGE("Pragma %zu: Key: '%s' Value: '%s'", i, pKey, pValue);
739    }
740
741    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
742        goto error;
743    }
744
745    char tmpFlag[4];
746    if (sscanf(line, THREADABLE_STR "%4s", tmpFlag) != 1) {
747        ALOGE("Invalid threadable flag!: %s", line);
748        goto error;
749    }
750    if (strcmp(tmpFlag, "yes") == 0) {
751        isThreadable = true;
752    } else if (strcmp(tmpFlag, "no") == 0) {
753        isThreadable = false;
754    } else {
755        ALOGE("Invalid threadable flag!: %s", tmpFlag);
756        goto error;
757    }
758
759#endif  // RS_COMPATIBILITY_LIB
760
761    return new ScriptExecutable(
762        RSContext, fieldAddress, fieldIsObject, varCount,
763        invokeFunctions, funcCount,
764        forEachFunctions, forEachSignatures, forEachCount,
765        pragmaKeys, pragmaValues, pragmaCount,
766        isThreadable);
767
768error:
769
770#ifndef RS_COMPATIBILITY_LIB
771    for (size_t idx = 0; idx < pragmaCount; ++idx) {
772        delete [] pragmaKeys[idx];
773        delete [] pragmaValues[idx];
774    }
775
776    delete[] pragmaValues;
777    delete[] pragmaKeys;
778#endif  // RS_COMPATIBILITY_LIB
779
780    delete[] forEachSignatures;
781    delete[] forEachFunctions;
782    delete[] invokeFunctions;
783    delete[] fieldIsObject;
784    delete[] fieldAddress;
785
786    return nullptr;
787}
788
789bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
790                            uint8_t const *bitcode, size_t bitcodeSize,
791                            uint32_t flags, char const *bccPluginName) {
792    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
793    // bitcode, bitcodeSize, flags, lookupFunc);
794    //ALOGE("rsdScriptInit %p %p", rsc, script);
795
796    mCtx->lockMutex();
797#ifndef RS_COMPATIBILITY_LIB
798    bool useRSDebugContext = false;
799
800    mCompilerDriver = nullptr;
801
802    mCompilerDriver = new bcc::RSCompilerDriver();
803    if (mCompilerDriver == nullptr) {
804        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
805        mCtx->unlockMutex();
806        return false;
807    }
808
809    // Run any compiler setup functions we have been provided with.
810    RSSetupCompilerCallback setupCompilerCallback =
811            mCtx->getSetupCompilerCallback();
812    if (setupCompilerCallback != nullptr) {
813        setupCompilerCallback(mCompilerDriver);
814    }
815
816    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
817    if (!bitcodeMetadata.extract()) {
818        ALOGE("Could not extract metadata from bitcode");
819        mCtx->unlockMutex();
820        return false;
821    }
822
823    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
824
825    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
826        mCompilerDriver->setDebugContext(true);
827        useRSDebugContext = true;
828    }
829
830    std::string bcFileName(cacheDir);
831    bcFileName.append("/");
832    bcFileName.append(resName);
833    bcFileName.append(".bc");
834
835    std::vector<const char*> compileArguments;
836    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
837                        useRSDebugContext, bccPluginName);
838    // The last argument of compileArguments ia a nullptr, so remove 1 from the size.
839    std::string compileCommandLine =
840                bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data());
841
842    if (!is_force_recompile() && !useRSDebugContext) {
843        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
844    }
845
846    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
847    // again.
848    if (mScriptSO == nullptr) {
849        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
850                            compileArguments.data(), compileCommandLine))
851        {
852            ALOGE("bcc: FAILS to compile '%s'", resName);
853            mCtx->unlockMutex();
854            return false;
855        }
856
857        if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
858            ALOGE("Linker: Failed to link object file '%s'", resName);
859            mCtx->unlockMutex();
860            return false;
861        }
862
863        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
864        if (mScriptSO == nullptr) {
865            ALOGE("Unable to load '%s'", resName);
866            mCtx->unlockMutex();
867            return false;
868        }
869    }
870
871    mBitcodeFilePath = bcFileName;
872
873    // Read RS symbol information from the .so.
874    if ( !mScriptSO) {
875        goto error;
876    }
877
878    if ( !storeRSInfoFromSO()) {
879      goto error;
880    }
881#else  // RS_COMPATIBILITY_LIB is defined
882    const char *nativeLibDir = mCtx->getContext()->getNativeLibDir();
883    mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir);
884
885    if (!mScriptSO) {
886        goto error;
887    }
888
889    if (!storeRSInfoFromSO()) {
890        goto error;
891    }
892#endif
893    mCtx->unlockMutex();
894    return true;
895
896error:
897
898    mCtx->unlockMutex();
899    if (mScriptSO) {
900        dlclose(mScriptSO);
901        mScriptSO = nullptr;
902    }
903    return false;
904}
905
906#ifndef RS_COMPATIBILITY_LIB
907
908const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
909                                          size_t bitcodeSize) {
910    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
911
912    // If we're debugging, use the debug library.
913    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
914        return SYSLIBPATH"/libclcore_debug.bc";
915    }
916
917    // If a callback has been registered to specify a library, use that.
918    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
919    if (selectRTCallback != nullptr) {
920        return selectRTCallback((const char*)bitcode, bitcodeSize);
921    }
922
923    // Check for a platform specific library
924#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
925    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
926    if (prec == bcinfo::RS_FP_Relaxed) {
927        // NEON-capable ARMv7a devices can use an accelerated math library
928        // for all reduced precision scripts.
929        // ARMv8 does not use NEON, as ASIMD can be used with all precision
930        // levels.
931        return SYSLIBPATH"/libclcore_neon.bc";
932    } else {
933        return defaultLib;
934    }
935#elif defined(__i386__) || defined(__x86_64__)
936    // x86 devices will use an optimized library.
937    return SYSLIBPATH"/libclcore_x86.bc";
938#else
939    return defaultLib;
940#endif
941}
942
943#endif
944
945void RsdCpuScriptImpl::populateScript(Script *script) {
946    // Copy info over to runtime
947    script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
948    script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
949    script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
950    script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
951    script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
952
953    // Bug, need to stash in metadata
954    if (mRootExpand) {
955        script->mHal.info.root = mRootExpand;
956    } else {
957        script->mHal.info.root = mRoot;
958    }
959}
960
961
962typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
963
964bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
965                                        uint32_t inLen,
966                                        Allocation * aout,
967                                        const void * usr, uint32_t usrLen,
968                                        const RsScriptCall *sc,
969                                        MTLaunchStruct *mtls) {
970
971    memset(mtls, 0, sizeof(MTLaunchStruct));
972
973    for (int index = inLen; --index >= 0;) {
974        const Allocation* ain = ains[index];
975
976        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
977        if (ain != nullptr &&
978            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
979
980            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
981                                         "rsForEach called with null in allocations");
982            return false;
983        }
984    }
985
986    if (aout &&
987        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
988
989        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
990                                     "rsForEach called with null out allocations");
991        return false;
992    }
993
994    if (inLen > 0) {
995        const Allocation *ain0   = ains[0];
996        const Type       *inType = ain0->getType();
997
998        mtls->fep.dim.x = inType->getDimX();
999        mtls->fep.dim.y = inType->getDimY();
1000        mtls->fep.dim.z = inType->getDimZ();
1001
1002        for (int Index = inLen; --Index >= 1;) {
1003            if (!ain0->hasSameDims(ains[Index])) {
1004                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1005                  "Failed to launch kernel; dimensions of input and output"
1006                  "allocations do not match.");
1007
1008                return false;
1009            }
1010        }
1011
1012    } else if (aout != nullptr) {
1013        const Type *outType = aout->getType();
1014
1015        mtls->fep.dim.x = outType->getDimX();
1016        mtls->fep.dim.y = outType->getDimY();
1017        mtls->fep.dim.z = outType->getDimZ();
1018
1019    } else {
1020        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1021                                     "rsForEach called with null allocations");
1022        return false;
1023    }
1024
1025    if (inLen > 0 && aout != nullptr) {
1026        if (!ains[0]->hasSameDims(aout)) {
1027            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
1028              "Failed to launch kernel; dimensions of input and output allocations do not match.");
1029
1030            return false;
1031        }
1032    }
1033
1034    if (!sc || (sc->xEnd == 0)) {
1035        mtls->end.x = mtls->fep.dim.x;
1036    } else {
1037        mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
1038        mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
1039        if (mtls->start.x >= mtls->end.x) return false;
1040    }
1041
1042    if (!sc || (sc->yEnd == 0)) {
1043        mtls->end.y = mtls->fep.dim.y;
1044    } else {
1045        mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
1046        mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
1047        if (mtls->start.y >= mtls->end.y) return false;
1048    }
1049
1050    if (!sc || (sc->zEnd == 0)) {
1051        mtls->end.z = mtls->fep.dim.z;
1052    } else {
1053        mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
1054        mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
1055        if (mtls->start.z >= mtls->end.z) return false;
1056    }
1057
1058    if (!sc || (sc->arrayEnd == 0)) {
1059        mtls->end.array[0] = mtls->fep.dim.array[0];
1060    } else {
1061        mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
1062        mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
1063        if (mtls->start.array[0] >= mtls->end.array[0]) return false;
1064    }
1065
1066    if (!sc || (sc->array2End == 0)) {
1067        mtls->end.array[1] = mtls->fep.dim.array[1];
1068    } else {
1069        mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
1070        mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
1071        if (mtls->start.array[1] >= mtls->end.array[1]) return false;
1072    }
1073
1074    if (!sc || (sc->array3End == 0)) {
1075        mtls->end.array[2] = mtls->fep.dim.array[2];
1076    } else {
1077        mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
1078        mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
1079        if (mtls->start.array[2] >= mtls->end.array[2]) return false;
1080    }
1081
1082    if (!sc || (sc->array4End == 0)) {
1083        mtls->end.array[3] = mtls->fep.dim.array[3];
1084    } else {
1085        mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
1086        mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
1087        if (mtls->start.array[3] >= mtls->end.array[3]) return false;
1088    }
1089
1090
1091    // The X & Y walkers always want 0-1 min even if dim is not present
1092    mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
1093    mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
1094
1095    mtls->rsc        = mCtx;
1096    if (ains) {
1097        memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
1098    }
1099    mtls->aout[0]    = aout;
1100    mtls->fep.usr    = usr;
1101    mtls->fep.usrLen = usrLen;
1102    mtls->mSliceSize = 1;
1103    mtls->mSliceNum  = 0;
1104
1105    mtls->isThreadable  = mIsThreadable;
1106
1107    if (inLen > 0) {
1108        mtls->fep.inLen = inLen;
1109        for (int index = inLen; --index >= 0;) {
1110            mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
1111            mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
1112        }
1113    }
1114
1115    if (aout != nullptr) {
1116        mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
1117        mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
1118    }
1119
1120    // All validation passed, ok to launch threads
1121    return true;
1122}
1123
1124
1125void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
1126                                     const Allocation ** ains,
1127                                     uint32_t inLen,
1128                                     Allocation * aout,
1129                                     const void * usr,
1130                                     uint32_t usrLen,
1131                                     const RsScriptCall *sc) {
1132
1133    MTLaunchStruct mtls;
1134
1135    if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
1136        forEachKernelSetup(slot, &mtls);
1137
1138        RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1139        mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
1140        mCtx->setTLS(oldTLS);
1141    }
1142}
1143
1144void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
1145    mtls->script = this;
1146    mtls->fep.slot = slot;
1147    mtls->kernel = mScriptExec->getForEachFunction(slot);
1148    rsAssert(mtls->kernel != nullptr);
1149    mtls->sig = mScriptExec->getForEachSignature(slot);
1150}
1151
1152int RsdCpuScriptImpl::invokeRoot() {
1153    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1154    int ret = mRoot();
1155    mCtx->setTLS(oldTLS);
1156    return ret;
1157}
1158
1159void RsdCpuScriptImpl::invokeInit() {
1160    if (mInit) {
1161        mInit();
1162    }
1163}
1164
1165void RsdCpuScriptImpl::invokeFreeChildren() {
1166    if (mFreeChildren) {
1167        mFreeChildren();
1168    }
1169}
1170
1171void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
1172                                      size_t paramLength) {
1173    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
1174    void * ap = nullptr;
1175
1176#if defined(__x86_64__)
1177    // The invoked function could have input parameter of vector type for example float4 which
1178    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
1179    // So try to align void* params before passing them into RS exported function.
1180
1181    if ((uint8_t)(uint64_t)params & 0x0F) {
1182        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
1183            memcpy(ap, params, paramLength);
1184        } else {
1185            ALOGE("x86_64: invokeFunction memalign error, still use params which"
1186                  " is not 16 bytes aligned.");
1187        }
1188    }
1189#endif
1190
1191    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1192    reinterpret_cast<void (*)(const void *, uint32_t)>(
1193        mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
1194
1195    mCtx->setTLS(oldTLS);
1196}
1197
1198void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
1199    //rsAssert(!script->mFieldIsObject[slot]);
1200    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
1201
1202    //if (mIntrinsicID) {
1203        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
1204        //return;
1205    //}
1206
1207    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1208    if (!destPtr) {
1209        //ALOGV("Calling setVar on slot = %i which is null", slot);
1210        return;
1211    }
1212
1213    memcpy(destPtr, data, dataLength);
1214}
1215
1216void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
1217    //rsAssert(!script->mFieldIsObject[slot]);
1218    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
1219
1220    int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1221    if (!srcPtr) {
1222        //ALOGV("Calling setVar on slot = %i which is null", slot);
1223        return;
1224    }
1225    memcpy(data, srcPtr, dataLength);
1226}
1227
1228
1229void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
1230                                                const Element *elem,
1231                                                const uint32_t *dims, size_t dimLength) {
1232    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1233    if (!destPtr) {
1234        //ALOGV("Calling setVar on slot = %i which is null", slot);
1235        return;
1236    }
1237
1238    // We want to look at dimension in terms of integer components,
1239    // but dimLength is given in terms of bytes.
1240    dimLength /= sizeof(int);
1241
1242    // Only a single dimension is currently supported.
1243    rsAssert(dimLength == 1);
1244    if (dimLength == 1) {
1245        // First do the increment loop.
1246        size_t stride = elem->getSizeBytes();
1247        const char *cVal = reinterpret_cast<const char *>(data);
1248        for (uint32_t i = 0; i < dims[0]; i++) {
1249            elem->incRefs(cVal);
1250            cVal += stride;
1251        }
1252
1253        // Decrement loop comes after (to prevent race conditions).
1254        char *oldVal = reinterpret_cast<char *>(destPtr);
1255        for (uint32_t i = 0; i < dims[0]; i++) {
1256            elem->decRefs(oldVal);
1257            oldVal += stride;
1258        }
1259    }
1260
1261    memcpy(destPtr, data, dataLength);
1262}
1263
1264void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
1265
1266    //rsAssert(!script->mFieldIsObject[slot]);
1267    //ALOGE("setGlobalBind %i %p", slot, data);
1268
1269    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1270    if (!destPtr) {
1271        //ALOGV("Calling setVar on slot = %i which is null", slot);
1272        return;
1273    }
1274
1275    void *ptr = nullptr;
1276    mBoundAllocs[slot] = data;
1277    if (data) {
1278        ptr = data->mHal.drvState.lod[0].mallocPtr;
1279    }
1280    memcpy(destPtr, &ptr, sizeof(void *));
1281}
1282
1283void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
1284
1285    //rsAssert(script->mFieldIsObject[slot]);
1286    //ALOGE("setGlobalObj %i %p", slot, data);
1287
1288    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
1289    if (!destPtr) {
1290        //ALOGV("Calling setVar on slot = %i which is null", slot);
1291        return;
1292    }
1293
1294    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
1295}
1296
1297RsdCpuScriptImpl::~RsdCpuScriptImpl() {
1298#ifndef RS_COMPATIBILITY_LIB
1299    if (mCompilerDriver) {
1300        delete mCompilerDriver;
1301    }
1302#endif
1303
1304    if (mScriptExec != nullptr) {
1305        delete mScriptExec;
1306    }
1307    if (mBoundAllocs) delete[] mBoundAllocs;
1308    if (mScriptSO) {
1309        dlclose(mScriptSO);
1310    }
1311}
1312
1313Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
1314    if (!ptr) {
1315        return nullptr;
1316    }
1317
1318    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
1319        Allocation *a = mBoundAllocs[ct];
1320        if (!a) continue;
1321        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
1322            return a;
1323        }
1324    }
1325    ALOGE("rsGetAllocation, failed to find %p", ptr);
1326    return nullptr;
1327}
1328
1329void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
1330                                 uint32_t inLen, Allocation * aout,
1331                                 const void * usr, uint32_t usrLen,
1332                                 const RsScriptCall *sc) {}
1333
1334void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
1335                                  uint32_t inLen, Allocation * aout,
1336                                  const void * usr, uint32_t usrLen,
1337                                  const RsScriptCall *sc) {}
1338
1339
1340}
1341}
1342