rsCpuScript.cpp revision 45e753a46e587c69b3b0d0c5138e88715a24a29a
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19
20#ifdef RS_COMPATIBILITY_LIB
21    #include <stdio.h>
22    #include <sys/stat.h>
23    #include <unistd.h>
24#else
25    #include <bcc/BCCContext.h>
26    #include <bcc/Config/Config.h>
27    #include <bcc/Renderscript/RSCompilerDriver.h>
28    #include <bcc/Renderscript/RSInfo.h>
29    #include <bcinfo/MetadataExtractor.h>
30    #include <cutils/properties.h>
31
32    #include <sys/types.h>
33    #include <sys/wait.h>
34    #include <unistd.h>
35
36    #include <string>
37    #include <vector>
38#endif
39
40#include <set>
41#include <string>
42#include <dlfcn.h>
43#include <stdlib.h>
44#include <string.h>
45#include <fstream>
46#include <iostream>
47
48#ifdef __LP64__
49#define SYSLIBPATH "/system/lib64"
50#else
51#define SYSLIBPATH "/system/lib"
52#endif
53
54namespace {
55
56// Create a len length string containing random characters from [A-Za-z0-9].
57static std::string getRandomString(size_t len) {
58    char buf[len + 1];
59    for (size_t i = 0; i < len; i++) {
60        uint32_t r = arc4random() & 0xffff;
61        r %= 62;
62        if (r < 26) {
63            // lowercase
64            buf[i] = 'a' + r;
65        } else if (r < 52) {
66            // uppercase
67            buf[i] = 'A' + (r - 26);
68        } else {
69            // Use a number
70            buf[i] = '0' + (r - 52);
71        }
72    }
73    buf[len] = '\0';
74    return std::string(buf);
75}
76
// Make sure the directory at path can be used.
// Returns true when the caller already has read, write and search access to
// path, or when path could be created with mode 0700; false otherwise.
static bool ensureCacheDirExists(const char *path) {
    const bool alreadyUsable = (access(path, R_OK | W_OK | X_OK) == 0);
    // mkdir() is only attempted when the access check did not succeed.
    return alreadyUsable || (mkdir(path, 0700) == 0);
}
88
89// Copy the file named \p srcFile to \p dstFile.
90// Return 0 on success and -1 if anything wasn't copied.
91static int copyFile(const char *dstFile, const char *srcFile) {
92    std::ifstream srcStream(srcFile);
93    if (!srcStream) {
94        ALOGE("Could not verify or read source file: %s", srcFile);
95        return -1;
96    }
97    std::ofstream dstStream(dstFile);
98    if (!dstStream) {
99        ALOGE("Could not verify or write destination file: %s", dstFile);
100        return -1;
101    }
102    dstStream << srcStream.rdbuf();
103    if (!dstStream) {
104        ALOGE("Could not write destination file: %s", dstFile);
105        return -1;
106    }
107
108    srcStream.close();
109    dstStream.close();
110
111    return 0;
112}
113
114#define RS_CACHE_DIR "com.android.renderscript.cache"
115
116// Attempt to load the shared library from origName, but then fall back to
117// creating a copy of the shared library if necessary (to ensure instancing).
118// This function returns the dlopen()-ed handle if successful.
119static void *loadSOHelper(const char *origName, const char *cacheDir,
120                          const char *resName) {
121    // Keep track of which .so libraries have been loaded. Once a library is
122    // in the set (per-process granularity), we must instead make a copy of
123    // the original shared object (randomly named .so file) and load that one
124    // instead. If we don't do this, we end up aliasing global data between
125    // the various Script instances (which are supposed to be completely
126    // independent).
127    static std::set<std::string> LoadedLibraries;
128
129    void *loaded = nullptr;
130
131    // Skip everything if we don't even have the original library available.
132    if (access(origName, F_OK) != 0) {
133        return nullptr;
134    }
135
136    // Common path is that we have not loaded this Script/library before.
137    if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
138        loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
139        if (loaded) {
140            LoadedLibraries.insert(origName);
141        }
142        return loaded;
143    }
144
145    std::string newName(cacheDir);
146
147    // Append RS_CACHE_DIR only if it is not found in cacheDir
148    // In driver mode, RS_CACHE_DIR is already appended to cacheDir.
149    if (newName.find(RS_CACHE_DIR) == std::string::npos) {
150        newName.append("/" RS_CACHE_DIR "/");
151    }
152
153    if (!ensureCacheDirExists(newName.c_str())) {
154        ALOGE("Could not verify or create cache dir: %s", cacheDir);
155        return nullptr;
156    }
157
158    // Construct an appropriately randomized filename for the copy.
159    newName.append("librs.");
160    newName.append(resName);
161    newName.append("#");
162    newName.append(getRandomString(6));  // 62^6 potential filename variants.
163    newName.append(".so");
164
165    int r = copyFile(newName.c_str(), origName);
166    if (r != 0) {
167        ALOGE("Could not create copy %s -> %s", origName, newName.c_str());
168        return nullptr;
169    }
170    loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
171    r = unlink(newName.c_str());
172    if (r != 0) {
173        ALOGE("Could not unlink copy %s", newName.c_str());
174    }
175    if (loaded) {
176        LoadedLibraries.insert(newName.c_str());
177    }
178
179    return loaded;
180}
181
// Build the file name of the script's shared object for resName.
//  - RS_SERVER builds:            "lib<resName>.so" (cacheDir is not used).
//  - RS_COMPATIBILITY_LIB builds: cacheDir cut at its last "cache", followed
//                                 by "/lib/librs.<resName>.so".
//  - otherwise:                   "<cacheDir>/librs.<resName>.so".
static std::string findSharedObjectName(const char *cacheDir,
                                        const char *resName) {
#ifdef RS_SERVER
    std::string soPath("lib");
#else
    std::string soPath(cacheDir);
#ifdef RS_COMPATIBILITY_LIB
    const size_t cachePos = soPath.rfind("cache");
    if (cachePos == std::string::npos) {
        // No "cache" component to strip; the path is used unmodified.
        ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
    } else {
        soPath.erase(cachePos);
    }
    soPath += "/lib/librs.";
#else
    soPath += "/librs.";
#endif  // RS_COMPATIBILITY_LIB
#endif  // RS_SERVER

    soPath += resName;
    soPath += ".so";
    return soPath;
}
207
208// Load the shared library referred to by cacheDir and resName. If we have
209// already loaded this library, we instead create a new copy (in the
210// cache dir) and then load that. We then immediately destroy the copy.
211// This is required behavior to implement script instancing for the support
212// library, since shared objects are loaded and de-duped by name only.
213static void *loadSharedLibrary(const char *cacheDir, const char *resName) {
214    void *loaded = nullptr;
215
216    std::string scriptSOName = findSharedObjectName(cacheDir, resName);
217
218    // We should check if we can load the library from the standard app
219    // location for shared libraries first.
220    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
221
222    if (loaded == nullptr) {
223        ALOGE("Unable to open shared library (%s): %s",
224              scriptSOName.c_str(), dlerror());
225
226        // One final attempt to find the library in "/system/lib".
227        // We do this to allow bundled applications to use the compatibility
228        // library fallback path. Those applications don't have a private
229        // library path, so they need to install to the system directly.
230        // Note that this is really just a testing path.
231        std::string scriptSONameSystem("/system/lib/librs.");
232        scriptSONameSystem.append(resName);
233        scriptSONameSystem.append(".so");
234        loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
235                              resName);
236        if (loaded == nullptr) {
237            ALOGE("Unable to open system shared library (%s): %s",
238                  scriptSONameSystem.c_str(), dlerror());
239        }
240    }
241
242    return loaded;
243}
244
245#ifndef RS_COMPATIBILITY_LIB
246
247static bool is_force_recompile() {
248#ifdef RS_SERVER
249  return false;
250#else
251  char buf[PROPERTY_VALUE_MAX];
252
253  // Re-compile if floating point precision has been overridden.
254  property_get("debug.rs.precision", buf, "");
255  if (buf[0] != '\0') {
256    return true;
257  }
258
259  // Re-compile if debug.rs.forcerecompile is set.
260  property_get("debug.rs.forcerecompile", buf, "0");
261  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
262    return true;
263  } else {
264    return false;
265  }
266#endif  // RS_SERVER
267}
268
269const static char *BCC_EXE_PATH = "/system/bin/bcc";
270
271static void setCompileArguments(std::vector<const char*>* args,
272                                const std::string& bcFileName,
273                                const char* cacheDir, const char* resName,
274                                const char* core_lib, bool useRSDebugContext,
275                                const char* bccPluginName) {
276    rsAssert(cacheDir && resName && core_lib);
277    args->push_back(BCC_EXE_PATH);
278    args->push_back("-unroll-runtime");
279    args->push_back("-scalarize-load-store");
280    args->push_back("-o");
281    args->push_back(resName);
282    args->push_back("-output_path");
283    args->push_back(cacheDir);
284    args->push_back("-bclib");
285    args->push_back(core_lib);
286    args->push_back("-mtriple");
287    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
288
289    // Enable workaround for A53 codegen by default.
290#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
291    args->push_back("-aarch64-fix-cortex-a53-835769");
292#endif
293
294    // Execute the bcc compiler.
295    if (useRSDebugContext) {
296        args->push_back("-rs-debug-ctx");
297    } else {
298        // Only load additional libraries for compiles that don't use
299        // the debug context.
300        if (bccPluginName && strlen(bccPluginName) > 0) {
301            args->push_back("-load");
302            args->push_back(bccPluginName);
303        }
304    }
305
306    args->push_back("-fPIC");
307    args->push_back("-embedRSInfo");
308
309    args->push_back(bcFileName.c_str());
310    args->push_back(nullptr);
311}
312
313static bool compileBitcode(const std::string &bcFileName,
314                           const char *bitcode,
315                           size_t bitcodeSize,
316                           const char **compileArguments,
317                           const std::string &compileCommandLine) {
318    rsAssert(bitcode && bitcodeSize);
319
320    FILE *bcfile = fopen(bcFileName.c_str(), "w");
321    if (!bcfile) {
322        ALOGE("Could not write to %s", bcFileName.c_str());
323        return false;
324    }
325    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
326    fclose(bcfile);
327    if (nwritten != bitcodeSize) {
328        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
329              bcFileName.c_str());
330        return false;
331    }
332
333    pid_t pid = fork();
334
335    switch (pid) {
336    case -1: {  // Error occurred (we attempt no recovery)
337        ALOGE("Couldn't fork for bcc compiler execution");
338        return false;
339    }
340    case 0: {  // Child process
341        ALOGV("Invoking BCC with: %s", compileCommandLine.c_str());
342        execv(BCC_EXE_PATH, (char* const*)compileArguments);
343
344        ALOGE("execv() failed: %s", strerror(errno));
345        abort();
346        return false;
347    }
348    default: {  // Parent process (actual driver)
349        // Wait on child process to finish compiling the source.
350        int status = 0;
351        pid_t w = waitpid(pid, &status, 0);
352        if (w == -1) {
353            ALOGE("Could not wait for bcc compiler");
354            return false;
355        }
356
357        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
358            return true;
359        }
360
361        ALOGE("bcc compiler terminated unexpectedly");
362        return false;
363    }
364    }
365}
366
367const static char *LD_EXE_PATH = "/system/bin/ld.mc";
368
369static bool createSharedLib(const char *cacheDir, const char *resName) {
370    std::string sharedLibName = findSharedObjectName(cacheDir, resName);
371    std::string objFileName = cacheDir;
372    objFileName.append("/");
373    objFileName.append(resName);
374    objFileName.append(".o");
375
376    const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so";
377    std::vector<const char *> args = {
378        LD_EXE_PATH,
379        "-shared",
380        "-nostdlib",
381        compiler_rt,
382        "-mtriple", DEFAULT_TARGET_TRIPLE_STRING,
383        "-L", SYSLIBPATH,
384        "-lRSDriver", "-lm", "-lc",
385        objFileName.c_str(),
386        "-o", sharedLibName.c_str(),
387        nullptr
388    };
389
390    std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data());
391
392    pid_t pid = fork();
393
394    switch (pid) {
395    case -1: {  // Error occurred (we attempt no recovery)
396        ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH);
397        return false;
398    }
399    case 0: {  // Child process
400        ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str());
401        execv(LD_EXE_PATH, (char* const*) args.data());
402
403        ALOGE("execv() failed: %s", strerror(errno));
404        abort();
405        return false;
406    }
407    default: {  // Parent process (actual driver)
408        // Wait on child process to finish compiling the source.
409        int status = 0;
410        pid_t w = waitpid(pid, &status, 0);
411        if (w == -1) {
412            ALOGE("Could not wait for linker (%s)", LD_EXE_PATH);
413            return false;
414        }
415
416        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
417            return true;
418        }
419
420        ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH);
421        return false;
422    }
423    }
424}
425#endif  // !defined(RS_COMPATIBILITY_LIB)
426}  // namespace
427
428namespace android {
429namespace renderscript {
430
431#define MAXLINE 500
432#define MAKE_STR_HELPER(S) #S
433#define MAKE_STR(S) MAKE_STR_HELPER(S)
434#define EXPORT_VAR_STR "exportVarCount: "
435#define EXPORT_FUNC_STR "exportFuncCount: "
436#define EXPORT_FOREACH_STR "exportForEachCount: "
437#define OBJECT_SLOT_STR "objectSlotCount: "
438
// fgets()-style line reader over an in-memory string.
//
// Copies characters from *ppstr into s until a newline has been copied (it
// is kept in s), the end of the string is reached, or size - 1 characters
// have been copied. s is always NUL-terminated on success.
//
// *ppstr is advanced past the characters consumed, but never past the
// terminating '\0', so that a following call sees the terminator and
// returns nullptr instead of reading beyond the end of the string.
//
// Returns s when successful and nullptr when '\0' is finally reached (or
// when s, ppstr or *ppstr is null, or size < 1).
static char* strgets(char *s, int size, const char **ppstr) {
    if (!s || !ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
        return nullptr;
    }

    int i;
    for (i = 0; i < (size - 1); i++) {
        const char ch = **ppstr;
        s[i] = ch;
        if (ch == '\0') {
            // End of input: leave *ppstr pointing at the terminator.
            return s;
        }
        (*ppstr)++;
        if (ch == '\n') {
            s[i+1] = '\0';
            return s;
        }
    }

    // size has been exceeded.
    s[i] = '\0';

    return s;
}
463
464RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
465    mCtx = ctx;
466    mScript = s;
467
468    mScriptSO = nullptr;
469
470    mInvokeFunctions = nullptr;
471    mForEachFunctions = nullptr;
472    mFieldAddress = nullptr;
473    mFieldIsObject = nullptr;
474    mForEachSignatures = nullptr;
475
476#ifndef RS_COMPATIBILITY_LIB
477    mCompilerDriver = nullptr;
478#endif
479
480
481    mRoot = nullptr;
482    mRootExpand = nullptr;
483    mInit = nullptr;
484    mFreeChildren = nullptr;
485
486
487    mBoundAllocs = nullptr;
488    mIntrinsicData = nullptr;
489    mIsThreadable = true;
490}
491
492bool RsdCpuScriptImpl::storeRSInfoFromSO() {
493    char line[MAXLINE];
494    size_t varCount = 0;
495    size_t funcCount = 0;
496    size_t forEachCount = 0;
497    size_t objectSlotCount = 0;
498
499    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
500    if (mRoot) {
501        //ALOGE("Found root(): %p", mRoot);
502    }
503    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
504    if (mRootExpand) {
505        //ALOGE("Found root.expand(): %p", mRootExpand);
506    }
507    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
508    if (mInit) {
509        //ALOGE("Found init(): %p", mInit);
510    }
511    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
512    if (mFreeChildren) {
513        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
514    }
515
516    const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info");
517    if (rsInfo) {
518        //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo);
519    }
520
521    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
522        goto error;
523    }
524    if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
525        ALOGE("Invalid export var count!: %s", line);
526        goto error;
527    }
528
529    mExportedVariableCount = varCount;
530    //ALOGE("varCount: %zu", varCount);
531    if (varCount > 0) {
532        // Start by creating/zeroing this member, since we don't want to
533        // accidentally clean up invalid pointers later (if we error out).
534        mFieldIsObject = new bool[varCount];
535        if (mFieldIsObject == nullptr) {
536            goto error;
537        }
538        memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject));
539        mFieldAddress = new void*[varCount];
540        if (mFieldAddress == nullptr) {
541            goto error;
542        }
543        for (size_t i = 0; i < varCount; ++i) {
544            if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
545                goto error;
546            }
547            char *c = strrchr(line, '\n');
548            if (c) {
549                *c = '\0';
550            }
551            mFieldAddress[i] = dlsym(mScriptSO, line);
552            if (mFieldAddress[i] == nullptr) {
553                ALOGE("Failed to find variable address for %s: %s",
554                      line, dlerror());
555                // Not a critical error if we don't find a global variable.
556            }
557            else {
558                //ALOGE("Found variable %s at %p", line,
559                //mFieldAddress[i]);
560            }
561        }
562    }
563
564    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
565        goto error;
566    }
567    if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
568        ALOGE("Invalid export func count!: %s", line);
569        goto error;
570    }
571
572    mExportedFunctionCount = funcCount;
573    //ALOGE("funcCount: %zu", funcCount);
574
575    if (funcCount > 0) {
576        mInvokeFunctions = new InvokeFunc_t[funcCount];
577        if (mInvokeFunctions == nullptr) {
578            goto error;
579        }
580        for (size_t i = 0; i < funcCount; ++i) {
581            if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
582                goto error;
583            }
584            char *c = strrchr(line, '\n');
585            if (c) {
586                *c = '\0';
587            }
588
589            mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line);
590            if (mInvokeFunctions[i] == nullptr) {
591                ALOGE("Failed to get function address for %s(): %s",
592                      line, dlerror());
593                goto error;
594            }
595            else {
596                //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]);
597            }
598        }
599    }
600
601    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
602        goto error;
603    }
604    if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
605        ALOGE("Invalid export forEach count!: %s", line);
606        goto error;
607    }
608
609    if (forEachCount > 0) {
610
611        mForEachSignatures = new uint32_t[forEachCount];
612        if (mForEachSignatures == nullptr) {
613            goto error;
614        }
615        mForEachFunctions = new ForEachFunc_t[forEachCount];
616        if (mForEachFunctions == nullptr) {
617            goto error;
618        }
619        for (size_t i = 0; i < forEachCount; ++i) {
620            unsigned int tmpSig = 0;
621            char tmpName[MAXLINE];
622
623            if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
624                goto error;
625            }
626            if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
627                       &tmpSig, tmpName) != 2) {
628                ALOGE("Invalid export forEach!: %s", line);
629                goto error;
630            }
631
632            // Lookup the expanded ForEach kernel.
633            strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
634            mForEachSignatures[i] = tmpSig;
635            mForEachFunctions[i] =
636                    (ForEachFunc_t) dlsym(mScriptSO, tmpName);
637            if (i != 0 && mForEachFunctions[i] == nullptr) {
638                // Ignore missing root.expand functions.
639                // root() is always specified at location 0.
640                ALOGE("Failed to find forEach function address for %s: %s",
641                      tmpName, dlerror());
642                goto error;
643            }
644            else {
645                //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]);
646            }
647        }
648    }
649
650    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
651        goto error;
652    }
653    if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
654        ALOGE("Invalid object slot count!: %s", line);
655        goto error;
656    }
657
658    if (objectSlotCount > 0) {
659        rsAssert(varCount > 0);
660        for (size_t i = 0; i < objectSlotCount; ++i) {
661            uint32_t varNum = 0;
662            if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
663                goto error;
664            }
665            if (sscanf(line, "%u", &varNum) != 1) {
666                ALOGE("Invalid object slot!: %s", line);
667                goto error;
668            }
669
670            if (varNum < varCount) {
671                mFieldIsObject[varNum] = true;
672            }
673        }
674    }
675
676    if (varCount > 0) {
677        mBoundAllocs = new Allocation *[varCount];
678        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
679    }
680
681    if (mScriptSO == (void*)1) {
682        //rsdLookupRuntimeStub(script, "acos");
683    }
684
685    return true;
686
687error:
688    delete[] mInvokeFunctions;
689    delete[] mForEachFunctions;
690    delete[] mFieldAddress;
691    delete[] mFieldIsObject;
692    delete[] mForEachSignatures;
693    delete[] mBoundAllocs;
694
695    return false;
696}
697
698bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
699                            uint8_t const *bitcode, size_t bitcodeSize,
700                            uint32_t flags, char const *bccPluginName) {
701    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
702    //ALOGE("rsdScriptInit %p %p", rsc, script);
703
704    mCtx->lockMutex();
705#ifndef RS_COMPATIBILITY_LIB
706    bool useRSDebugContext = false;
707
708    mCompilerDriver = nullptr;
709
710    mCompilerDriver = new bcc::RSCompilerDriver();
711    if (mCompilerDriver == nullptr) {
712        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
713        mCtx->unlockMutex();
714        return false;
715    }
716
717    // Run any compiler setup functions we have been provided with.
718    RSSetupCompilerCallback setupCompilerCallback =
719            mCtx->getSetupCompilerCallback();
720    if (setupCompilerCallback != nullptr) {
721        setupCompilerCallback(mCompilerDriver);
722    }
723
724    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
725    if (!bitcodeMetadata.extract()) {
726        ALOGE("Could not extract metadata from bitcode");
727        mCtx->unlockMutex();
728        return false;
729    }
730
731    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
732
733    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
734        mCompilerDriver->setDebugContext(true);
735        useRSDebugContext = true;
736    }
737
738    std::string bcFileName(cacheDir);
739    bcFileName.append("/");
740    bcFileName.append(resName);
741    bcFileName.append(".bc");
742
743    std::vector<const char*> compileArguments;
744    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
745                        useRSDebugContext, bccPluginName);
746    // The last argument of compileArguments ia a nullptr, so remove 1 from the size.
747    std::string compileCommandLine =
748                bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data());
749
750    if (!is_force_recompile()) {
751        mScriptSO = loadSharedLibrary(cacheDir, resName);
752    }
753
754    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
755    // again.
756    if (mScriptSO == nullptr) {
757        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
758                            compileArguments.data(), compileCommandLine))
759        {
760            ALOGE("bcc: FAILS to compile '%s'", resName);
761            mCtx->unlockMutex();
762            return false;
763        }
764
765        if (!createSharedLib(cacheDir, resName)) {
766            ALOGE("Linker: Failed to link object file '%s'", resName);
767            mCtx->unlockMutex();
768            return false;
769        }
770
771        mScriptSO = loadSharedLibrary(cacheDir, resName);
772        if (mScriptSO == nullptr) {
773            ALOGE("Unable to load '%s'", resName);
774            mCtx->unlockMutex();
775            return false;
776        }
777    }
778
779    // Read RS symbol information from the .so.
780    if ( !mScriptSO) {
781        goto error;
782    }
783
784    if ( !storeRSInfoFromSO()) {
785      goto error;
786    }
787#else  // RS_COMPATIBILITY_LIB is defined
788
789    mScriptSO = loadSharedLibrary(cacheDir, resName);
790
791    if (!mScriptSO) {
792        goto error;
793    }
794
795    if (!storeRSInfoFromSO()) {
796        goto error;
797    }
798#endif
799    mCtx->unlockMutex();
800    return true;
801
802error:
803
804    mCtx->unlockMutex();
805    if (mScriptSO) {
806        dlclose(mScriptSO);
807    }
808    return false;
809}
810
811#ifndef RS_COMPATIBILITY_LIB
812
813const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
814                                          size_t bitcodeSize) {
815    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
816
817    // If we're debugging, use the debug library.
818    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
819        return SYSLIBPATH"/libclcore_debug.bc";
820    }
821
822    // If a callback has been registered to specify a library, use that.
823    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
824    if (selectRTCallback != nullptr) {
825        return selectRTCallback((const char*)bitcode, bitcodeSize);
826    }
827
828    // Check for a platform specific library
829#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
830    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
831    if (prec == bcinfo::RS_FP_Relaxed) {
832        // NEON-capable ARMv7a devices can use an accelerated math library
833        // for all reduced precision scripts.
834        // ARMv8 does not use NEON, as ASIMD can be used with all precision
835        // levels.
836        return SYSLIBPATH"/libclcore_neon.bc";
837    } else {
838        return defaultLib;
839    }
840#elif defined(__i386__) || defined(__x86_64__)
841    // x86 devices will use an optimized library.
842    return SYSLIBPATH"/libclcore_x86.bc";
843#else
844    return defaultLib;
845#endif
846}
847
848#endif
849
850void RsdCpuScriptImpl::populateScript(Script *script) {
851    // Copy info over to runtime
852    script->mHal.info.exportedFunctionCount = mExportedFunctionCount;
853    script->mHal.info.exportedVariableCount = mExportedVariableCount;
854    script->mHal.info.exportedPragmaCount = 0;
855    script->mHal.info.exportedPragmaKeyList = 0;
856    script->mHal.info.exportedPragmaValueList = 0;
857
858    // Bug, need to stash in metadata
859    if (mRootExpand) {
860        script->mHal.info.root = mRootExpand;
861    } else {
862        script->mHal.info.root = mRoot;
863    }
864}
865
866
867typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
868
869void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
870                                        uint32_t inLen,
871                                        Allocation * aout,
872                                        const void * usr, uint32_t usrLen,
873                                        const RsScriptCall *sc,
874                                        MTLaunchStruct *mtls) {
875
876    memset(mtls, 0, sizeof(MTLaunchStruct));
877
878    for (int index = inLen; --index >= 0;) {
879        const Allocation* ain = ains[index];
880
881        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
882        if (ain != nullptr &&
883            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
884
885            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
886                                         "rsForEach called with null in allocations");
887            return;
888        }
889    }
890
891    if (aout &&
892        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
893
894        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
895                                     "rsForEach called with null out allocations");
896        return;
897    }
898
899    if (inLen > 0) {
900        const Allocation *ain0   = ains[0];
901        const Type       *inType = ain0->getType();
902
903        mtls->fep.dim.x = inType->getDimX();
904        mtls->fep.dim.y = inType->getDimY();
905        mtls->fep.dim.z = inType->getDimZ();
906
907        for (int Index = inLen; --Index >= 1;) {
908            if (!ain0->hasSameDims(ains[Index])) {
909                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
910                  "Failed to launch kernel; dimensions of input and output allocations do not match.");
911
912                return;
913            }
914        }
915
916    } else if (aout != nullptr) {
917        const Type *outType = aout->getType();
918
919        mtls->fep.dim.x = outType->getDimX();
920        mtls->fep.dim.y = outType->getDimY();
921        mtls->fep.dim.z = outType->getDimZ();
922
923    } else {
924        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
925                                     "rsForEach called with null allocations");
926        return;
927    }
928
929    if (inLen > 0 && aout != nullptr) {
930        if (!ains[0]->hasSameDims(aout)) {
931            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
932              "Failed to launch kernel; dimensions of input and output allocations do not match.");
933
934            return;
935        }
936    }
937
938    if (!sc || (sc->xEnd == 0)) {
939        mtls->xEnd = mtls->fep.dim.x;
940    } else {
941        rsAssert(sc->xStart < mtls->fep.dim.x);
942        rsAssert(sc->xEnd <= mtls->fep.dim.x);
943        rsAssert(sc->xStart < sc->xEnd);
944        mtls->xStart = rsMin(mtls->fep.dim.x, sc->xStart);
945        mtls->xEnd = rsMin(mtls->fep.dim.x, sc->xEnd);
946        if (mtls->xStart >= mtls->xEnd) return;
947    }
948
949    if (!sc || (sc->yEnd == 0)) {
950        mtls->yEnd = mtls->fep.dim.y;
951    } else {
952        rsAssert(sc->yStart < mtls->fep.dim.y);
953        rsAssert(sc->yEnd <= mtls->fep.dim.y);
954        rsAssert(sc->yStart < sc->yEnd);
955        mtls->yStart = rsMin(mtls->fep.dim.y, sc->yStart);
956        mtls->yEnd = rsMin(mtls->fep.dim.y, sc->yEnd);
957        if (mtls->yStart >= mtls->yEnd) return;
958    }
959
960    if (!sc || (sc->zEnd == 0)) {
961        mtls->zEnd = mtls->fep.dim.z;
962    } else {
963        rsAssert(sc->zStart < mtls->fep.dim.z);
964        rsAssert(sc->zEnd <= mtls->fep.dim.z);
965        rsAssert(sc->zStart < sc->zEnd);
966        mtls->zStart = rsMin(mtls->fep.dim.z, sc->zStart);
967        mtls->zEnd = rsMin(mtls->fep.dim.z, sc->zEnd);
968        if (mtls->zStart >= mtls->zEnd) return;
969    }
970
971    mtls->xEnd     = rsMax((uint32_t)1, mtls->xEnd);
972    mtls->yEnd     = rsMax((uint32_t)1, mtls->yEnd);
973    mtls->zEnd     = rsMax((uint32_t)1, mtls->zEnd);
974    mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
975
976    rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0));
977
978    mtls->rsc        = mCtx;
979    if (ains) {
980        memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
981    }
982    mtls->aout[0]    = aout;
983    mtls->fep.usr    = usr;
984    mtls->fep.usrLen = usrLen;
985    mtls->mSliceSize = 1;
986    mtls->mSliceNum  = 0;
987
988    mtls->isThreadable  = mIsThreadable;
989
990    if (inLen > 0) {
991        mtls->fep.inLen = inLen;
992        for (int index = inLen; --index >= 0;) {
993            mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
994            mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
995        }
996    }
997
998    if (aout != nullptr) {
999        mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
1000        mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
1001    }
1002}
1003
1004
1005void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
1006                                     const Allocation ** ains,
1007                                     uint32_t inLen,
1008                                     Allocation * aout,
1009                                     const void * usr,
1010                                     uint32_t usrLen,
1011                                     const RsScriptCall *sc) {
1012
1013    MTLaunchStruct mtls;
1014
1015    forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
1016    forEachKernelSetup(slot, &mtls);
1017
1018    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1019    mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
1020    mCtx->setTLS(oldTLS);
1021}
1022
1023void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
1024    mtls->script = this;
1025    mtls->fep.slot = slot;
1026    mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]);
1027    rsAssert(mtls->kernel != nullptr);
1028    mtls->sig = mForEachSignatures[slot];
1029}
1030
1031int RsdCpuScriptImpl::invokeRoot() {
1032    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1033    int ret = mRoot();
1034    mCtx->setTLS(oldTLS);
1035    return ret;
1036}
1037
1038void RsdCpuScriptImpl::invokeInit() {
1039    if (mInit) {
1040        mInit();
1041    }
1042}
1043
1044void RsdCpuScriptImpl::invokeFreeChildren() {
1045    if (mFreeChildren) {
1046        mFreeChildren();
1047    }
1048}
1049
1050void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
1051                                      size_t paramLength) {
1052    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
1053    void * ap = nullptr;
1054
1055#if defined(__x86_64__)
1056    // The invoked function could have input parameter of vector type for example float4 which
1057    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
1058    // So try to align void* params before passing them into RS exported function.
1059
1060    if ((uint8_t)(uint64_t)params & 0x0F) {
1061        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
1062            memcpy(ap, params, paramLength);
1063        } else {
1064            ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned.");
1065        }
1066    }
1067#endif
1068
1069    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1070    reinterpret_cast<void (*)(const void *, uint32_t)>(
1071        mInvokeFunctions[slot])(ap? (const void *) ap: params, paramLength);
1072
1073    mCtx->setTLS(oldTLS);
1074}
1075
1076void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
1077    //rsAssert(!script->mFieldIsObject[slot]);
1078    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
1079
1080    //if (mIntrinsicID) {
1081        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
1082        //return;
1083    //}
1084
1085    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1086    if (!destPtr) {
1087        //ALOGV("Calling setVar on slot = %i which is null", slot);
1088        return;
1089    }
1090
1091    memcpy(destPtr, data, dataLength);
1092}
1093
1094void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
1095    //rsAssert(!script->mFieldIsObject[slot]);
1096    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
1097
1098    int32_t *srcPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1099    if (!srcPtr) {
1100        //ALOGV("Calling setVar on slot = %i which is null", slot);
1101        return;
1102    }
1103    memcpy(data, srcPtr, dataLength);
1104}
1105
1106
1107void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
1108                                                const Element *elem,
1109                                                const uint32_t *dims, size_t dimLength) {
1110    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1111    if (!destPtr) {
1112        //ALOGV("Calling setVar on slot = %i which is null", slot);
1113        return;
1114    }
1115
1116    // We want to look at dimension in terms of integer components,
1117    // but dimLength is given in terms of bytes.
1118    dimLength /= sizeof(int);
1119
1120    // Only a single dimension is currently supported.
1121    rsAssert(dimLength == 1);
1122    if (dimLength == 1) {
1123        // First do the increment loop.
1124        size_t stride = elem->getSizeBytes();
1125        const char *cVal = reinterpret_cast<const char *>(data);
1126        for (uint32_t i = 0; i < dims[0]; i++) {
1127            elem->incRefs(cVal);
1128            cVal += stride;
1129        }
1130
1131        // Decrement loop comes after (to prevent race conditions).
1132        char *oldVal = reinterpret_cast<char *>(destPtr);
1133        for (uint32_t i = 0; i < dims[0]; i++) {
1134            elem->decRefs(oldVal);
1135            oldVal += stride;
1136        }
1137    }
1138
1139    memcpy(destPtr, data, dataLength);
1140}
1141
1142void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
1143
1144    //rsAssert(!script->mFieldIsObject[slot]);
1145    //ALOGE("setGlobalBind %i %p", slot, data);
1146
1147    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1148    if (!destPtr) {
1149        //ALOGV("Calling setVar on slot = %i which is null", slot);
1150        return;
1151    }
1152
1153    void *ptr = nullptr;
1154    mBoundAllocs[slot] = data;
1155    if (data) {
1156        ptr = data->mHal.drvState.lod[0].mallocPtr;
1157    }
1158    memcpy(destPtr, &ptr, sizeof(void *));
1159}
1160
1161void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
1162
1163    //rsAssert(script->mFieldIsObject[slot]);
1164    //ALOGE("setGlobalObj %i %p", slot, data);
1165
1166    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1167    if (!destPtr) {
1168        //ALOGV("Calling setVar on slot = %i which is null", slot);
1169        return;
1170    }
1171
1172    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
1173}
1174
1175RsdCpuScriptImpl::~RsdCpuScriptImpl() {
1176#ifndef RS_COMPATIBILITY_LIB
1177
1178    if (mCompilerDriver) {
1179        delete mCompilerDriver;
1180    }
1181
1182#endif
1183
1184    if (mFieldIsObject) {
1185        for (size_t i = 0; i < mExportedVariableCount; ++i) {
1186            if (mFieldIsObject[i]) {
1187                if (mFieldAddress[i] != nullptr) {
1188                    rs_object_base *obj_addr =
1189                        reinterpret_cast<rs_object_base *>(mFieldAddress[i]);
1190                    rsrClearObject(mCtx->getContext(), obj_addr);
1191                }
1192            }
1193        }
1194    }
1195
1196    if (mInvokeFunctions) delete[] mInvokeFunctions;
1197    if (mForEachFunctions) delete[] mForEachFunctions;
1198    if (mFieldAddress) delete[] mFieldAddress;
1199    if (mFieldIsObject) delete[] mFieldIsObject;
1200    if (mForEachSignatures) delete[] mForEachSignatures;
1201    if (mBoundAllocs) delete[] mBoundAllocs;
1202    if (mScriptSO) {
1203        dlclose(mScriptSO);
1204    }
1205}
1206
1207Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
1208    if (!ptr) {
1209        return nullptr;
1210    }
1211
1212    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
1213        Allocation *a = mBoundAllocs[ct];
1214        if (!a) continue;
1215        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
1216            return a;
1217        }
1218    }
1219    ALOGE("rsGetAllocation, failed to find %p", ptr);
1220    return nullptr;
1221}
1222
1223void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
1224                                 uint32_t inLen, Allocation * aout,
1225                                 const void * usr, uint32_t usrLen,
1226                                 const RsScriptCall *sc) {}
1227
1228void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
1229                                  uint32_t inLen, Allocation * aout,
1230                                  const void * usr, uint32_t usrLen,
1231                                  const RsScriptCall *sc) {}
1232
1233
1234}
1235}
1236