rsCpuScript.cpp revision eaba5a3ca215729258dcf9ac6f0bb5f88c78f998
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19
20#ifdef RS_COMPATIBILITY_LIB
21    #include <set>
22    #include <string>
23    #include <dlfcn.h>
24    #include <stdio.h>
25    #include <stdlib.h>
26    #include <string.h>
27    #include <sys/stat.h>
28    #include <unistd.h>
29    #include <fstream>
30    #include <iostream>
31#else
32    #include <bcc/BCCContext.h>
33    #include <bcc/Config/Config.h>
34    #include <bcc/Renderscript/RSCompilerDriver.h>
35    #include <bcc/Renderscript/RSExecutable.h>
36    #include <bcc/Renderscript/RSInfo.h>
37    #include <bcinfo/MetadataExtractor.h>
38    #include <cutils/properties.h>
39
40    #include <sys/types.h>
41    #include <sys/wait.h>
42    #include <unistd.h>
43
44    #include <string>
45    #include <vector>
46#endif
47
48namespace {
49#ifdef RS_COMPATIBILITY_LIB
50
51// Create a len length string containing random characters from [A-Za-z0-9].
52static std::string getRandomString(size_t len) {
53    char buf[len + 1];
54    for (size_t i = 0; i < len; i++) {
55        uint32_t r = arc4random() & 0xffff;
56        r %= 62;
57        if (r < 26) {
58            // lowercase
59            buf[i] = 'a' + r;
60        } else if (r < 52) {
61            // uppercase
62            buf[i] = 'A' + (r - 26);
63        } else {
64            // Use a number
65            buf[i] = '0' + (r - 52);
66        }
67    }
68    buf[len] = '\0';
69    return std::string(buf);
70}
71
// Make sure the directory at path is usable by this process.
// Returns true when path is already readable, writable and searchable, or
// when it could be created with mode 0700; returns false otherwise.
static bool ensureCacheDirExists(const char *path) {
    const bool alreadyUsable = (access(path, R_OK | W_OK | X_OK) == 0);
    // mkdir() is only attempted when the access check did not succeed.
    return alreadyUsable || (mkdir(path, 0700) == 0);
}
83
84// Copy the file named \p srcFile to \p dstFile.
85// Return 0 on success and -1 if anything wasn't copied.
86static int copyFile(const char *dstFile, const char *srcFile) {
87    std::ifstream srcStream(srcFile);
88    if (!srcStream) {
89        ALOGE("Could not verify or read source file: %s", srcFile);
90        return -1;
91    }
92    std::ofstream dstStream(dstFile);
93    if (!dstStream) {
94        ALOGE("Could not verify or write destination file: %s", dstFile);
95        return -1;
96    }
97    dstStream << srcStream.rdbuf();
98    if (!dstStream) {
99        ALOGE("Could not write destination file: %s", dstFile);
100        return -1;
101    }
102
103    srcStream.close();
104    dstStream.close();
105
106    return 0;
107}
108
109// Attempt to load the shared library from origName, but then fall back to
110// creating a copy of the shared library if necessary (to ensure instancing).
111// This function returns the dlopen()-ed handle if successful.
112static void *loadSOHelper(const char *origName, const char *cacheDir,
113                          const char *resName) {
114    // Keep track of which .so libraries have been loaded. Once a library is
115    // in the set (per-process granularity), we must instead make a copy of
116    // the original shared object (randomly named .so file) and load that one
117    // instead. If we don't do this, we end up aliasing global data between
118    // the various Script instances (which are supposed to be completely
119    // independent).
120    static std::set<std::string> LoadedLibraries;
121
122    void *loaded = nullptr;
123
124    // Skip everything if we don't even have the original library available.
125    if (access(origName, F_OK) != 0) {
126        return nullptr;
127    }
128
129    // Common path is that we have not loaded this Script/library before.
130    if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
131        loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
132        if (loaded) {
133            LoadedLibraries.insert(origName);
134        }
135        return loaded;
136    }
137
138    std::string newName(cacheDir);
139    newName.append("/com.android.renderscript.cache/");
140
141    if (!ensureCacheDirExists(newName.c_str())) {
142        ALOGE("Could not verify or create cache dir: %s", cacheDir);
143        return nullptr;
144    }
145
146    // Construct an appropriately randomized filename for the copy.
147    newName.append("librs.");
148    newName.append(resName);
149    newName.append("#");
150    newName.append(getRandomString(6));  // 62^6 potential filename variants.
151    newName.append(".so");
152
153    int r = copyFile(newName.c_str(), origName);
154    if (r != 0) {
155        ALOGE("Could not create copy %s -> %s", origName, newName.c_str());
156        return nullptr;
157    }
158    loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
159    r = unlink(newName.c_str());
160    if (r != 0) {
161        ALOGE("Could not unlink copy %s", newName.c_str());
162    }
163    if (loaded) {
164        LoadedLibraries.insert(newName.c_str());
165    }
166
167    return loaded;
168}
169
170// Load the shared library referred to by cacheDir and resName. If we have
171// already loaded this library, we instead create a new copy (in the
172// cache dir) and then load that. We then immediately destroy the copy.
173// This is required behavior to implement script instancing for the support
174// library, since shared objects are loaded and de-duped by name only.
175static void *loadSharedLibrary(const char *cacheDir, const char *resName) {
176    void *loaded = nullptr;
177#ifndef RS_SERVER
178    std::string scriptSOName(cacheDir);
179    size_t cutPos = scriptSOName.rfind("cache");
180    if (cutPos != std::string::npos) {
181        scriptSOName.erase(cutPos);
182    } else {
183        ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
184    }
185    scriptSOName.append("/lib/librs.");
186#else
187    std::string scriptSOName("lib");
188#endif
189    scriptSOName.append(resName);
190    scriptSOName.append(".so");
191
192    // We should check if we can load the library from the standard app
193    // location for shared libraries first.
194    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
195
196    if (loaded == nullptr) {
197        ALOGE("Unable to open shared library (%s): %s",
198              scriptSOName.c_str(), dlerror());
199
200        // One final attempt to find the library in "/system/lib".
201        // We do this to allow bundled applications to use the compatibility
202        // library fallback path. Those applications don't have a private
203        // library path, so they need to install to the system directly.
204        // Note that this is really just a testing path.
205        std::string scriptSONameSystem("/system/lib/librs.");
206        scriptSONameSystem.append(resName);
207        scriptSONameSystem.append(".so");
208        loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
209                              resName);
210        if (loaded == nullptr) {
211            ALOGE("Unable to open system shared library (%s): %s",
212                  scriptSONameSystem.c_str(), dlerror());
213        }
214    }
215
216    return loaded;
217}
218
219#else  // RS_COMPATIBILITY_LIB is not defined
220
221static bool is_force_recompile() {
222#ifdef RS_SERVER
223  return false;
224#else
225  char buf[PROPERTY_VALUE_MAX];
226
227  // Re-compile if floating point precision has been overridden.
228  property_get("debug.rs.precision", buf, "");
229  if (buf[0] != '\0') {
230    return true;
231  }
232
233  // Re-compile if debug.rs.forcerecompile is set.
234  property_get("debug.rs.forcerecompile", buf, "0");
235  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
236    return true;
237  } else {
238    return false;
239  }
240#endif  // RS_SERVER
241}
242
243const static char *BCC_EXE_PATH = "/system/bin/bcc";
244
245static void setCompileArguments(std::vector<const char*>* args,
246                                const std::string& bcFileName,
247                                const char* cacheDir, const char* resName,
248                                const char* core_lib, bool useRSDebugContext,
249                                const char* bccPluginName) {
250    rsAssert(cacheDir && resName && core_lib);
251    args->push_back(BCC_EXE_PATH);
252    args->push_back("-o");
253    args->push_back(resName);
254    args->push_back("-output_path");
255    args->push_back(cacheDir);
256    args->push_back("-bclib");
257    args->push_back(core_lib);
258    args->push_back("-mtriple");
259    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
260
261    // Execute the bcc compiler.
262    if (useRSDebugContext) {
263        args->push_back("-rs-debug-ctx");
264    } else {
265        // Only load additional libraries for compiles that don't use
266        // the debug context.
267        if (bccPluginName && strlen(bccPluginName) > 0) {
268            args->push_back("-load");
269            args->push_back(bccPluginName);
270        }
271    }
272
273    args->push_back(bcFileName.c_str());
274    args->push_back(nullptr);
275}
276
277static bool compileBitcode(const std::string &bcFileName,
278                           const char *bitcode,
279                           size_t bitcodeSize,
280                           const char **compileArguments,
281                           const std::string &compileCommandLine) {
282    rsAssert(bitcode && bitcodeSize);
283
284    FILE *bcfile = fopen(bcFileName.c_str(), "w");
285    if (!bcfile) {
286        ALOGE("Could not write to %s", bcFileName.c_str());
287        return false;
288    }
289    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
290    fclose(bcfile);
291    if (nwritten != bitcodeSize) {
292        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
293              bcFileName.c_str());
294        return false;
295    }
296
297    pid_t pid = fork();
298
299    switch (pid) {
300    case -1: {  // Error occurred (we attempt no recovery)
301        ALOGE("Couldn't fork for bcc compiler execution");
302        return false;
303    }
304    case 0: {  // Child process
305        ALOGV("Invoking BCC with: %s", compileCommandLine.c_str());
306        execv(BCC_EXE_PATH, (char* const*)compileArguments);
307
308        ALOGE("execv() failed: %s", strerror(errno));
309        abort();
310        return false;
311    }
312    default: {  // Parent process (actual driver)
313        // Wait on child process to finish compiling the source.
314        int status = 0;
315        pid_t w = waitpid(pid, &status, 0);
316        if (w == -1) {
317            ALOGE("Could not wait for bcc compiler");
318            return false;
319        }
320
321        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
322            return true;
323        }
324
325        ALOGE("bcc compiler terminated unexpectedly");
326        return false;
327    }
328    }
329}
330
331#endif  // !defined(RS_COMPATIBILITY_LIB)
332}  // namespace
333
334namespace android {
335namespace renderscript {
336
337#ifdef RS_COMPATIBILITY_LIB
338#define MAXLINE 500
339#define MAKE_STR_HELPER(S) #S
340#define MAKE_STR(S) MAKE_STR_HELPER(S)
341#define EXPORT_VAR_STR "exportVarCount: "
342#define EXPORT_FUNC_STR "exportFuncCount: "
343#define EXPORT_FOREACH_STR "exportForEachCount: "
344#define OBJECT_SLOT_STR "objectSlotCount: "
345
// Copy one line from the string *ppstr into the buffer s, fgets()-style.
//
// Copying stops after a newline (which is kept in s), at the end of the
// string, or once size - 1 characters have been copied, whichever comes
// first; s is always '\0'-terminated. *ppstr is advanced past the characters
// that were consumed but never past the terminating '\0', so it always keeps
// pointing inside the source string and a later call can detect the end.
//
// Returns s when something was copied (or size == 1), and nullptr when s,
// ppstr or *ppstr is null, size < 1, or the end of the string was reached.
static char* strgets(char *s, int size, const char **ppstr) {
    if (!s || !ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
        return nullptr;
    }

    int i;
    for (i = 0; i < (size - 1); i++) {
        s[i] = **ppstr;
        if (s[i] == '\0') {
            // Stay on the terminator: stepping over it would make the next
            // call read beyond the end of the source string.
            return s;
        }
        (*ppstr)++;
        if (s[i] == '\n') {
            s[i+1] = '\0';
            return s;
        }
    }

    // size has been exceeded.
    s[i] = '\0';

    return s;
}
370#endif
371
372RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
373    mCtx = ctx;
374    mScript = s;
375
376#ifdef RS_COMPATIBILITY_LIB
377    mScriptSO = nullptr;
378    mInvokeFunctions = nullptr;
379    mForEachFunctions = nullptr;
380    mFieldAddress = nullptr;
381    mFieldIsObject = nullptr;
382    mForEachSignatures = nullptr;
383#else
384    mCompilerContext = nullptr;
385    mCompilerDriver = nullptr;
386    mExecutable = nullptr;
387#endif
388
389
390    mRoot = nullptr;
391    mRootExpand = nullptr;
392    mInit = nullptr;
393    mFreeChildren = nullptr;
394
395
396    mBoundAllocs = nullptr;
397    mIntrinsicData = nullptr;
398    mIsThreadable = true;
399}
400
401
402bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
403                            uint8_t const *bitcode, size_t bitcodeSize,
404                            uint32_t flags, char const *bccPluginName) {
405    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
406    //ALOGE("rsdScriptInit %p %p", rsc, script);
407
408    mCtx->lockMutex();
409#ifndef RS_COMPATIBILITY_LIB
410    bool useRSDebugContext = false;
411
412    mCompilerContext = nullptr;
413    mCompilerDriver = nullptr;
414    mExecutable = nullptr;
415
416    mCompilerContext = new bcc::BCCContext();
417    if (mCompilerContext == nullptr) {
418        ALOGE("bcc: FAILS to create compiler context (out of memory)");
419        mCtx->unlockMutex();
420        return false;
421    }
422
423    mCompilerDriver = new bcc::RSCompilerDriver();
424    if (mCompilerDriver == nullptr) {
425        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
426        mCtx->unlockMutex();
427        return false;
428    }
429
430    // Configure symbol resolvers (via compiler-rt and the RS runtime).
431    mRSRuntime.setLookupFunction(lookupRuntimeStub);
432    mRSRuntime.setContext(this);
433    mResolver.chainResolver(mCompilerRuntime);
434    mResolver.chainResolver(mRSRuntime);
435
436    // Run any compiler setup functions we have been provided with.
437    RSSetupCompilerCallback setupCompilerCallback =
438            mCtx->getSetupCompilerCallback();
439    if (setupCompilerCallback != nullptr) {
440        setupCompilerCallback(mCompilerDriver);
441    }
442
443    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
444    if (!bitcodeMetadata.extract()) {
445        ALOGE("Could not extract metadata from bitcode");
446        mCtx->unlockMutex();
447        return false;
448    }
449
450    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
451
452    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
453        mCompilerDriver->setDebugContext(true);
454        useRSDebugContext = true;
455    }
456
457    std::string bcFileName(cacheDir);
458    bcFileName.append("/");
459    bcFileName.append(resName);
460    bcFileName.append(".bc");
461
462    std::vector<const char*> compileArguments;
463    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
464                        useRSDebugContext, bccPluginName);
465    // The last argument of compileArguments ia a nullptr, so remove 1 from the size.
466    std::string compileCommandLine =
467                bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data());
468
469    if (!is_force_recompile()) {
470        // Load the compiled script that's in the cache, if any.
471        mExecutable = bcc::RSCompilerDriver::loadScript(cacheDir, resName, (const char*)bitcode,
472                                                        bitcodeSize, compileCommandLine.c_str(),
473                                                        mResolver);
474    }
475
476    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
477    // again.
478    if (mExecutable == nullptr) {
479        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize, compileArguments.data(),
480                            compileCommandLine)) {
481            ALOGE("bcc: FAILS to compile '%s'", resName);
482            mCtx->unlockMutex();
483            return false;
484        }
485        mExecutable = bcc::RSCompilerDriver::loadScript(cacheDir, resName, (const char*)bitcode,
486                                                        bitcodeSize, compileCommandLine.c_str(),
487                                                        mResolver);
488        if (mExecutable == nullptr) {
489            ALOGE("bcc: FAILS to load freshly compiled executable for '%s'", resName);
490            mCtx->unlockMutex();
491            return false;
492        }
493    }
494
495    mExecutable->setThreadable(mIsThreadable);
496    if (!mExecutable->syncInfo()) {
497        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
498    }
499
500    mRoot = reinterpret_cast<int (*)()>(mExecutable->getSymbolAddress("root"));
501    mRootExpand =
502        reinterpret_cast<int (*)()>(mExecutable->getSymbolAddress("root.expand"));
503    mInit = reinterpret_cast<void (*)()>(mExecutable->getSymbolAddress("init"));
504    mFreeChildren =
505        reinterpret_cast<void (*)()>(mExecutable->getSymbolAddress(".rs.dtor"));
506
507
508    if (bitcodeMetadata.getExportVarCount()) {
509        mBoundAllocs = new Allocation *[bitcodeMetadata.getExportVarCount()];
510        memset(mBoundAllocs, 0, sizeof(void *) * bitcodeMetadata.getExportVarCount());
511    }
512
513    for (size_t i = 0; i < bitcodeMetadata.getExportForEachSignatureCount(); i++) {
514        char* name = new char[strlen(bitcodeMetadata.getExportForEachNameList()[i]) + 1];
515        mExportedForEachFuncList.push_back(
516                    std::make_pair(name, bitcodeMetadata.getExportForEachSignatureList()[i]));
517    }
518
519#else  // RS_COMPATIBILITY_LIB is defined
520
521    mScriptSO = loadSharedLibrary(cacheDir, resName);
522
523    if (mScriptSO) {
524        char line[MAXLINE];
525        mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
526        if (mRoot) {
527            //ALOGE("Found root(): %p", mRoot);
528        }
529        mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
530        if (mRootExpand) {
531            //ALOGE("Found root.expand(): %p", mRootExpand);
532        }
533        mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
534        if (mInit) {
535            //ALOGE("Found init(): %p", mInit);
536        }
537        mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
538        if (mFreeChildren) {
539            //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
540        }
541
542        const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info");
543        if (rsInfo) {
544            //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo);
545        }
546
547        size_t varCount = 0;
548        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
549            goto error;
550        }
551        if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
552            ALOGE("Invalid export var count!: %s", line);
553            goto error;
554        }
555
556        mExportedVariableCount = varCount;
557        //ALOGE("varCount: %zu", varCount);
558        if (varCount > 0) {
559            // Start by creating/zeroing this member, since we don't want to
560            // accidentally clean up invalid pointers later (if we error out).
561            mFieldIsObject = new bool[varCount];
562            if (mFieldIsObject == nullptr) {
563                goto error;
564            }
565            memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject));
566            mFieldAddress = new void*[varCount];
567            if (mFieldAddress == nullptr) {
568                goto error;
569            }
570            for (size_t i = 0; i < varCount; ++i) {
571                if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
572                    goto error;
573                }
574                char *c = strrchr(line, '\n');
575                if (c) {
576                    *c = '\0';
577                }
578                mFieldAddress[i] = dlsym(mScriptSO, line);
579                if (mFieldAddress[i] == nullptr) {
580                    ALOGE("Failed to find variable address for %s: %s",
581                          line, dlerror());
582                    // Not a critical error if we don't find a global variable.
583                }
584                else {
585                    //ALOGE("Found variable %s at %p", line,
586                    //mFieldAddress[i]);
587                }
588            }
589        }
590
591        size_t funcCount = 0;
592        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
593            goto error;
594        }
595        if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
596            ALOGE("Invalid export func count!: %s", line);
597            goto error;
598        }
599
600        mExportedFunctionCount = funcCount;
601        //ALOGE("funcCount: %zu", funcCount);
602
603        if (funcCount > 0) {
604            mInvokeFunctions = new InvokeFunc_t[funcCount];
605            if (mInvokeFunctions == nullptr) {
606                goto error;
607            }
608            for (size_t i = 0; i < funcCount; ++i) {
609                if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
610                    goto error;
611                }
612                char *c = strrchr(line, '\n');
613                if (c) {
614                    *c = '\0';
615                }
616
617                mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line);
618                if (mInvokeFunctions[i] == nullptr) {
619                    ALOGE("Failed to get function address for %s(): %s",
620                          line, dlerror());
621                    goto error;
622                }
623                else {
624                    //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]);
625                }
626            }
627        }
628
629        size_t forEachCount = 0;
630        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
631            goto error;
632        }
633        if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
634            ALOGE("Invalid export forEach count!: %s", line);
635            goto error;
636        }
637
638        if (forEachCount > 0) {
639
640            mForEachSignatures = new uint32_t[forEachCount];
641            if (mForEachSignatures == nullptr) {
642                goto error;
643            }
644            mForEachFunctions = new ForEachFunc_t[forEachCount];
645            if (mForEachFunctions == nullptr) {
646                goto error;
647            }
648            for (size_t i = 0; i < forEachCount; ++i) {
649                unsigned int tmpSig = 0;
650                char tmpName[MAXLINE];
651
652                if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
653                    goto error;
654                }
655                if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
656                           &tmpSig, tmpName) != 2) {
657                    ALOGE("Invalid export forEach!: %s", line);
658                    goto error;
659                }
660
661                // Lookup the expanded ForEach kernel.
662                strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
663                mForEachSignatures[i] = tmpSig;
664                mForEachFunctions[i] =
665                        (ForEachFunc_t) dlsym(mScriptSO, tmpName);
666                if (i != 0 && mForEachFunctions[i] == nullptr) {
667                    // Ignore missing root.expand functions.
668                    // root() is always specified at location 0.
669                    ALOGE("Failed to find forEach function address for %s: %s",
670                          tmpName, dlerror());
671                    goto error;
672                }
673                else {
674                    //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]);
675                }
676            }
677        }
678
679        size_t objectSlotCount = 0;
680        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
681            goto error;
682        }
683        if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
684            ALOGE("Invalid object slot count!: %s", line);
685            goto error;
686        }
687
688        if (objectSlotCount > 0) {
689            rsAssert(varCount > 0);
690            for (size_t i = 0; i < objectSlotCount; ++i) {
691                uint32_t varNum = 0;
692                if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
693                    goto error;
694                }
695                if (sscanf(line, "%u", &varNum) != 1) {
696                    ALOGE("Invalid object slot!: %s", line);
697                    goto error;
698                }
699
700                if (varNum < varCount) {
701                    mFieldIsObject[varNum] = true;
702                }
703            }
704        }
705
706        if (varCount > 0) {
707            mBoundAllocs = new Allocation *[varCount];
708            memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
709        }
710
711        if (mScriptSO == (void*)1) {
712            //rsdLookupRuntimeStub(script, "acos");
713        }
714    } else {
715        goto error;
716    }
717#endif
718    mCtx->unlockMutex();
719    return true;
720
721#ifdef RS_COMPATIBILITY_LIB
722error:
723
724    mCtx->unlockMutex();
725    delete[] mInvokeFunctions;
726    delete[] mForEachFunctions;
727    delete[] mFieldAddress;
728    delete[] mFieldIsObject;
729    delete[] mForEachSignatures;
730    delete[] mBoundAllocs;
731    if (mScriptSO) {
732        dlclose(mScriptSO);
733    }
734    return false;
735#endif
736}
737
738#ifndef RS_COMPATIBILITY_LIB
739
740#ifdef __LP64__
741#define SYSLIBPATH "/system/lib64"
742#else
743#define SYSLIBPATH "/system/lib"
744#endif
745
746const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
747                                          size_t bitcodeSize) {
748    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
749
750    // If we're debugging, use the debug library.
751    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
752        return SYSLIBPATH"/libclcore_debug.bc";
753    }
754
755    // If a callback has been registered to specify a library, use that.
756    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
757    if (selectRTCallback != nullptr) {
758        return selectRTCallback((const char*)bitcode, bitcodeSize);
759    }
760
761    // Check for a platform specific library
762#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
763    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
764    if (prec == bcinfo::RS_FP_Relaxed) {
765        // NEON-capable ARMv7a devices can use an accelerated math library
766        // for all reduced precision scripts.
767        // ARMv8 does not use NEON, as ASIMD can be used with all precision
768        // levels.
769        return SYSLIBPATH"/libclcore_neon.bc";
770    } else {
771        return defaultLib;
772    }
773#elif defined(__i386__) || defined(__x86_64__)
774    // x86 devices will use an optimized library.
775    return SYSLIBPATH"/libclcore_x86.bc";
776#else
777    return defaultLib;
778#endif
779}
780
781#endif
782
783void RsdCpuScriptImpl::populateScript(Script *script) {
784#ifndef RS_COMPATIBILITY_LIB
785    // Copy info over to runtime
786    script->mHal.info.exportedFunctionCount = mExecutable->getExportFuncAddrs().size();
787    script->mHal.info.exportedVariableCount = mExecutable->getExportVarAddrs().size();
788    script->mHal.info.exportedForeachFuncList = &mExportedForEachFuncList[0];
789    script->mHal.info.exportedPragmaCount = mExecutable->getPragmaKeys().size();
790    script->mHal.info.exportedPragmaKeyList =
791        const_cast<const char**>(&mExecutable->getPragmaKeys().front());
792    script->mHal.info.exportedPragmaValueList =
793        const_cast<const char**>(&mExecutable->getPragmaValues().front());
794
795    if (mRootExpand) {
796        script->mHal.info.root = mRootExpand;
797    } else {
798        script->mHal.info.root = mRoot;
799    }
800#else
801    // Copy info over to runtime
802    script->mHal.info.exportedFunctionCount = mExportedFunctionCount;
803    script->mHal.info.exportedVariableCount = mExportedVariableCount;
804    script->mHal.info.exportedPragmaCount = 0;
805    script->mHal.info.exportedPragmaKeyList = 0;
806    script->mHal.info.exportedPragmaValueList = 0;
807
808    // Bug, need to stash in metadata
809    if (mRootExpand) {
810        script->mHal.info.root = mRootExpand;
811    } else {
812        script->mHal.info.root = mRoot;
813    }
814#endif
815}
816
817
818typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
819
820void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
821                                        uint32_t inLen,
822                                        Allocation * aout,
823                                        const void * usr, uint32_t usrLen,
824                                        const RsScriptCall *sc,
825                                        MTLaunchStruct *mtls) {
826
827    memset(mtls, 0, sizeof(MTLaunchStruct));
828
829    for (int index = inLen; --index >= 0;) {
830        const Allocation* ain = ains[index];
831
832        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
833        if (ain != nullptr &&
834            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
835
836            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
837                                         "rsForEach called with null in allocations");
838            return;
839        }
840    }
841
842    if (aout &&
843        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
844
845        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
846                                     "rsForEach called with null out allocations");
847        return;
848    }
849
850    if (inLen > 0) {
851        const Allocation *ain0   = ains[0];
852        const Type       *inType = ain0->getType();
853
854        mtls->fep.dimX = inType->getDimX();
855        mtls->fep.dimY = inType->getDimY();
856        mtls->fep.dimZ = inType->getDimZ();
857
858        for (int Index = inLen; --Index >= 1;) {
859            if (!ain0->hasSameDims(ains[Index])) {
860                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
861                  "Failed to launch kernel; dimensions of input and output allocations do not match.");
862
863                return;
864            }
865        }
866
867    } else if (aout != nullptr) {
868        const Type *outType = aout->getType();
869
870        mtls->fep.dimX = outType->getDimX();
871        mtls->fep.dimY = outType->getDimY();
872        mtls->fep.dimZ = outType->getDimZ();
873
874    } else {
875        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
876                                     "rsForEach called with null allocations");
877        return;
878    }
879
880    if (inLen > 0 && aout != nullptr) {
881        if (!ains[0]->hasSameDims(aout)) {
882            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
883              "Failed to launch kernel; dimensions of input and output allocations do not match.");
884
885            return;
886        }
887    }
888
889    if (!sc || (sc->xEnd == 0)) {
890        mtls->xEnd = mtls->fep.dimX;
891    } else {
892        rsAssert(sc->xStart < mtls->fep.dimX);
893        rsAssert(sc->xEnd <= mtls->fep.dimX);
894        rsAssert(sc->xStart < sc->xEnd);
895        mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
896        mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
897        if (mtls->xStart >= mtls->xEnd) return;
898    }
899
900    if (!sc || (sc->yEnd == 0)) {
901        mtls->yEnd = mtls->fep.dimY;
902    } else {
903        rsAssert(sc->yStart < mtls->fep.dimY);
904        rsAssert(sc->yEnd <= mtls->fep.dimY);
905        rsAssert(sc->yStart < sc->yEnd);
906        mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
907        mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
908        if (mtls->yStart >= mtls->yEnd) return;
909    }
910
911    if (!sc || (sc->zEnd == 0)) {
912        mtls->zEnd = mtls->fep.dimZ;
913    } else {
914        rsAssert(sc->zStart < mtls->fep.dimZ);
915        rsAssert(sc->zEnd <= mtls->fep.dimZ);
916        rsAssert(sc->zStart < sc->zEnd);
917        mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
918        mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
919        if (mtls->zStart >= mtls->zEnd) return;
920    }
921
922    mtls->xEnd     = rsMax((uint32_t)1, mtls->xEnd);
923    mtls->yEnd     = rsMax((uint32_t)1, mtls->yEnd);
924    mtls->zEnd     = rsMax((uint32_t)1, mtls->zEnd);
925    mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
926
927    rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0));
928
929    mtls->rsc        = mCtx;
930    mtls->ains       = ains;
931    mtls->aout       = aout;
932    mtls->fep.usr    = usr;
933    mtls->fep.usrLen = usrLen;
934    mtls->mSliceSize = 1;
935    mtls->mSliceNum  = 0;
936
937    mtls->fep.inPtrs    = nullptr;
938    mtls->fep.inStrides = nullptr;
939    mtls->isThreadable  = mIsThreadable;
940
941    if (inLen > 0) {
942
943        if (inLen <= RS_KERNEL_INPUT_THRESHOLD) {
944            mtls->fep.inPtrs    = (const uint8_t**)mtls->inPtrsBuff;
945            mtls->fep.inStrides = mtls->inStridesBuff;
946        } else {
947            mtls->fep.heapAllocatedArrays = true;
948
949            mtls->fep.inPtrs    = new const uint8_t*[inLen];
950            mtls->fep.inStrides = new StridePair[inLen];
951        }
952
953        mtls->fep.inLen = inLen;
954
955        for (int index = inLen; --index >= 0;) {
956            const Allocation *ain = ains[index];
957
958            mtls->fep.inPtrs[index] =
959              (const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr;
960
961            mtls->fep.inStrides[index].eStride =
962              ain->getType()->getElementSizeBytes();
963            mtls->fep.inStrides[index].yStride =
964              ain->mHal.drvState.lod[0].stride;
965        }
966    }
967
968    mtls->fep.outPtr            = nullptr;
969    mtls->fep.outStride.eStride = 0;
970    mtls->fep.outStride.yStride = 0;
971    if (aout != nullptr) {
972        mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
973
974        mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes();
975        mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride;
976    }
977}
978
979
980void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
981                                     const Allocation ** ains,
982                                     uint32_t inLen,
983                                     Allocation * aout,
984                                     const void * usr,
985                                     uint32_t usrLen,
986                                     const RsScriptCall *sc) {
987
988    MTLaunchStruct mtls;
989
990    forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
991    forEachKernelSetup(slot, &mtls);
992
993    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
994    mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
995    mCtx->setTLS(oldTLS);
996}
997
998void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
999    mtls->script = this;
1000    mtls->fep.slot = slot;
1001#ifndef RS_COMPATIBILITY_LIB
1002    rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
1003    mtls->kernel = reinterpret_cast<ForEachFunc_t>(
1004                      mExecutable->getExportForeachFuncAddrs()[slot]);
1005    rsAssert(mtls->kernel != nullptr);
1006    mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
1007#else
1008    mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]);
1009    rsAssert(mtls->kernel != nullptr);
1010    mtls->sig = mForEachSignatures[slot];
1011#endif
1012}
1013
1014int RsdCpuScriptImpl::invokeRoot() {
1015    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1016    int ret = mRoot();
1017    mCtx->setTLS(oldTLS);
1018    return ret;
1019}
1020
1021void RsdCpuScriptImpl::invokeInit() {
1022    if (mInit) {
1023        mInit();
1024    }
1025}
1026
1027void RsdCpuScriptImpl::invokeFreeChildren() {
1028    if (mFreeChildren) {
1029        mFreeChildren();
1030    }
1031}
1032
1033void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
1034                                      size_t paramLength) {
1035    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
1036    void * ap = nullptr;
1037
1038#if defined(__x86_64__)
1039    // The invoked function could have input parameter of vector type for example float4 which
1040    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
1041    // So try to align void* params before passing them into RS exported function.
1042
1043    if ((uint8_t)(uint64_t)params & 0x0F) {
1044        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
1045            memcpy(ap, params, paramLength);
1046        } else {
1047            ALOGE("x86_64: invokeFunction memalign error, still use params which is not 16 bytes aligned.");
1048        }
1049    }
1050#endif
1051
1052    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
1053    reinterpret_cast<void (*)(const void *, uint32_t)>(
1054#ifndef RS_COMPATIBILITY_LIB
1055        mExecutable->getExportFuncAddrs()[slot])(ap ? (const void *)ap : params, paramLength);
1056#else
1057        mInvokeFunctions[slot])(ap ? (const void *)ap : params, paramLength);
1058#endif
1059
1060#if defined(__x86_64__)
1061    if (ap) free(ap);
1062#endif
1063
1064    mCtx->setTLS(oldTLS);
1065}
1066
1067void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
1068    //rsAssert(!script->mFieldIsObject[slot]);
1069    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
1070
1071    //if (mIntrinsicID) {
1072        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
1073        //return;
1074    //}
1075
1076#ifndef RS_COMPATIBILITY_LIB
1077    int32_t *destPtr = reinterpret_cast<int32_t *>(
1078                          mExecutable->getExportVarAddrs()[slot]);
1079#else
1080    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1081#endif
1082    if (!destPtr) {
1083        //ALOGV("Calling setVar on slot = %i which is null", slot);
1084        return;
1085    }
1086
1087    memcpy(destPtr, data, dataLength);
1088}
1089
1090void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
1091    //rsAssert(!script->mFieldIsObject[slot]);
1092    //ALOGE("getGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
1093
1094#ifndef RS_COMPATIBILITY_LIB
1095    int32_t *srcPtr = reinterpret_cast<int32_t *>(
1096                          mExecutable->getExportVarAddrs()[slot]);
1097#else
1098    int32_t *srcPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1099#endif
1100    if (!srcPtr) {
1101        //ALOGV("Calling setVar on slot = %i which is null", slot);
1102        return;
1103    }
1104    memcpy(data, srcPtr, dataLength);
1105}
1106
1107
1108void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
1109                                                const Element *elem,
1110                                                const uint32_t *dims, size_t dimLength) {
1111
1112#ifndef RS_COMPATIBILITY_LIB
1113    int32_t *destPtr = reinterpret_cast<int32_t *>(
1114        mExecutable->getExportVarAddrs()[slot]);
1115#else
1116    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1117#endif
1118    if (!destPtr) {
1119        //ALOGV("Calling setVar on slot = %i which is null", slot);
1120        return;
1121    }
1122
1123    // We want to look at dimension in terms of integer components,
1124    // but dimLength is given in terms of bytes.
1125    dimLength /= sizeof(int);
1126
1127    // Only a single dimension is currently supported.
1128    rsAssert(dimLength == 1);
1129    if (dimLength == 1) {
1130        // First do the increment loop.
1131        size_t stride = elem->getSizeBytes();
1132        const char *cVal = reinterpret_cast<const char *>(data);
1133        for (uint32_t i = 0; i < dims[0]; i++) {
1134            elem->incRefs(cVal);
1135            cVal += stride;
1136        }
1137
1138        // Decrement loop comes after (to prevent race conditions).
1139        char *oldVal = reinterpret_cast<char *>(destPtr);
1140        for (uint32_t i = 0; i < dims[0]; i++) {
1141            elem->decRefs(oldVal);
1142            oldVal += stride;
1143        }
1144    }
1145
1146    memcpy(destPtr, data, dataLength);
1147}
1148
1149void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
1150
1151    //rsAssert(!script->mFieldIsObject[slot]);
1152    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
1153
1154#ifndef RS_COMPATIBILITY_LIB
1155    int32_t *destPtr = reinterpret_cast<int32_t *>(
1156                          mExecutable->getExportVarAddrs()[slot]);
1157#else
1158    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1159#endif
1160    if (!destPtr) {
1161        //ALOGV("Calling setVar on slot = %i which is null", slot);
1162        return;
1163    }
1164
1165    void *ptr = nullptr;
1166    mBoundAllocs[slot] = data;
1167    if(data) {
1168        ptr = data->mHal.drvState.lod[0].mallocPtr;
1169    }
1170    memcpy(destPtr, &ptr, sizeof(void *));
1171}
1172
1173void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
1174
1175    //rsAssert(script->mFieldIsObject[slot]);
1176    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
1177
1178#ifndef RS_COMPATIBILITY_LIB
1179    int32_t *destPtr = reinterpret_cast<int32_t *>(
1180                          mExecutable->getExportVarAddrs()[slot]);
1181#else
1182    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1183#endif
1184
1185    if (!destPtr) {
1186        //ALOGV("Calling setVar on slot = %i which is null", slot);
1187        return;
1188    }
1189
1190    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
1191}
1192
1193RsdCpuScriptImpl::~RsdCpuScriptImpl() {
1194#ifndef RS_COMPATIBILITY_LIB
1195    if (mExecutable) {
1196        std::vector<void *>::const_iterator var_addr_iter =
1197            mExecutable->getExportVarAddrs().begin();
1198        std::vector<void *>::const_iterator var_addr_end =
1199            mExecutable->getExportVarAddrs().end();
1200
1201        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
1202            mExecutable->getInfo().getObjectSlots().begin();
1203        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
1204            mExecutable->getInfo().getObjectSlots().end();
1205
1206        while ((var_addr_iter != var_addr_end) &&
1207               (is_object_iter != is_object_end)) {
1208            // The field address can be nullptr if the script-side has optimized
1209            // the corresponding global variable away.
1210            rs_object_base *obj_addr =
1211                reinterpret_cast<rs_object_base *>(*var_addr_iter);
1212            if (*is_object_iter) {
1213                if (*var_addr_iter != nullptr && mCtx->getContext() != nullptr) {
1214                    rsrClearObject(mCtx->getContext(), obj_addr);
1215                }
1216            }
1217            var_addr_iter++;
1218            is_object_iter++;
1219        }
1220    }
1221
1222    if (mCompilerContext) {
1223        delete mCompilerContext;
1224    }
1225    if (mCompilerDriver) {
1226        delete mCompilerDriver;
1227    }
1228    if (mExecutable) {
1229        delete mExecutable;
1230    }
1231    if (mBoundAllocs) {
1232        delete[] mBoundAllocs;
1233    }
1234
1235    for (size_t i = 0; i < mExportedForEachFuncList.size(); i++) {
1236        delete[] mExportedForEachFuncList[i].first;
1237    }
1238#else
1239    if (mFieldIsObject) {
1240        for (size_t i = 0; i < mExportedVariableCount; ++i) {
1241            if (mFieldIsObject[i]) {
1242                if (mFieldAddress[i] != nullptr) {
1243                    rs_object_base *obj_addr =
1244                        reinterpret_cast<rs_object_base *>(mFieldAddress[i]);
1245                    rsrClearObject(mCtx->getContext(), obj_addr);
1246                }
1247            }
1248        }
1249    }
1250
1251    if (mInvokeFunctions) delete[] mInvokeFunctions;
1252    if (mForEachFunctions) delete[] mForEachFunctions;
1253    if (mFieldAddress) delete[] mFieldAddress;
1254    if (mFieldIsObject) delete[] mFieldIsObject;
1255    if (mForEachSignatures) delete[] mForEachSignatures;
1256    if (mBoundAllocs) delete[] mBoundAllocs;
1257    if (mScriptSO) {
1258        dlclose(mScriptSO);
1259    }
1260#endif
1261}
1262
1263Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
1264    if (!ptr) {
1265        return nullptr;
1266    }
1267
1268    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
1269        Allocation *a = mBoundAllocs[ct];
1270        if (!a) continue;
1271        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
1272            return a;
1273        }
1274    }
1275    ALOGE("rsGetAllocation, failed to find %p", ptr);
1276    return nullptr;
1277}
1278
1279void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
1280                                 uint32_t inLen, Allocation * aout,
1281                                 const void * usr, uint32_t usrLen,
1282                                 const RsScriptCall *sc) {}
1283
1284void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
1285                                  uint32_t inLen, Allocation * aout,
1286                                  const void * usr, uint32_t usrLen,
1287                                  const RsScriptCall *sc) {}
1288
1289
1290}
1291}
1292