rsCpuScript.cpp revision e195a3f57ace3b66d313a6ee88c6e93d5c9d87f4
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19
20#ifndef FAKE_ARM64_BUILD
21#ifdef RS_COMPATIBILITY_LIB
22    #include <set>
23    #include <string>
24    #include <dlfcn.h>
25    #include <stdio.h>
26    #include <stdlib.h>
27    #include <string.h>
28    #include <sys/stat.h>
29    #include <unistd.h>
30#else
31    #include <bcc/BCCContext.h>
32    #include <bcc/Renderscript/RSCompilerDriver.h>
33    #include <bcc/Renderscript/RSExecutable.h>
34    #include <bcc/Renderscript/RSInfo.h>
35    #include <bcinfo/MetadataExtractor.h>
36    #include <cutils/properties.h>
37
38    #include <sys/types.h>
39    #include <sys/wait.h>
40    #include <unistd.h>
41#endif
42#endif
43
44namespace {
45#ifndef FAKE_ARM64_BUILD
46#ifdef RS_COMPATIBILITY_LIB
47
48// Create a len length string containing random characters from [A-Za-z0-9].
49static std::string getRandomString(size_t len) {
50    char buf[len + 1];
51    for (size_t i = 0; i < len; i++) {
52        uint32_t r = arc4random() & 0xffff;
53        r %= 62;
54        if (r < 26) {
55            // lowercase
56            buf[i] = 'a' + r;
57        } else if (r < 52) {
58            // uppercase
59            buf[i] = 'A' + (r - 26);
60        } else {
61            // Use a number
62            buf[i] = '0' + (r - 52);
63        }
64    }
65    buf[len] = '\0';
66    return std::string(buf);
67}
68
// Make sure the directory at path is usable.
// Returns true when the process can already read, write and search path, or
// when path could be created with mode 0700; returns false otherwise.
static bool ensureCacheDirExists(const char *path) {
    const bool alreadyUsable = (access(path, R_OK | W_OK | X_OK) == 0);
    // mkdir() is attempted only when the access() probe fails.
    return alreadyUsable || (mkdir(path, 0700) == 0);
}
80
81// Attempt to load the shared library from origName, but then fall back to
82// creating the symlinked shared library if necessary (to ensure instancing).
83// This function returns the dlopen()-ed handle if successful.
84static void *loadSOHelper(const char *origName, const char *cacheDir,
85                          const char *resName) {
86    // Keep track of which .so libraries have been loaded. Once a library is
87    // in the set (per-process granularity), we must instead make a symlink to
88    // the original shared object (randomly named .so file) and load that one
89    // instead. If we don't do this, we end up aliasing global data between
90    // the various Script instances (which are supposed to be completely
91    // independent).
92    static std::set<std::string> LoadedLibraries;
93
94    void *loaded = NULL;
95
96    // Skip everything if we don't even have the original library available.
97    if (access(origName, F_OK) != 0) {
98        return NULL;
99    }
100
101    // Common path is that we have not loaded this Script/library before.
102    if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
103        loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
104        if (loaded) {
105            LoadedLibraries.insert(origName);
106        }
107        return loaded;
108    }
109
110    std::string newName(cacheDir);
111    newName.append("/com.android.renderscript.cache/");
112
113    if (!ensureCacheDirExists(newName.c_str())) {
114        ALOGE("Could not verify or create cache dir: %s", cacheDir);
115        return NULL;
116    }
117
118    // Construct an appropriately randomized filename for the symlink.
119    newName.append("librs.");
120    newName.append(resName);
121    newName.append("#");
122    newName.append(getRandomString(6));  // 62^6 potential filename variants.
123    newName.append(".so");
124
125    int r = symlink(origName, newName.c_str());
126    if (r != 0) {
127        ALOGE("Could not create symlink %s -> %s", newName.c_str(), origName);
128        return NULL;
129    }
130    loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
131    r = unlink(newName.c_str());
132    if (r != 0) {
133        ALOGE("Could not unlink symlink %s", newName.c_str());
134    }
135    if (loaded) {
136        LoadedLibraries.insert(newName.c_str());
137    }
138
139    return loaded;
140}
141
142// Load the shared library referred to by cacheDir and resName. If we have
143// already loaded this library, we instead create a new symlink (in the
144// cache dir) and then load that. We then immediately destroy the symlink.
145// This is required behavior to implement script instancing for the support
146// library, since shared objects are loaded and de-duped by name only.
147static void *loadSharedLibrary(const char *cacheDir, const char *resName) {
148    void *loaded = NULL;
149    //arc4random_stir();
150#ifndef RS_SERVER
151    std::string scriptSOName(cacheDir);
152    size_t cutPos = scriptSOName.rfind("cache");
153    if (cutPos != std::string::npos) {
154        scriptSOName.erase(cutPos);
155    } else {
156        ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
157    }
158    scriptSOName.append("/lib/librs.");
159#else
160    std::string scriptSOName("lib");
161#endif
162    scriptSOName.append(resName);
163    scriptSOName.append(".so");
164
165    // We should check if we can load the library from the standard app
166    // location for shared libraries first.
167    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
168
169    if (loaded == NULL) {
170        ALOGE("Unable to open shared library (%s): %s",
171              scriptSOName.c_str(), dlerror());
172
173        // One final attempt to find the library in "/system/lib".
174        // We do this to allow bundled applications to use the compatibility
175        // library fallback path. Those applications don't have a private
176        // library path, so they need to install to the system directly.
177        // Note that this is really just a testing path.
178        android::String8 scriptSONameSystem("/system/lib/librs.");
179        scriptSONameSystem.append(resName);
180        scriptSONameSystem.append(".so");
181        loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
182                              resName);
183        if (loaded == NULL) {
184            ALOGE("Unable to open system shared library (%s): %s",
185                  scriptSONameSystem.c_str(), dlerror());
186        }
187    }
188
189    return loaded;
190}
191
192
193#else
194static bool is_force_recompile() {
195#ifdef RS_SERVER
196  return false;
197#else
198  char buf[PROPERTY_VALUE_MAX];
199
200  // Re-compile if floating point precision has been overridden.
201  property_get("debug.rs.precision", buf, "");
202  if (buf[0] != '\0') {
203    return true;
204  }
205
206  // Re-compile if debug.rs.forcerecompile is set.
207  property_get("debug.rs.forcerecompile", buf, "0");
208  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
209    return true;
210  } else {
211    return false;
212  }
213#endif  // RS_SERVER
214}
215
216//#define EXTERNAL_BCC_COMPILER 1
217#ifdef EXTERNAL_BCC_COMPILER
218const static char *BCC_EXE_PATH = "/system/bin/bcc";
219
220static bool compileBitcode(const char *cacheDir,
221                           const char *resName,
222                           const char *bitcode,
223                           size_t bitcodeSize,
224                           const char *core_lib) {
225    rsAssert(cacheDir && resName && bitcode && bitcodeSize && core_lib);
226
227    android::String8 bcFilename(cacheDir);
228    bcFilename.append("/");
229    bcFilename.append(resName);
230    bcFilename.append(".bc");
231    FILE *bcfile = fopen(bcFilename.string(), "w");
232    if (!bcfile) {
233        ALOGE("Could not write to %s", bcFilename.string());
234        return false;
235    }
236    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
237    fclose(bcfile);
238    if (nwritten != bitcodeSize) {
239        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
240              bcFilename.string());
241        return false;
242    }
243
244    pid_t pid = fork();
245    switch (pid) {
246    case -1: {  // Error occurred (we attempt no recovery)
247        ALOGE("Couldn't fork for bcc compiler execution");
248        return false;
249    }
250    case 0: {  // Child process
251        // Execute the bcc compiler.
252        execl(BCC_EXE_PATH,
253              BCC_EXE_PATH,
254              "-o", resName,
255              "-output_path", cacheDir,
256              "-bclib", core_lib,
257              bcFilename.string(),
258              (char *) NULL);
259        ALOGE("execl() failed: %s", strerror(errno));
260        abort();
261        return false;
262    }
263    default: {  // Parent process (actual driver)
264        // Wait on child process to finish compiling the source.
265        int status = 0;
266        pid_t w = waitpid(pid, &status, 0);
267        if (w == -1) {
268            ALOGE("Could not wait for bcc compiler");
269            return false;
270        }
271
272        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
273            return true;
274        }
275
276        ALOGE("bcc compiler terminated unexpectedly");
277        return false;
278    }
279    }
280}
281#endif  // EXTERNAL_BCC_COMPILER
282
283#endif  // !defined(RS_COMPATIBILITY_LIB)
284#endif
285}  // namespace
286
287namespace android {
288namespace renderscript {
289
290#ifdef RS_COMPATIBILITY_LIB
// Size of the scratch buffers that hold one line of ".rs.info" text while it
// is being parsed; also used (via MAKE_STR) as an sscanf field width.
#define MAXLINE 500
// Two-level stringification: MAKE_STR(X) expands X first and then turns the
// result into a string literal, so MAKE_STR(MAXLINE) yields "500".
#define MAKE_STR_HELPER(S) #S
#define MAKE_STR(S) MAKE_STR_HELPER(S)
// Prefixes of the count lines in the ".rs.info" text. They are macros (not
// constants) because they are concatenated with format strings at compile
// time. Each *_LEN macro is the length of its prefix, computed by strlen().
#define EXPORT_VAR_STR "exportVarCount: "
#define EXPORT_VAR_STR_LEN strlen(EXPORT_VAR_STR)
#define EXPORT_FUNC_STR "exportFuncCount: "
#define EXPORT_FUNC_STR_LEN strlen(EXPORT_FUNC_STR)
#define EXPORT_FOREACH_STR "exportForEachCount: "
#define EXPORT_FOREACH_STR_LEN strlen(EXPORT_FOREACH_STR)
#define OBJECT_SLOT_STR "objectSlotCount: "
#define OBJECT_SLOT_STR_LEN strlen(OBJECT_SLOT_STR)
302
// fgets()-like reader over an in-memory, NUL-terminated string.
//
// Copies characters from the cursor *ppstr into s until a newline has been
// copied (it is kept in s), the end of the string is reached, or size - 1
// characters have been copied. s is always NUL-terminated on success and
// *ppstr is advanced past every character that was consumed.
//
// s:     destination buffer of at least size bytes.
// size:  capacity of s, including room for the terminating NUL.
// ppstr: in/out cursor into the source string.
//
// Returns s on success. Returns NULL when there is nothing left to read
// (the cursor is at the terminating '\0') or when an argument is unusable
// (NULL s, NULL cursor, size < 1).
static char* strgets(char *s, int size, const char **ppstr) {
    if (!s || !ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
        return NULL;
    }

    int i;
    for (i = 0; i < (size - 1); i++) {
        s[i] = **ppstr;
        if (s[i] == '\0') {
            // Leave the cursor ON the terminator. Stepping past it would
            // make the next call inspect memory beyond the end of the
            // source string instead of reporting end-of-input.
            return s;
        }
        (*ppstr)++;
        if (s[i] == '\n') {
            s[i+1] = '\0';
            return s;
        }
    }

    // size has been exceeded: return the truncated, terminated prefix.
    s[i] = '\0';

    return s;
}
327#endif
328
329RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
330    mCtx = ctx;
331    mScript = s;
332
333#ifndef FAKE_ARM64_BUILD
334#ifdef RS_COMPATIBILITY_LIB
335    mScriptSO = NULL;
336    mInvokeFunctions = NULL;
337    mForEachFunctions = NULL;
338    mFieldAddress = NULL;
339    mFieldIsObject = NULL;
340    mForEachSignatures = NULL;
341#else
342    mCompilerContext = NULL;
343    mCompilerDriver = NULL;
344    mExecutable = NULL;
345#endif
346
347
348    mRoot = NULL;
349    mRootExpand = NULL;
350    mInit = NULL;
351    mFreeChildren = NULL;
352
353
354    mBoundAllocs = NULL;
355    mIntrinsicData = NULL;
356    mIsThreadable = true;
357#endif
358}
359
360
361bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
362                            uint8_t const *bitcode, size_t bitcodeSize,
363                            uint32_t flags) {
364    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
365    //ALOGE("rsdScriptInit %p %p", rsc, script);
366
367    mCtx->lockMutex();
368#ifndef FAKE_ARM64_BUILD
369#ifndef RS_COMPATIBILITY_LIB
370    bcc::RSExecutable *exec = NULL;
371
372    mCompilerContext = NULL;
373    mCompilerDriver = NULL;
374    mExecutable = NULL;
375
376    mCompilerContext = new bcc::BCCContext();
377    if (mCompilerContext == NULL) {
378        ALOGE("bcc: FAILS to create compiler context (out of memory)");
379        mCtx->unlockMutex();
380        return false;
381    }
382
383    mCompilerDriver = new bcc::RSCompilerDriver();
384    if (mCompilerDriver == NULL) {
385        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
386        mCtx->unlockMutex();
387        return false;
388    }
389
390    mCompilerDriver->setRSRuntimeLookupFunction(lookupRuntimeStub);
391    mCompilerDriver->setRSRuntimeLookupContext(this);
392
393    // Run any compiler setup functions we have been provided with.
394    RSSetupCompilerCallback setupCompilerCallback =
395            mCtx->getSetupCompilerCallback();
396    if (setupCompilerCallback != NULL) {
397        setupCompilerCallback(mCompilerDriver);
398    }
399
400    const char *core_lib = bcc::RSInfo::LibCLCorePath;
401
402    bcinfo::MetadataExtractor ME((const char *) bitcode, bitcodeSize);
403    if (!ME.extract()) {
404        ALOGE("Could not extract metadata from bitcode");
405        return false;
406    }
407
408    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
409    switch (prec) {
410    case bcinfo::RS_FP_Imprecise:
411    case bcinfo::RS_FP_Relaxed:
412#if defined(ARCH_ARM_HAVE_NEON)
413        // NEON-capable devices can use an accelerated math library for all
414        // reduced precision scripts.
415        core_lib = bcc::RSInfo::LibCLCoreNEONPath;
416#endif
417        break;
418    case bcinfo::RS_FP_Full:
419        break;
420    default:
421        ALOGE("Unknown precision for bitcode");
422        return false;
423    }
424
425#if defined(__i386__)
426    // x86 devices will use an optimized library.
427     core_lib = bcc::RSInfo::LibCLCoreX86Path;
428#endif
429
430    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
431    if (selectRTCallback != NULL) {
432        core_lib = selectRTCallback((const char *)bitcode, bitcodeSize);
433    }
434
435    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
436        // Use the libclcore_debug.bc instead of the default library.
437        core_lib = bcc::RSInfo::LibCLCoreDebugPath;
438        mCompilerDriver->setDebugContext(true);
439        // Skip the cache lookup
440    } else if (!is_force_recompile()) {
441        // Attempt to just load the script from cache first if we can.
442        exec = mCompilerDriver->loadScript(cacheDir, resName,
443                                           (const char *)bitcode, bitcodeSize);
444    }
445
446    if (exec == NULL) {
447#ifdef EXTERNAL_BCC_COMPILER
448        bool built = compileBitcode(cacheDir, resName, (const char *)bitcode,
449                                    bitcodeSize, core_lib);
450#else
451        bool built = mCompilerDriver->build(*mCompilerContext, cacheDir,
452                                            resName, (const char *)bitcode,
453                                            bitcodeSize, core_lib,
454                                            mCtx->getLinkRuntimeCallback());
455#endif  // EXTERNAL_BCC_COMPILER
456        if (built) {
457            exec = mCompilerDriver->loadScript(cacheDir, resName,
458                                               (const char *)bitcode,
459                                               bitcodeSize);
460        }
461    }
462
463    if (exec == NULL) {
464        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
465        mCtx->unlockMutex();
466        return false;
467    }
468
469    mExecutable = exec;
470
471    exec->setThreadable(mIsThreadable);
472    if (!exec->syncInfo()) {
473        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
474    }
475
476    mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
477    mRootExpand =
478        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
479    mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
480    mFreeChildren =
481        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
482
483
484    const bcc::RSInfo *info = &mExecutable->getInfo();
485    if (info->getExportVarNames().size()) {
486        mBoundAllocs = new Allocation *[info->getExportVarNames().size()];
487        memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size());
488    }
489
490#else
491
492    mScriptSO = loadSharedLibrary(cacheDir, resName);
493
494    if (mScriptSO) {
495        char line[MAXLINE];
496        mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
497        if (mRoot) {
498            //ALOGE("Found root(): %p", mRoot);
499        }
500        mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
501        if (mRootExpand) {
502            //ALOGE("Found root.expand(): %p", mRootExpand);
503        }
504        mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
505        if (mInit) {
506            //ALOGE("Found init(): %p", mInit);
507        }
508        mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
509        if (mFreeChildren) {
510            //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
511        }
512
513        const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info");
514        if (rsInfo) {
515            //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo);
516        }
517
518        size_t varCount = 0;
519        if (strgets(line, MAXLINE, &rsInfo) == NULL) {
520            goto error;
521        }
522        if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
523            ALOGE("Invalid export var count!: %s", line);
524            goto error;
525        }
526
527        mExportedVariableCount = varCount;
528        //ALOGE("varCount: %zu", varCount);
529        if (varCount > 0) {
530            // Start by creating/zeroing this member, since we don't want to
531            // accidentally clean up invalid pointers later (if we error out).
532            mFieldIsObject = new bool[varCount];
533            if (mFieldIsObject == NULL) {
534                goto error;
535            }
536            memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject));
537            mFieldAddress = new void*[varCount];
538            if (mFieldAddress == NULL) {
539                goto error;
540            }
541            for (size_t i = 0; i < varCount; ++i) {
542                if (strgets(line, MAXLINE, &rsInfo) == NULL) {
543                    goto error;
544                }
545                char *c = strrchr(line, '\n');
546                if (c) {
547                    *c = '\0';
548                }
549                mFieldAddress[i] = dlsym(mScriptSO, line);
550                if (mFieldAddress[i] == NULL) {
551                    ALOGE("Failed to find variable address for %s: %s",
552                          line, dlerror());
553                    // Not a critical error if we don't find a global variable.
554                }
555                else {
556                    //ALOGE("Found variable %s at %p", line,
557                    //mFieldAddress[i]);
558                }
559            }
560        }
561
562        size_t funcCount = 0;
563        if (strgets(line, MAXLINE, &rsInfo) == NULL) {
564            goto error;
565        }
566        if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
567            ALOGE("Invalid export func count!: %s", line);
568            goto error;
569        }
570
571        mExportedFunctionCount = funcCount;
572        //ALOGE("funcCount: %zu", funcCount);
573
574        if (funcCount > 0) {
575            mInvokeFunctions = new InvokeFunc_t[funcCount];
576            if (mInvokeFunctions == NULL) {
577                goto error;
578            }
579            for (size_t i = 0; i < funcCount; ++i) {
580                if (strgets(line, MAXLINE, &rsInfo) == NULL) {
581                    goto error;
582                }
583                char *c = strrchr(line, '\n');
584                if (c) {
585                    *c = '\0';
586                }
587
588                mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line);
589                if (mInvokeFunctions[i] == NULL) {
590                    ALOGE("Failed to get function address for %s(): %s",
591                          line, dlerror());
592                    goto error;
593                }
594                else {
595                    //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]);
596                }
597            }
598        }
599
600        size_t forEachCount = 0;
601        if (strgets(line, MAXLINE, &rsInfo) == NULL) {
602            goto error;
603        }
604        if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
605            ALOGE("Invalid export forEach count!: %s", line);
606            goto error;
607        }
608
609        if (forEachCount > 0) {
610
611            mForEachSignatures = new uint32_t[forEachCount];
612            if (mForEachSignatures == NULL) {
613                goto error;
614            }
615            mForEachFunctions = new ForEachFunc_t[forEachCount];
616            if (mForEachFunctions == NULL) {
617                goto error;
618            }
619            for (size_t i = 0; i < forEachCount; ++i) {
620                unsigned int tmpSig = 0;
621                char tmpName[MAXLINE];
622
623                if (strgets(line, MAXLINE, &rsInfo) == NULL) {
624                    goto error;
625                }
626                if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
627                           &tmpSig, tmpName) != 2) {
628                    ALOGE("Invalid export forEach!: %s", line);
629                    goto error;
630                }
631
632                // Lookup the expanded ForEach kernel.
633                strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
634                mForEachSignatures[i] = tmpSig;
635                mForEachFunctions[i] =
636                        (ForEachFunc_t) dlsym(mScriptSO, tmpName);
637                if (i != 0 && mForEachFunctions[i] == NULL) {
638                    // Ignore missing root.expand functions.
639                    // root() is always specified at location 0.
640                    ALOGE("Failed to find forEach function address for %s: %s",
641                          tmpName, dlerror());
642                    goto error;
643                }
644                else {
645                    //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]);
646                }
647            }
648        }
649
650        size_t objectSlotCount = 0;
651        if (strgets(line, MAXLINE, &rsInfo) == NULL) {
652            goto error;
653        }
654        if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
655            ALOGE("Invalid object slot count!: %s", line);
656            goto error;
657        }
658
659        if (objectSlotCount > 0) {
660            rsAssert(varCount > 0);
661            for (size_t i = 0; i < objectSlotCount; ++i) {
662                uint32_t varNum = 0;
663                if (strgets(line, MAXLINE, &rsInfo) == NULL) {
664                    goto error;
665                }
666                if (sscanf(line, "%u", &varNum) != 1) {
667                    ALOGE("Invalid object slot!: %s", line);
668                    goto error;
669                }
670
671                if (varNum < varCount) {
672                    mFieldIsObject[varNum] = true;
673                }
674            }
675        }
676
677        if (varCount > 0) {
678            mBoundAllocs = new Allocation *[varCount];
679            memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
680        }
681
682        if (mScriptSO == (void*)1) {
683            //rsdLookupRuntimeStub(script, "acos");
684        }
685    } else {
686        goto error;
687    }
688#endif
689#endif // FAKE_ARM64_BUILD
690    mCtx->unlockMutex();
691    return true;
692
693#ifdef RS_COMPATIBILITY_LIB
694error:
695
696    mCtx->unlockMutex();
697    delete[] mInvokeFunctions;
698    delete[] mForEachFunctions;
699    delete[] mFieldAddress;
700    delete[] mFieldIsObject;
701    delete[] mForEachSignatures;
702    delete[] mBoundAllocs;
703    if (mScriptSO) {
704        dlclose(mScriptSO);
705    }
706    return false;
707#endif
708}
709
710void RsdCpuScriptImpl::populateScript(Script *script) {
711#ifndef FAKE_ARM64_BUILD
712#ifndef RS_COMPATIBILITY_LIB
713    const bcc::RSInfo *info = &mExecutable->getInfo();
714
715    // Copy info over to runtime
716    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
717    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
718    script->mHal.info.exportedForeachFuncList = info->getExportForeachFuncs().array();
719    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
720    script->mHal.info.exportedPragmaKeyList =
721        const_cast<const char**>(mExecutable->getPragmaKeys().array());
722    script->mHal.info.exportedPragmaValueList =
723        const_cast<const char**>(mExecutable->getPragmaValues().array());
724
725    if (mRootExpand) {
726        script->mHal.info.root = mRootExpand;
727    } else {
728        script->mHal.info.root = mRoot;
729    }
730#else
731    // Copy info over to runtime
732    script->mHal.info.exportedFunctionCount = mExportedFunctionCount;
733    script->mHal.info.exportedVariableCount = mExportedVariableCount;
734    script->mHal.info.exportedPragmaCount = 0;
735    script->mHal.info.exportedPragmaKeyList = 0;
736    script->mHal.info.exportedPragmaValueList = 0;
737
738    // Bug, need to stash in metadata
739    if (mRootExpand) {
740        script->mHal.info.root = mRootExpand;
741    } else {
742        script->mHal.info.root = mRoot;
743    }
744#endif
745#endif
746}
747
748
// Pointer to a function returning void that takes three data pointers
// (const void *, void *, const void *) followed by four uint32_t values.
// NOTE(review): not referenced in the visible part of this file; confirm
// whether it is still needed before removing it.
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
750
751void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
752                                        const void * usr, uint32_t usrLen,
753                                        const RsScriptCall *sc,
754                                        MTLaunchStruct *mtls) {
755
756    memset(mtls, 0, sizeof(MTLaunchStruct));
757
758    // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
759    if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
760        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
761        return;
762    }
763    if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
764        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
765        return;
766    }
767
768    if (ain) {
769        mtls->fep.dimX = ain->getType()->getDimX();
770        mtls->fep.dimY = ain->getType()->getDimY();
771        mtls->fep.dimZ = ain->getType()->getDimZ();
772        //mtls->dimArray = ain->getType()->getDimArray();
773    } else if (aout) {
774        mtls->fep.dimX = aout->getType()->getDimX();
775        mtls->fep.dimY = aout->getType()->getDimY();
776        mtls->fep.dimZ = aout->getType()->getDimZ();
777        //mtls->dimArray = aout->getType()->getDimArray();
778    } else {
779        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
780        return;
781    }
782
783    if (!sc || (sc->xEnd == 0)) {
784        mtls->xEnd = mtls->fep.dimX;
785    } else {
786        rsAssert(sc->xStart < mtls->fep.dimX);
787        rsAssert(sc->xEnd <= mtls->fep.dimX);
788        rsAssert(sc->xStart < sc->xEnd);
789        mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
790        mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
791        if (mtls->xStart >= mtls->xEnd) return;
792    }
793
794    if (!sc || (sc->yEnd == 0)) {
795        mtls->yEnd = mtls->fep.dimY;
796    } else {
797        rsAssert(sc->yStart < mtls->fep.dimY);
798        rsAssert(sc->yEnd <= mtls->fep.dimY);
799        rsAssert(sc->yStart < sc->yEnd);
800        mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
801        mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
802        if (mtls->yStart >= mtls->yEnd) return;
803    }
804
805    if (!sc || (sc->zEnd == 0)) {
806        mtls->zEnd = mtls->fep.dimZ;
807    } else {
808        rsAssert(sc->zStart < mtls->fep.dimZ);
809        rsAssert(sc->zEnd <= mtls->fep.dimZ);
810        rsAssert(sc->zStart < sc->zEnd);
811        mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
812        mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
813        if (mtls->zStart >= mtls->zEnd) return;
814    }
815
816    mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
817    mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
818    mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
819    mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
820
821    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
822
823    mtls->rsc = mCtx;
824    mtls->ain = ain;
825    mtls->aout = aout;
826    mtls->fep.usr = usr;
827    mtls->fep.usrLen = usrLen;
828    mtls->mSliceSize = 1;
829    mtls->mSliceNum = 0;
830
831    mtls->fep.ptrIn = NULL;
832    mtls->fep.eStrideIn = 0;
833    mtls->isThreadable = mIsThreadable;
834
835    if (ain) {
836        mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
837        mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
838        mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
839    }
840
841    mtls->fep.ptrOut = NULL;
842    mtls->fep.eStrideOut = 0;
843    if (aout) {
844        mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
845        mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
846        mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
847    }
848}
849
850
851void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
852                                     const Allocation * ain,
853                                     Allocation * aout,
854                                     const void * usr,
855                                     uint32_t usrLen,
856                                     const RsScriptCall *sc) {
857
858    MTLaunchStruct mtls;
859    forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
860    forEachKernelSetup(slot, &mtls);
861
862    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
863    mCtx->launchThreads(ain, aout, sc, &mtls);
864    mCtx->setTLS(oldTLS);
865}
866
867void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
868    mtls->script = this;
869    mtls->fep.slot = slot;
870#ifndef FAKE_ARM64_BUILD
871#ifndef RS_COMPATIBILITY_LIB
872    rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
873    mtls->kernel = reinterpret_cast<ForEachFunc_t>(
874                      mExecutable->getExportForeachFuncAddrs()[slot]);
875    rsAssert(mtls->kernel != NULL);
876    mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
877#else
878    mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]);
879    rsAssert(mtls->kernel != NULL);
880    mtls->sig = mForEachSignatures[slot];
881#endif
882#endif
883}
884
885int RsdCpuScriptImpl::invokeRoot() {
886    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
887#ifndef FAKE_ARM64_BUILD
888    int ret = mRoot();
889#else
890    int ret = 0;
891#endif
892    mCtx->setTLS(oldTLS);
893    return ret;
894}
895
896void RsdCpuScriptImpl::invokeInit() {
897#ifndef FAKE_ARM64_BUILD
898    if (mInit) {
899        mInit();
900    }
901#endif
902}
903
904void RsdCpuScriptImpl::invokeFreeChildren() {
905#ifndef FAKE_ARM64_BUILD
906    if (mFreeChildren) {
907        mFreeChildren();
908    }
909#endif
910}
911
912void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
913                                      size_t paramLength) {
914    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
915
916    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
917#ifndef FAKE_ARM64_BUILD
918    reinterpret_cast<void (*)(const void *, uint32_t)>(
919#ifndef RS_COMPATIBILITY_LIB
920        mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
921#else
922        mInvokeFunctions[slot])(params, paramLength);
923#endif
924#endif
925    mCtx->setTLS(oldTLS);
926}
927
928void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
929    //rsAssert(!script->mFieldIsObject[slot]);
930    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
931
932    //if (mIntrinsicID) {
933        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
934        //return;
935    //}
936
937#ifndef FAKE_ARM64_BUILD
938#ifndef RS_COMPATIBILITY_LIB
939    int32_t *destPtr = reinterpret_cast<int32_t *>(
940                          mExecutable->getExportVarAddrs()[slot]);
941#else
942    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
943#endif
944#else
945    int32_t *destPtr = NULL;
946#endif
947    if (!destPtr) {
948        //ALOGV("Calling setVar on slot = %i which is null", slot);
949        return;
950    }
951
952    memcpy(destPtr, data, dataLength);
953}
954
955void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
956    //rsAssert(!script->mFieldIsObject[slot]);
957    //ALOGE("getGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
958
959#ifndef FAKE_ARM64_BUILD
960#ifndef RS_COMPATIBILITY_LIB
961    int32_t *srcPtr = reinterpret_cast<int32_t *>(
962                          mExecutable->getExportVarAddrs()[slot]);
963#else
964    int32_t *srcPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
965#endif
966#else
967    int32_t *srcPtr = NULL;
968#endif
969    if (!srcPtr) {
970        //ALOGV("Calling setVar on slot = %i which is null", slot);
971        return;
972    }
973    memcpy(data, srcPtr, dataLength);
974}
975
976
977void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
978                                                const Element *elem,
979                                                const size_t *dims, size_t dimLength) {
980
981#ifndef FAKE_ARM64_BUILD
982#ifndef RS_COMPATIBILITY_LIB
983    int32_t *destPtr = reinterpret_cast<int32_t *>(
984        mExecutable->getExportVarAddrs()[slot]);
985#else
986    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
987#endif
988#else
989    int32_t *destPtr = NULL;
990#endif
991    if (!destPtr) {
992        //ALOGV("Calling setVar on slot = %i which is null", slot);
993        return;
994    }
995
996    // We want to look at dimension in terms of integer components,
997    // but dimLength is given in terms of bytes.
998    dimLength /= sizeof(int);
999
1000    // Only a single dimension is currently supported.
1001    rsAssert(dimLength == 1);
1002    if (dimLength == 1) {
1003        // First do the increment loop.
1004        size_t stride = elem->getSizeBytes();
1005        const char *cVal = reinterpret_cast<const char *>(data);
1006        for (size_t i = 0; i < dims[0]; i++) {
1007            elem->incRefs(cVal);
1008            cVal += stride;
1009        }
1010
1011        // Decrement loop comes after (to prevent race conditions).
1012        char *oldVal = reinterpret_cast<char *>(destPtr);
1013        for (size_t i = 0; i < dims[0]; i++) {
1014            elem->decRefs(oldVal);
1015            oldVal += stride;
1016        }
1017    }
1018
1019    memcpy(destPtr, data, dataLength);
1020}
1021
1022void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
1023
1024    //rsAssert(!script->mFieldIsObject[slot]);
1025    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
1026
1027#ifndef FAKE_ARM64_BUILD
1028#ifndef RS_COMPATIBILITY_LIB
1029    int32_t *destPtr = reinterpret_cast<int32_t *>(
1030                          mExecutable->getExportVarAddrs()[slot]);
1031#else
1032    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1033#endif
1034#else
1035    int32_t *destPtr = NULL;
1036#endif
1037    if (!destPtr) {
1038        //ALOGV("Calling setVar on slot = %i which is null", slot);
1039        return;
1040    }
1041
1042    void *ptr = NULL;
1043    mBoundAllocs[slot] = data;
1044    if(data) {
1045        ptr = data->mHal.drvState.lod[0].mallocPtr;
1046    }
1047    memcpy(destPtr, &ptr, sizeof(void *));
1048}
1049
1050void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
1051
1052    //rsAssert(script->mFieldIsObject[slot]);
1053    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
1054
1055    //if (mIntrinsicID) {
1056        //mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, alloc);
1057        //return;
1058    //}
1059
1060#ifndef FAKE_ARM64_BUILD
1061#ifndef RS_COMPATIBILITY_LIB
1062    int32_t *destPtr = reinterpret_cast<int32_t *>(
1063                          mExecutable->getExportVarAddrs()[slot]);
1064#else
1065    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
1066#endif
1067#else
1068    int32_t *destPtr = NULL;
1069#endif
1070
1071
1072    if (!destPtr) {
1073        //ALOGV("Calling setVar on slot = %i which is null", slot);
1074        return;
1075    }
1076
1077    rsrSetObject(mCtx->getContext(), (ObjectBase **)destPtr, data);
1078}
1079
1080RsdCpuScriptImpl::~RsdCpuScriptImpl() {
1081#ifndef FAKE_ARM64_BUILD
1082#ifndef RS_COMPATIBILITY_LIB
1083    if (mExecutable) {
1084        Vector<void *>::const_iterator var_addr_iter =
1085            mExecutable->getExportVarAddrs().begin();
1086        Vector<void *>::const_iterator var_addr_end =
1087            mExecutable->getExportVarAddrs().end();
1088
1089        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
1090            mExecutable->getInfo().getObjectSlots().begin();
1091        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
1092            mExecutable->getInfo().getObjectSlots().end();
1093
1094        while ((var_addr_iter != var_addr_end) &&
1095               (is_object_iter != is_object_end)) {
1096            // The field address can be NULL if the script-side has optimized
1097            // the corresponding global variable away.
1098            ObjectBase **obj_addr =
1099                reinterpret_cast<ObjectBase **>(*var_addr_iter);
1100            if (*is_object_iter) {
1101                if (*var_addr_iter != NULL) {
1102                    rsrClearObject(mCtx->getContext(), obj_addr);
1103                }
1104            }
1105            var_addr_iter++;
1106            is_object_iter++;
1107        }
1108    }
1109
1110    if (mCompilerContext) {
1111        delete mCompilerContext;
1112    }
1113    if (mCompilerDriver) {
1114        delete mCompilerDriver;
1115    }
1116    if (mExecutable) {
1117        delete mExecutable;
1118    }
1119    if (mBoundAllocs) {
1120        delete[] mBoundAllocs;
1121    }
1122#else
1123    if (mFieldIsObject) {
1124        for (size_t i = 0; i < mExportedVariableCount; ++i) {
1125            if (mFieldIsObject[i]) {
1126                if (mFieldAddress[i] != NULL) {
1127                    ObjectBase **obj_addr =
1128                        reinterpret_cast<ObjectBase **>(mFieldAddress[i]);
1129                    rsrClearObject(mCtx->getContext(), obj_addr);
1130                }
1131            }
1132        }
1133    }
1134
1135    if (mInvokeFunctions) delete[] mInvokeFunctions;
1136    if (mForEachFunctions) delete[] mForEachFunctions;
1137    if (mFieldAddress) delete[] mFieldAddress;
1138    if (mFieldIsObject) delete[] mFieldIsObject;
1139    if (mForEachSignatures) delete[] mForEachSignatures;
1140    if (mBoundAllocs) delete[] mBoundAllocs;
1141    if (mScriptSO) {
1142        dlclose(mScriptSO);
1143    }
1144#endif
1145#endif
1146}
1147
1148Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
1149    if (!ptr) {
1150        return NULL;
1151    }
1152
1153    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
1154        Allocation *a = mBoundAllocs[ct];
1155        if (!a) continue;
1156        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
1157            return a;
1158        }
1159    }
1160    ALOGE("rsGetAllocation, failed to find %p", ptr);
1161    return NULL;
1162}
1163
1164void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain,
1165                       Allocation * aout, const void * usr,
1166                       uint32_t usrLen, const RsScriptCall *sc)
1167{
1168}
1169
1170void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain,
1171                        Allocation * aout, const void * usr,
1172                        uint32_t usrLen, const RsScriptCall *sc)
1173{
1174}
1175
1176
1177}
1178}
1179