rsCpuScript.cpp revision cee9e9898ed549d00c1fd1c911feff66dbb4225e
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19#include "rsCpuExecutable.h"
20
21#ifdef RS_COMPATIBILITY_LIB
22    #include <stdio.h>
23    #include <sys/stat.h>
24    #include <unistd.h>
25#else
26    #include "rsCppUtils.h"
27
28    #include <bcc/BCCContext.h>
29    #include <bcc/Config/Config.h>
30    #include <bcc/Renderscript/RSCompilerDriver.h>
31    #include <bcinfo/MetadataExtractor.h>
32    #include <cutils/properties.h>
33
34    #include <zlib.h>
35    #include <sys/file.h>
36    #include <sys/types.h>
37    #include <unistd.h>
38
39    #include <string>
40    #include <vector>
41#endif
42
43#include <set>
44#include <string>
45#include <dlfcn.h>
46#include <stdlib.h>
47#include <string.h>
48#include <iostream>
49#include <sstream>
50
51#ifdef __LP64__
52#define SYSLIBPATH "/system/lib64"
53#else
54#define SYSLIBPATH "/system/lib"
55#endif
56
57namespace {
58
// Debug switch: when true, storeRSInfoFromSO() calls dumpGlobalInfo() on the
// freshly loaded script executable.
static const bool kDebugGlobalVariables = false;
60
61#ifndef RS_COMPATIBILITY_LIB
62
63static bool is_force_recompile() {
64#ifdef RS_SERVER
65  return false;
66#else
67  char buf[PROPERTY_VALUE_MAX];
68
69  // Re-compile if floating point precision has been overridden.
70  property_get("debug.rs.precision", buf, "");
71  if (buf[0] != '\0') {
72    return true;
73  }
74
75  // Re-compile if debug.rs.forcerecompile is set.
76  property_get("debug.rs.forcerecompile", buf, "0");
77  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
78    return true;
79  } else {
80    return false;
81  }
82#endif  // RS_SERVER
83}
84
85static void setCompileArguments(std::vector<const char*>* args,
86                                const std::string& bcFileName,
87                                const char* cacheDir, const char* resName,
88                                const char* core_lib, bool useRSDebugContext,
89                                const char* bccPluginName, bool emitGlobalInfo,
90                                bool emitGlobalInfoSkipConstant) {
91    rsAssert(cacheDir && resName && core_lib);
92    args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
93    args->push_back("-unroll-runtime");
94    args->push_back("-scalarize-load-store");
95    if (emitGlobalInfo) {
96        args->push_back("-rs-global-info");
97        if (emitGlobalInfoSkipConstant) {
98            args->push_back("-rs-global-info-skip-constant");
99        }
100    }
101    args->push_back("-o");
102    args->push_back(resName);
103    args->push_back("-output_path");
104    args->push_back(cacheDir);
105    args->push_back("-bclib");
106    args->push_back(core_lib);
107    args->push_back("-mtriple");
108    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
109
110    // Enable workaround for A53 codegen by default.
111#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
112    args->push_back("-aarch64-fix-cortex-a53-835769");
113#endif
114
115    // Execute the bcc compiler.
116    if (useRSDebugContext) {
117        args->push_back("-rs-debug-ctx");
118    } else {
119        // Only load additional libraries for compiles that don't use
120        // the debug context.
121        if (bccPluginName && strlen(bccPluginName) > 0) {
122            args->push_back("-load");
123            args->push_back(bccPluginName);
124        }
125    }
126
127    args->push_back("-fPIC");
128    args->push_back("-embedRSInfo");
129
130    args->push_back(bcFileName.c_str());
131    args->push_back(nullptr);
132}
133
134static bool compileBitcode(const std::string &bcFileName,
135                           const char *bitcode,
136                           size_t bitcodeSize,
137                           std::vector<const char *> &compileArguments) {
138    rsAssert(bitcode && bitcodeSize);
139
140    FILE *bcfile = fopen(bcFileName.c_str(), "w");
141    if (!bcfile) {
142        ALOGE("Could not write to %s", bcFileName.c_str());
143        return false;
144    }
145    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
146    fclose(bcfile);
147    if (nwritten != bitcodeSize) {
148        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
149              bcFileName.c_str());
150        return false;
151    }
152
153    return android::renderscript::rsuExecuteCommand(
154                   android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
155                   compileArguments.size()-1, compileArguments.data());
156}
157
158bool isChecksumNeeded() {
159    char buf[PROPERTY_VALUE_MAX];
160    property_get("ro.debuggable", buf, "");
161    return (buf[0] == '1');
162}
163
164bool addFileToChecksum(const char *fileName, uint32_t &checksum) {
165    int FD = open(fileName, O_RDONLY);
166    if (FD == -1) {
167        ALOGE("Cannot open file \'%s\' to compute checksum", fileName);
168        return false;
169    }
170
171    char buf[256];
172    while (true) {
173        ssize_t nread = read(FD, buf, sizeof(buf));
174        if (nread < 0) { // bail out on failed read
175            ALOGE("Error while computing checksum for file \'%s\'", fileName);
176            return false;
177        }
178
179        checksum = adler32(checksum, (const unsigned char *) buf, nread);
180        if (static_cast<size_t>(nread) < sizeof(buf)) // EOF
181            break;
182    }
183
184    if (close(FD) != 0) {
185        ALOGE("Cannot close file \'%s\' after computing checksum", fileName);
186        return false;
187    }
188    return true;
189}
190
191#endif  // !defined(RS_COMPATIBILITY_LIB)
192}  // namespace
193
194namespace android {
195namespace renderscript {
196
197#ifndef RS_COMPATIBILITY_LIB
198
199uint32_t constructBuildChecksum(uint8_t const *bitcode, size_t bitcodeSize,
200                                const char *commandLine,
201                                const char** bccFiles, size_t numFiles) {
202    uint32_t checksum = adler32(0L, Z_NULL, 0);
203
204    // include checksum of bitcode
205    if (bitcode != nullptr && bitcodeSize > 0) {
206        checksum = adler32(checksum, bitcode, bitcodeSize);
207    }
208
209    // include checksum of command line arguments
210    checksum = adler32(checksum, (const unsigned char *) commandLine,
211                       strlen(commandLine));
212
213    // include checksum of bccFiles
214    for (size_t i = 0; i < numFiles; i++) {
215        const char* bccFile = bccFiles[i];
216        if (bccFile[0] != 0 && !addFileToChecksum(bccFile, checksum)) {
217            // return empty checksum instead of something partial/corrupt
218            return 0;
219        }
220    }
221
222    return checksum;
223}
224
225#endif  // !RS_COMPATIBILITY_LIB
226
227RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
228    mCtx = ctx;
229    mScript = s;
230
231    mScriptSO = nullptr;
232
233#ifndef RS_COMPATIBILITY_LIB
234    mCompilerDriver = nullptr;
235#endif
236
237
238    mRoot = nullptr;
239    mRootExpand = nullptr;
240    mInit = nullptr;
241    mFreeChildren = nullptr;
242    mScriptExec = nullptr;
243
244    mBoundAllocs = nullptr;
245    mIntrinsicData = nullptr;
246    mIsThreadable = true;
247
248    mBuildChecksum = 0;
249    mChecksumNeeded = false;
250}
251
252bool RsdCpuScriptImpl::storeRSInfoFromSO() {
253    // The shared object may have an invalid build checksum.
254    // Validate and fail early.
255    mScriptExec = ScriptExecutable::createFromSharedObject(
256            mCtx->getContext(), mScriptSO,
257            mChecksumNeeded ? mBuildChecksum : 0);
258
259    if (mScriptExec == nullptr) {
260        return false;
261    }
262
263    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
264    if (mRoot) {
265        //ALOGE("Found root(): %p", mRoot);
266    }
267    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
268    if (mRootExpand) {
269        //ALOGE("Found root.expand(): %p", mRootExpand);
270    }
271    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
272    if (mInit) {
273        //ALOGE("Found init(): %p", mInit);
274    }
275    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
276    if (mFreeChildren) {
277        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
278    }
279
280    size_t varCount = mScriptExec->getExportedVariableCount();
281    if (varCount > 0) {
282        mBoundAllocs = new Allocation *[varCount];
283        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
284    }
285
286    mIsThreadable = mScriptExec->getThreadable();
287    //ALOGE("Script isThreadable? %d", mIsThreadable);
288
289    if (kDebugGlobalVariables) {
290        mScriptExec->dumpGlobalInfo();
291    }
292
293    return true;
294}
295
296bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
297                            uint8_t const *bitcode, size_t bitcodeSize,
298                            uint32_t flags, char const *bccPluginName) {
299    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
300    // bitcode, bitcodeSize, flags, lookupFunc);
301    //ALOGE("rsdScriptInit %p %p", rsc, script);
302
303    mCtx->lockMutex();
304#ifndef RS_COMPATIBILITY_LIB
305    bool useRSDebugContext = false;
306
307    mCompilerDriver = nullptr;
308
309    mCompilerDriver = new bcc::RSCompilerDriver();
310    if (mCompilerDriver == nullptr) {
311        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
312        mCtx->unlockMutex();
313        return false;
314    }
315
316    // Run any compiler setup functions we have been provided with.
317    RSSetupCompilerCallback setupCompilerCallback =
318            mCtx->getSetupCompilerCallback();
319    if (setupCompilerCallback != nullptr) {
320        setupCompilerCallback(mCompilerDriver);
321    }
322
323    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
324    if (!bitcodeMetadata.extract()) {
325        ALOGE("Could not extract metadata from bitcode");
326        mCtx->unlockMutex();
327        return false;
328    }
329
330    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
331
332    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
333        mCompilerDriver->setDebugContext(true);
334        useRSDebugContext = true;
335    }
336
337    std::string bcFileName(cacheDir);
338    bcFileName.append("/");
339    bcFileName.append(resName);
340    bcFileName.append(".bc");
341
342    std::vector<const char*> compileArguments;
343    bool emitGlobalInfo = mCtx->getEmbedGlobalInfo();
344    bool emitGlobalInfoSkipConstant = mCtx->getEmbedGlobalInfoSkipConstant();
345    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
346                        useRSDebugContext, bccPluginName, emitGlobalInfo,
347                        emitGlobalInfoSkipConstant);
348
349    mChecksumNeeded = isChecksumNeeded();
350    if (mChecksumNeeded) {
351        std::vector<const char *> bccFiles = { BCC_EXE_PATH,
352                                               core_lib,
353                                             };
354
355        // The last argument of compileArguments is a nullptr, so remove 1 from
356        // the size.
357        std::unique_ptr<const char> compileCommandLine(
358            rsuJoinStrings(compileArguments.size()-1, compileArguments.data()));
359
360        mBuildChecksum = constructBuildChecksum(bitcode, bitcodeSize,
361                                                compileCommandLine.get(),
362                                                bccFiles.data(), bccFiles.size());
363
364        if (mBuildChecksum == 0) {
365            // cannot compute checksum but verification is enabled
366            mCtx->unlockMutex();
367            return false;
368        }
369    }
370    else {
371        // add a dummy/constant as a checksum if verification is disabled
372        mBuildChecksum = 0xabadcafe;
373    }
374
375    // Append build checksum to commandline
376    // Handle the terminal nullptr in compileArguments
377    compileArguments.pop_back();
378    compileArguments.push_back("-build-checksum");
379    std::stringstream ss;
380    ss << std::hex << mBuildChecksum;
381    compileArguments.push_back(ss.str().c_str());
382    compileArguments.push_back(nullptr);
383
384    if (!is_force_recompile() && !useRSDebugContext) {
385        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
386
387        // Read RS info from the shared object to detect checksum mismatch
388        if (mScriptSO != nullptr && !storeRSInfoFromSO()) {
389            dlclose(mScriptSO);
390            mScriptSO = nullptr;
391        }
392    }
393
394    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
395    // again.
396    if (mScriptSO == nullptr) {
397        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
398                            compileArguments))
399        {
400            ALOGE("bcc: FAILS to compile '%s'", resName);
401            mCtx->unlockMutex();
402            return false;
403        }
404
405        if (!SharedLibraryUtils::createSharedLibrary(mCtx->getContext()->getDriverName(),
406                                                     cacheDir, resName)) {
407            ALOGE("Linker: Failed to link object file '%s'", resName);
408            mCtx->unlockMutex();
409            return false;
410        }
411
412        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
413        if (mScriptSO == nullptr) {
414            ALOGE("Unable to load '%s'", resName);
415            mCtx->unlockMutex();
416            return false;
417        }
418
419        // Read RS symbol information from the .so.
420        if (!storeRSInfoFromSO()) {
421            goto error;
422        }
423    }
424
425    mBitcodeFilePath.setTo(bcFileName.c_str());
426
427#else  // RS_COMPATIBILITY_LIB is defined
428    const char *nativeLibDir = mCtx->getContext()->getNativeLibDir();
429    mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir);
430
431    if (!mScriptSO) {
432        goto error;
433    }
434
435    if (!storeRSInfoFromSO()) {
436        goto error;
437    }
438#endif
439    mCtx->unlockMutex();
440    return true;
441
442error:
443
444    mCtx->unlockMutex();
445    if (mScriptSO) {
446        dlclose(mScriptSO);
447        mScriptSO = nullptr;
448    }
449    return false;
450}
451
452#ifndef RS_COMPATIBILITY_LIB
453
454const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
455                                          size_t bitcodeSize) {
456    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
457
458    // If we're debugging, use the debug library.
459    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
460        return SYSLIBPATH"/libclcore_debug.bc";
461    }
462
463    // If a callback has been registered to specify a library, use that.
464    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
465    if (selectRTCallback != nullptr) {
466        return selectRTCallback((const char*)bitcode, bitcodeSize);
467    }
468
469    // Check for a platform specific library
470#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
471    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
472    if (prec == bcinfo::RS_FP_Relaxed) {
473        // NEON-capable ARMv7a devices can use an accelerated math library
474        // for all reduced precision scripts.
475        // ARMv8 does not use NEON, as ASIMD can be used with all precision
476        // levels.
477        return SYSLIBPATH"/libclcore_neon.bc";
478    } else {
479        return defaultLib;
480    }
481#elif defined(__i386__) || defined(__x86_64__)
482    // x86 devices will use an optimized library.
483    return SYSLIBPATH"/libclcore_x86.bc";
484#else
485    return defaultLib;
486#endif
487}
488
489#endif
490
491void RsdCpuScriptImpl::populateScript(Script *script) {
492    // Copy info over to runtime
493    script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
494    script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
495    script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
496    script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
497    script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
498
499    // Bug, need to stash in metadata
500    if (mRootExpand) {
501        script->mHal.info.root = mRootExpand;
502    } else {
503        script->mHal.info.root = mRoot;
504    }
505}
506
507
508bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
509                                        uint32_t inLen,
510                                        Allocation * aout,
511                                        const void * usr, uint32_t usrLen,
512                                        const RsScriptCall *sc,
513                                        MTLaunchStruct *mtls) {
514
515    memset(mtls, 0, sizeof(MTLaunchStruct));
516
517    for (int index = inLen; --index >= 0;) {
518        const Allocation* ain = ains[index];
519
520        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
521        if (ain != nullptr &&
522            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
523
524            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
525                                         "rsForEach called with null in allocations");
526            return false;
527        }
528    }
529
530    if (aout &&
531        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
532
533        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
534                                     "rsForEach called with null out allocations");
535        return false;
536    }
537
538    if (inLen > 0) {
539        const Allocation *ain0   = ains[0];
540        const Type       *inType = ain0->getType();
541
542        mtls->fep.dim.x = inType->getDimX();
543        mtls->fep.dim.y = inType->getDimY();
544        mtls->fep.dim.z = inType->getDimZ();
545
546        for (int Index = inLen; --Index >= 1;) {
547            if (!ain0->hasSameDims(ains[Index])) {
548                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
549                  "Failed to launch kernel; dimensions of input and output"
550                  "allocations do not match.");
551
552                return false;
553            }
554        }
555
556    } else if (aout != nullptr) {
557        const Type *outType = aout->getType();
558
559        mtls->fep.dim.x = outType->getDimX();
560        mtls->fep.dim.y = outType->getDimY();
561        mtls->fep.dim.z = outType->getDimZ();
562
563    } else if (sc != nullptr) {
564        mtls->fep.dim.x = sc->xEnd;
565        mtls->fep.dim.y = sc->yEnd;
566        mtls->fep.dim.z = 0;
567    } else {
568        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
569                                     "rsForEach called with null allocations");
570        return false;
571    }
572
573    if (inLen > 0 && aout != nullptr) {
574        if (!ains[0]->hasSameDims(aout)) {
575            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
576              "Failed to launch kernel; dimensions of input and output allocations do not match.");
577
578            return false;
579        }
580    }
581
582    if (!sc || (sc->xEnd == 0)) {
583        mtls->end.x = mtls->fep.dim.x;
584    } else {
585        mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
586        mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
587        if (mtls->start.x >= mtls->end.x) {
588            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
589              "Failed to launch kernel; Invalid xStart or xEnd.");
590            return false;
591        }
592    }
593
594    if (!sc || (sc->yEnd == 0)) {
595        mtls->end.y = mtls->fep.dim.y;
596    } else {
597        mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
598        mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
599        if (mtls->start.y >= mtls->end.y) {
600            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
601              "Failed to launch kernel; Invalid yStart or yEnd.");
602            return false;
603        }
604    }
605
606    if (!sc || (sc->zEnd == 0)) {
607        mtls->end.z = mtls->fep.dim.z;
608    } else {
609        mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
610        mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
611        if (mtls->start.z >= mtls->end.z) {
612            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
613              "Failed to launch kernel; Invalid zStart or zEnd.");
614            return false;
615        }
616    }
617
618    if (!sc || (sc->arrayEnd == 0)) {
619        mtls->end.array[0] = mtls->fep.dim.array[0];
620    } else {
621        mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
622        mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
623        if (mtls->start.array[0] >= mtls->end.array[0]) {
624            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
625              "Failed to launch kernel; Invalid arrayStart or arrayEnd.");
626            return false;
627        }
628    }
629
630    if (!sc || (sc->array2End == 0)) {
631        mtls->end.array[1] = mtls->fep.dim.array[1];
632    } else {
633        mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
634        mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
635        if (mtls->start.array[1] >= mtls->end.array[1]) {
636            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
637              "Failed to launch kernel; Invalid array2Start or array2End.");
638            return false;
639        }
640    }
641
642    if (!sc || (sc->array3End == 0)) {
643        mtls->end.array[2] = mtls->fep.dim.array[2];
644    } else {
645        mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
646        mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
647        if (mtls->start.array[2] >= mtls->end.array[2]) {
648            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
649              "Failed to launch kernel; Invalid array3Start or array3End.");
650            return false;
651        }
652    }
653
654    if (!sc || (sc->array4End == 0)) {
655        mtls->end.array[3] = mtls->fep.dim.array[3];
656    } else {
657        mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
658        mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
659        if (mtls->start.array[3] >= mtls->end.array[3]) {
660            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
661              "Failed to launch kernel; Invalid array4Start or array4End.");
662            return false;
663        }
664    }
665
666
667    // The X & Y walkers always want 0-1 min even if dim is not present
668    mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
669    mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
670
671    mtls->rsc        = mCtx;
672    if (ains) {
673        memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
674    }
675    mtls->aout[0]    = aout;
676    mtls->fep.usr    = usr;
677    mtls->fep.usrLen = usrLen;
678    mtls->mSliceSize = 1;
679    mtls->mSliceNum  = 0;
680
681    mtls->isThreadable  = mIsThreadable;
682
683    if (inLen > 0) {
684        mtls->fep.inLen = inLen;
685        for (int index = inLen; --index >= 0;) {
686            mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
687            mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
688        }
689    }
690
691    if (aout != nullptr) {
692        mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
693        mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
694    }
695
696    // All validation passed, ok to launch threads
697    return true;
698}
699
700
701void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
702                                     const Allocation ** ains,
703                                     uint32_t inLen,
704                                     Allocation * aout,
705                                     const void * usr,
706                                     uint32_t usrLen,
707                                     const RsScriptCall *sc) {
708
709    MTLaunchStruct mtls;
710
711    if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
712        forEachKernelSetup(slot, &mtls);
713
714        RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
715        mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
716        mCtx->setTLS(oldTLS);
717    }
718}
719
720void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
721    mtls->script = this;
722    mtls->fep.slot = slot;
723    mtls->kernel = mScriptExec->getForEachFunction(slot);
724    rsAssert(mtls->kernel != nullptr);
725    mtls->sig = mScriptExec->getForEachSignature(slot);
726}
727
728int RsdCpuScriptImpl::invokeRoot() {
729    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
730    int ret = mRoot();
731    mCtx->setTLS(oldTLS);
732    return ret;
733}
734
735void RsdCpuScriptImpl::invokeInit() {
736    if (mInit) {
737        mInit();
738    }
739}
740
741void RsdCpuScriptImpl::invokeFreeChildren() {
742    if (mFreeChildren) {
743        mFreeChildren();
744    }
745}
746
747void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
748                                      size_t paramLength) {
749    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
750    void * ap = nullptr;
751
752#if defined(__x86_64__)
753    // The invoked function could have input parameter of vector type for example float4 which
754    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
755    // So try to align void* params before passing them into RS exported function.
756
757    if ((uint8_t)(uint64_t)params & 0x0F) {
758        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
759            memcpy(ap, params, paramLength);
760        } else {
761            ALOGE("x86_64: invokeFunction memalign error, still use params which"
762                  " is not 16 bytes aligned.");
763        }
764    }
765#endif
766
767    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
768    reinterpret_cast<void (*)(const void *, uint32_t)>(
769        mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
770
771    mCtx->setTLS(oldTLS);
772}
773
774void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
775    //rsAssert(!script->mFieldIsObject[slot]);
776    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
777
778    //if (mIntrinsicID) {
779        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
780        //return;
781    //}
782
783    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
784    if (!destPtr) {
785        //ALOGV("Calling setVar on slot = %i which is null", slot);
786        return;
787    }
788
789    memcpy(destPtr, data, dataLength);
790}
791
792void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
793    //rsAssert(!script->mFieldIsObject[slot]);
794    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
795
796    int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
797    if (!srcPtr) {
798        //ALOGV("Calling setVar on slot = %i which is null", slot);
799        return;
800    }
801    memcpy(data, srcPtr, dataLength);
802}
803
804
805void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
806                                                const Element *elem,
807                                                const uint32_t *dims, size_t dimLength) {
808    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
809    if (!destPtr) {
810        //ALOGV("Calling setVar on slot = %i which is null", slot);
811        return;
812    }
813
814    // We want to look at dimension in terms of integer components,
815    // but dimLength is given in terms of bytes.
816    dimLength /= sizeof(int);
817
818    // Only a single dimension is currently supported.
819    rsAssert(dimLength == 1);
820    if (dimLength == 1) {
821        // First do the increment loop.
822        size_t stride = elem->getSizeBytes();
823        const char *cVal = reinterpret_cast<const char *>(data);
824        for (uint32_t i = 0; i < dims[0]; i++) {
825            elem->incRefs(cVal);
826            cVal += stride;
827        }
828
829        // Decrement loop comes after (to prevent race conditions).
830        char *oldVal = reinterpret_cast<char *>(destPtr);
831        for (uint32_t i = 0; i < dims[0]; i++) {
832            elem->decRefs(oldVal);
833            oldVal += stride;
834        }
835    }
836
837    memcpy(destPtr, data, dataLength);
838}
839
840void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
841
842    //rsAssert(!script->mFieldIsObject[slot]);
843    //ALOGE("setGlobalBind %i %p", slot, data);
844
845    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
846    if (!destPtr) {
847        //ALOGV("Calling setVar on slot = %i which is null", slot);
848        return;
849    }
850
851    void *ptr = nullptr;
852    mBoundAllocs[slot] = data;
853    if (data) {
854        ptr = data->mHal.drvState.lod[0].mallocPtr;
855    }
856    memcpy(destPtr, &ptr, sizeof(void *));
857}
858
859void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
860
861    //rsAssert(script->mFieldIsObject[slot]);
862    //ALOGE("setGlobalObj %i %p", slot, data);
863
864    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
865    if (!destPtr) {
866        //ALOGV("Calling setVar on slot = %i which is null", slot);
867        return;
868    }
869
870    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
871}
872
873const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const {
874    return mScriptExec->getFieldName(slot);
875}
876
877RsdCpuScriptImpl::~RsdCpuScriptImpl() {
878#ifndef RS_COMPATIBILITY_LIB
879    if (mCompilerDriver) {
880        delete mCompilerDriver;
881    }
882#endif
883
884    if (mScriptExec != nullptr) {
885        delete mScriptExec;
886    }
887    if (mBoundAllocs) delete[] mBoundAllocs;
888    if (mScriptSO) {
889        dlclose(mScriptSO);
890    }
891}
892
893Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
894    if (!ptr) {
895        return nullptr;
896    }
897
898    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
899        Allocation *a = mBoundAllocs[ct];
900        if (!a) continue;
901        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
902            return a;
903        }
904    }
905    ALOGE("rsGetAllocation, failed to find %p", ptr);
906    return nullptr;
907}
908
909int RsdCpuScriptImpl::getGlobalEntries() const {
910    return mScriptExec->getGlobalEntries();
911}
912
913const char * RsdCpuScriptImpl::getGlobalName(int i) const {
914    return mScriptExec->getGlobalName(i);
915}
916
917const void * RsdCpuScriptImpl::getGlobalAddress(int i) const {
918    return mScriptExec->getGlobalAddress(i);
919}
920
921size_t RsdCpuScriptImpl::getGlobalSize(int i) const {
922    return mScriptExec->getGlobalSize(i);
923}
924
925uint32_t RsdCpuScriptImpl::getGlobalProperties(int i) const {
926    return mScriptExec->getGlobalProperties(i);
927}
928
929void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
930                                 uint32_t inLen, Allocation * aout,
931                                 const void * usr, uint32_t usrLen,
932                                 const RsScriptCall *sc) {}
933
934void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
935                                  uint32_t inLen, Allocation * aout,
936                                  const void * usr, uint32_t usrLen,
937                                  const RsScriptCall *sc) {}
938
939
940}
941}
942