rsCpuScript.cpp revision cb17015fed6b11a5028f31cc804a3847e379945d
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19#include "rsCpuExecutable.h"
20
21#ifdef RS_COMPATIBILITY_LIB
22    #include <stdio.h>
23    #include <sys/stat.h>
24    #include <unistd.h>
25#else
26    #include "rsCppUtils.h"
27
28    #include <bcc/BCCContext.h>
29    #include <bcc/Config/Config.h>
30    #include <bcc/Renderscript/RSCompilerDriver.h>
31    #include <bcinfo/MetadataExtractor.h>
32    #include <cutils/properties.h>
33
34    #include <zlib.h>
35    #include <sys/file.h>
36    #include <sys/types.h>
37    #include <unistd.h>
38
39    #include <string>
40    #include <vector>
41#endif
42
43#include <set>
44#include <string>
45#include <dlfcn.h>
46#include <stdlib.h>
47#include <string.h>
48#include <iostream>
49#include <sstream>
50
51#ifdef __LP64__
52#define SYSLIBPATH "/system/lib64"
53#else
54#define SYSLIBPATH "/system/lib"
55#endif
56
57namespace {
58#ifndef RS_COMPATIBILITY_LIB
59
60static bool is_force_recompile() {
61#ifdef RS_SERVER
62  return false;
63#else
64  char buf[PROPERTY_VALUE_MAX];
65
66  // Re-compile if floating point precision has been overridden.
67  property_get("debug.rs.precision", buf, "");
68  if (buf[0] != '\0') {
69    return true;
70  }
71
72  // Re-compile if debug.rs.forcerecompile is set.
73  property_get("debug.rs.forcerecompile", buf, "0");
74  if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
75    return true;
76  } else {
77    return false;
78  }
79#endif  // RS_SERVER
80}
81
82static void setCompileArguments(std::vector<const char*>* args,
83                                const std::string& bcFileName,
84                                const char* cacheDir, const char* resName,
85                                const char* core_lib, bool useRSDebugContext,
86                                const char* bccPluginName) {
87    rsAssert(cacheDir && resName && core_lib);
88    args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
89    args->push_back("-unroll-runtime");
90    args->push_back("-scalarize-load-store");
91    args->push_back("-o");
92    args->push_back(resName);
93    args->push_back("-output_path");
94    args->push_back(cacheDir);
95    args->push_back("-bclib");
96    args->push_back(core_lib);
97    args->push_back("-mtriple");
98    args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
99
100    // Enable workaround for A53 codegen by default.
101#if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
102    args->push_back("-aarch64-fix-cortex-a53-835769");
103#endif
104
105    // Execute the bcc compiler.
106    if (useRSDebugContext) {
107        args->push_back("-rs-debug-ctx");
108    } else {
109        // Only load additional libraries for compiles that don't use
110        // the debug context.
111        if (bccPluginName && strlen(bccPluginName) > 0) {
112            args->push_back("-load");
113            args->push_back(bccPluginName);
114        }
115    }
116
117    args->push_back("-fPIC");
118    args->push_back("-embedRSInfo");
119
120    args->push_back(bcFileName.c_str());
121    args->push_back(nullptr);
122}
123
124static bool compileBitcode(const std::string &bcFileName,
125                           const char *bitcode,
126                           size_t bitcodeSize,
127                           std::vector<const char *> &compileArguments) {
128    rsAssert(bitcode && bitcodeSize);
129
130    FILE *bcfile = fopen(bcFileName.c_str(), "w");
131    if (!bcfile) {
132        ALOGE("Could not write to %s", bcFileName.c_str());
133        return false;
134    }
135    size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
136    fclose(bcfile);
137    if (nwritten != bitcodeSize) {
138        ALOGE("Could not write %zu bytes to %s", bitcodeSize,
139              bcFileName.c_str());
140        return false;
141    }
142
143    return android::renderscript::rsuExecuteCommand(
144                   android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
145                   compileArguments.size()-1, compileArguments.data());
146}
147
148bool isChecksumNeeded() {
149    char buf[PROPERTY_VALUE_MAX];
150    property_get("ro.debuggable", buf, "");
151    return (buf[0] == '1');
152}
153
154bool addFileToChecksum(const char *fileName, uint32_t &checksum) {
155    int FD = open(fileName, O_RDONLY);
156    if (FD == -1) {
157        ALOGE("Cannot open file \'%s\' to compute checksum", fileName);
158        return false;
159    }
160
161    char buf[256];
162    while (true) {
163        ssize_t nread = read(FD, buf, sizeof(buf));
164        if (nread < 0) { // bail out on failed read
165            ALOGE("Error while computing checksum for file \'%s\'", fileName);
166            return false;
167        }
168
169        checksum = adler32(checksum, (const unsigned char *) buf, nread);
170        if (static_cast<size_t>(nread) < sizeof(buf)) // EOF
171            break;
172    }
173
174    if (TEMP_FAILURE_RETRY(close(FD)) != 0) {
175        ALOGE("Cannot close file \'%s\' after computing checksum", fileName);
176        return false;
177    }
178    return true;
179}
180
181#endif  // !defined(RS_COMPATIBILITY_LIB)
182}  // namespace
183
184namespace android {
185namespace renderscript {
186
187#ifndef RS_COMPATIBILITY_LIB
188
189uint32_t constructBuildChecksum(uint8_t const *bitcode, size_t bitcodeSize,
190                                const char *commandLine,
191                                const char** bccFiles, size_t numFiles) {
192    uint32_t checksum = adler32(0L, Z_NULL, 0);
193
194    // include checksum of bitcode
195    if (bitcode != nullptr && bitcodeSize > 0) {
196        checksum = adler32(checksum, bitcode, bitcodeSize);
197    }
198
199    // include checksum of command line arguments
200    checksum = adler32(checksum, (const unsigned char *) commandLine,
201                       strlen(commandLine));
202
203    // include checksum of bccFiles
204    for (size_t i = 0; i < numFiles; i++) {
205        const char* bccFile = bccFiles[i];
206        if (bccFile[0] != 0 && !addFileToChecksum(bccFile, checksum)) {
207            // return empty checksum instead of something partial/corrupt
208            return 0;
209        }
210    }
211
212    return checksum;
213}
214
215#endif  // !RS_COMPATIBILITY_LIB
216
217RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
218    mCtx = ctx;
219    mScript = s;
220
221    mScriptSO = nullptr;
222
223#ifndef RS_COMPATIBILITY_LIB
224    mCompilerDriver = nullptr;
225#endif
226
227
228    mRoot = nullptr;
229    mRootExpand = nullptr;
230    mInit = nullptr;
231    mFreeChildren = nullptr;
232    mScriptExec = nullptr;
233
234    mBoundAllocs = nullptr;
235    mIntrinsicData = nullptr;
236    mIsThreadable = true;
237
238    mBuildChecksum = 0;
239    mChecksumNeeded = false;
240}
241
242bool RsdCpuScriptImpl::storeRSInfoFromSO() {
243    // The shared object may have an invalid build checksum.
244    // Validate and fail early.
245    mScriptExec = ScriptExecutable::createFromSharedObject(
246            mCtx->getContext(), mScriptSO,
247            mChecksumNeeded ? mBuildChecksum : 0);
248
249    if (mScriptExec == nullptr) {
250        return false;
251    }
252
253    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
254    if (mRoot) {
255        //ALOGE("Found root(): %p", mRoot);
256    }
257    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
258    if (mRootExpand) {
259        //ALOGE("Found root.expand(): %p", mRootExpand);
260    }
261    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
262    if (mInit) {
263        //ALOGE("Found init(): %p", mInit);
264    }
265    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
266    if (mFreeChildren) {
267        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
268    }
269
270    size_t varCount = mScriptExec->getExportedVariableCount();
271    if (varCount > 0) {
272        mBoundAllocs = new Allocation *[varCount];
273        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
274    }
275
276    mIsThreadable = mScriptExec->getThreadable();
277    //ALOGE("Script isThreadable? %d", mIsThreadable);
278
279    return true;
280}
281
282bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
283                            uint8_t const *bitcode, size_t bitcodeSize,
284                            uint32_t flags, char const *bccPluginName) {
285    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
286    // bitcode, bitcodeSize, flags, lookupFunc);
287    //ALOGE("rsdScriptInit %p %p", rsc, script);
288
289    mCtx->lockMutex();
290#ifndef RS_COMPATIBILITY_LIB
291    bool useRSDebugContext = false;
292
293    mCompilerDriver = nullptr;
294
295    mCompilerDriver = new bcc::RSCompilerDriver();
296    if (mCompilerDriver == nullptr) {
297        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
298        mCtx->unlockMutex();
299        return false;
300    }
301
302    // Run any compiler setup functions we have been provided with.
303    RSSetupCompilerCallback setupCompilerCallback =
304            mCtx->getSetupCompilerCallback();
305    if (setupCompilerCallback != nullptr) {
306        setupCompilerCallback(mCompilerDriver);
307    }
308
309    bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
310    if (!bitcodeMetadata.extract()) {
311        ALOGE("Could not extract metadata from bitcode");
312        mCtx->unlockMutex();
313        return false;
314    }
315
316    const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
317
318    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
319        mCompilerDriver->setDebugContext(true);
320        useRSDebugContext = true;
321    }
322
323    std::string bcFileName(cacheDir);
324    bcFileName.append("/");
325    bcFileName.append(resName);
326    bcFileName.append(".bc");
327
328    std::vector<const char*> compileArguments;
329    setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
330                        useRSDebugContext, bccPluginName);
331
332    mChecksumNeeded = isChecksumNeeded();
333    if (mChecksumNeeded) {
334        std::vector<const char *> bccFiles = { BCC_EXE_PATH,
335                                               core_lib,
336                                             };
337
338        // The last argument of compileArguments is a nullptr, so remove 1 from
339        // the size.
340        std::unique_ptr<const char> compileCommandLine(
341            rsuJoinStrings(compileArguments.size()-1, compileArguments.data()));
342
343        mBuildChecksum = constructBuildChecksum(bitcode, bitcodeSize,
344                                                compileCommandLine.get(),
345                                                bccFiles.data(), bccFiles.size());
346
347        if (mBuildChecksum == 0) {
348            // cannot compute checksum but verification is enabled
349            mCtx->unlockMutex();
350            return false;
351        }
352    }
353    else {
354        // add a dummy/constant as a checksum if verification is disabled
355        mBuildChecksum = 0xabadcafe;
356    }
357
358    // Append build checksum to commandline
359    // Handle the terminal nullptr in compileArguments
360    compileArguments.pop_back();
361    compileArguments.push_back("-build-checksum");
362    std::stringstream ss;
363    ss << std::hex << mBuildChecksum;
364    compileArguments.push_back(ss.str().c_str());
365    compileArguments.push_back(nullptr);
366
367    if (!is_force_recompile() && !useRSDebugContext) {
368        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
369
370        // Read RS info from the shared object to detect checksum mismatch
371        if (mScriptSO != nullptr && !storeRSInfoFromSO()) {
372            dlclose(mScriptSO);
373            mScriptSO = nullptr;
374        }
375    }
376
377    // If we can't, it's either not there or out of date.  We compile the bit code and try loading
378    // again.
379    if (mScriptSO == nullptr) {
380        if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
381                            compileArguments))
382        {
383            ALOGE("bcc: FAILS to compile '%s'", resName);
384            mCtx->unlockMutex();
385            return false;
386        }
387
388        if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
389            ALOGE("Linker: Failed to link object file '%s'", resName);
390            mCtx->unlockMutex();
391            return false;
392        }
393
394        mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
395        if (mScriptSO == nullptr) {
396            ALOGE("Unable to load '%s'", resName);
397            mCtx->unlockMutex();
398            return false;
399        }
400
401        // Read RS symbol information from the .so.
402        if (!storeRSInfoFromSO()) {
403            goto error;
404        }
405    }
406
407    mBitcodeFilePath.setTo(bcFileName.c_str());
408
409#else  // RS_COMPATIBILITY_LIB is defined
410    const char *nativeLibDir = mCtx->getContext()->getNativeLibDir();
411    mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir);
412
413    if (!mScriptSO) {
414        goto error;
415    }
416
417    if (!storeRSInfoFromSO()) {
418        goto error;
419    }
420#endif
421    mCtx->unlockMutex();
422    return true;
423
424error:
425
426    mCtx->unlockMutex();
427    if (mScriptSO) {
428        dlclose(mScriptSO);
429        mScriptSO = nullptr;
430    }
431    return false;
432}
433
434#ifndef RS_COMPATIBILITY_LIB
435
436const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
437                                          size_t bitcodeSize) {
438    const char* defaultLib = SYSLIBPATH"/libclcore.bc";
439
440    // If we're debugging, use the debug library.
441    if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
442        return SYSLIBPATH"/libclcore_debug.bc";
443    }
444
445    // If a callback has been registered to specify a library, use that.
446    RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
447    if (selectRTCallback != nullptr) {
448        return selectRTCallback((const char*)bitcode, bitcodeSize);
449    }
450
451    // Check for a platform specific library
452#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
453    enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
454    if (prec == bcinfo::RS_FP_Relaxed) {
455        // NEON-capable ARMv7a devices can use an accelerated math library
456        // for all reduced precision scripts.
457        // ARMv8 does not use NEON, as ASIMD can be used with all precision
458        // levels.
459        return SYSLIBPATH"/libclcore_neon.bc";
460    } else {
461        return defaultLib;
462    }
463#elif defined(__i386__) || defined(__x86_64__)
464    // x86 devices will use an optimized library.
465    return SYSLIBPATH"/libclcore_x86.bc";
466#else
467    return defaultLib;
468#endif
469}
470
471#endif
472
473void RsdCpuScriptImpl::populateScript(Script *script) {
474    // Copy info over to runtime
475    script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
476    script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
477    script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
478    script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
479    script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
480
481    // Bug, need to stash in metadata
482    if (mRootExpand) {
483        script->mHal.info.root = mRootExpand;
484    } else {
485        script->mHal.info.root = mRoot;
486    }
487}
488
489
// Pointer to a function taking three data pointers followed by four 32-bit
// unsigned values and returning nothing.
using rs_t = void (*)(const void *, void *, const void *,
                      uint32_t, uint32_t, uint32_t, uint32_t);
491
492bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
493                                        uint32_t inLen,
494                                        Allocation * aout,
495                                        const void * usr, uint32_t usrLen,
496                                        const RsScriptCall *sc,
497                                        MTLaunchStruct *mtls) {
498
499    memset(mtls, 0, sizeof(MTLaunchStruct));
500
501    for (int index = inLen; --index >= 0;) {
502        const Allocation* ain = ains[index];
503
504        // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
505        if (ain != nullptr &&
506            (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == nullptr) {
507
508            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
509                                         "rsForEach called with null in allocations");
510            return false;
511        }
512    }
513
514    if (aout &&
515        (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == nullptr) {
516
517        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
518                                     "rsForEach called with null out allocations");
519        return false;
520    }
521
522    if (inLen > 0) {
523        const Allocation *ain0   = ains[0];
524        const Type       *inType = ain0->getType();
525
526        mtls->fep.dim.x = inType->getDimX();
527        mtls->fep.dim.y = inType->getDimY();
528        mtls->fep.dim.z = inType->getDimZ();
529
530        for (int Index = inLen; --Index >= 1;) {
531            if (!ain0->hasSameDims(ains[Index])) {
532                mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
533                  "Failed to launch kernel; dimensions of input and output"
534                  "allocations do not match.");
535
536                return false;
537            }
538        }
539
540    } else if (aout != nullptr) {
541        const Type *outType = aout->getType();
542
543        mtls->fep.dim.x = outType->getDimX();
544        mtls->fep.dim.y = outType->getDimY();
545        mtls->fep.dim.z = outType->getDimZ();
546
547    } else {
548        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
549                                     "rsForEach called with null allocations");
550        return false;
551    }
552
553    if (inLen > 0 && aout != nullptr) {
554        if (!ains[0]->hasSameDims(aout)) {
555            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
556              "Failed to launch kernel; dimensions of input and output allocations do not match.");
557
558            return false;
559        }
560    }
561
562    if (!sc || (sc->xEnd == 0)) {
563        mtls->end.x = mtls->fep.dim.x;
564    } else {
565        mtls->start.x = rsMin(mtls->fep.dim.x, sc->xStart);
566        mtls->end.x = rsMin(mtls->fep.dim.x, sc->xEnd);
567        if (mtls->start.x >= mtls->end.x) return false;
568    }
569
570    if (!sc || (sc->yEnd == 0)) {
571        mtls->end.y = mtls->fep.dim.y;
572    } else {
573        mtls->start.y = rsMin(mtls->fep.dim.y, sc->yStart);
574        mtls->end.y = rsMin(mtls->fep.dim.y, sc->yEnd);
575        if (mtls->start.y >= mtls->end.y) return false;
576    }
577
578    if (!sc || (sc->zEnd == 0)) {
579        mtls->end.z = mtls->fep.dim.z;
580    } else {
581        mtls->start.z = rsMin(mtls->fep.dim.z, sc->zStart);
582        mtls->end.z = rsMin(mtls->fep.dim.z, sc->zEnd);
583        if (mtls->start.z >= mtls->end.z) return false;
584    }
585
586    if (!sc || (sc->arrayEnd == 0)) {
587        mtls->end.array[0] = mtls->fep.dim.array[0];
588    } else {
589        mtls->start.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayStart);
590        mtls->end.array[0] = rsMin(mtls->fep.dim.array[0], sc->arrayEnd);
591        if (mtls->start.array[0] >= mtls->end.array[0]) return false;
592    }
593
594    if (!sc || (sc->array2End == 0)) {
595        mtls->end.array[1] = mtls->fep.dim.array[1];
596    } else {
597        mtls->start.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2Start);
598        mtls->end.array[1] = rsMin(mtls->fep.dim.array[1], sc->array2End);
599        if (mtls->start.array[1] >= mtls->end.array[1]) return false;
600    }
601
602    if (!sc || (sc->array3End == 0)) {
603        mtls->end.array[2] = mtls->fep.dim.array[2];
604    } else {
605        mtls->start.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3Start);
606        mtls->end.array[2] = rsMin(mtls->fep.dim.array[2], sc->array3End);
607        if (mtls->start.array[2] >= mtls->end.array[2]) return false;
608    }
609
610    if (!sc || (sc->array4End == 0)) {
611        mtls->end.array[3] = mtls->fep.dim.array[3];
612    } else {
613        mtls->start.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4Start);
614        mtls->end.array[3] = rsMin(mtls->fep.dim.array[3], sc->array4End);
615        if (mtls->start.array[3] >= mtls->end.array[3]) return false;
616    }
617
618
619    // The X & Y walkers always want 0-1 min even if dim is not present
620    mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
621    mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
622
623    mtls->rsc        = mCtx;
624    if (ains) {
625        memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
626    }
627    mtls->aout[0]    = aout;
628    mtls->fep.usr    = usr;
629    mtls->fep.usrLen = usrLen;
630    mtls->mSliceSize = 1;
631    mtls->mSliceNum  = 0;
632
633    mtls->isThreadable  = mIsThreadable;
634
635    if (inLen > 0) {
636        mtls->fep.inLen = inLen;
637        for (int index = inLen; --index >= 0;) {
638            mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
639            mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
640        }
641    }
642
643    if (aout != nullptr) {
644        mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
645        mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
646    }
647
648    // All validation passed, ok to launch threads
649    return true;
650}
651
652
653void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
654                                     const Allocation ** ains,
655                                     uint32_t inLen,
656                                     Allocation * aout,
657                                     const void * usr,
658                                     uint32_t usrLen,
659                                     const RsScriptCall *sc) {
660
661    MTLaunchStruct mtls;
662
663    if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
664        forEachKernelSetup(slot, &mtls);
665
666        RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
667        mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
668        mCtx->setTLS(oldTLS);
669    }
670}
671
672void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
673    mtls->script = this;
674    mtls->fep.slot = slot;
675    mtls->kernel = mScriptExec->getForEachFunction(slot);
676    rsAssert(mtls->kernel != nullptr);
677    mtls->sig = mScriptExec->getForEachSignature(slot);
678}
679
680int RsdCpuScriptImpl::invokeRoot() {
681    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
682    int ret = mRoot();
683    mCtx->setTLS(oldTLS);
684    return ret;
685}
686
687void RsdCpuScriptImpl::invokeInit() {
688    if (mInit) {
689        mInit();
690    }
691}
692
693void RsdCpuScriptImpl::invokeFreeChildren() {
694    if (mFreeChildren) {
695        mFreeChildren();
696    }
697}
698
699void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
700                                      size_t paramLength) {
701    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
702    void * ap = nullptr;
703
704#if defined(__x86_64__)
705    // The invoked function could have input parameter of vector type for example float4 which
706    // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
707    // So try to align void* params before passing them into RS exported function.
708
709    if ((uint8_t)(uint64_t)params & 0x0F) {
710        if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
711            memcpy(ap, params, paramLength);
712        } else {
713            ALOGE("x86_64: invokeFunction memalign error, still use params which"
714                  " is not 16 bytes aligned.");
715        }
716    }
717#endif
718
719    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
720    reinterpret_cast<void (*)(const void *, uint32_t)>(
721        mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
722
723    mCtx->setTLS(oldTLS);
724}
725
726void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
727    //rsAssert(!script->mFieldIsObject[slot]);
728    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
729
730    //if (mIntrinsicID) {
731        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
732        //return;
733    //}
734
735    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
736    if (!destPtr) {
737        //ALOGV("Calling setVar on slot = %i which is null", slot);
738        return;
739    }
740
741    memcpy(destPtr, data, dataLength);
742}
743
744void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
745    //rsAssert(!script->mFieldIsObject[slot]);
746    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
747
748    int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
749    if (!srcPtr) {
750        //ALOGV("Calling setVar on slot = %i which is null", slot);
751        return;
752    }
753    memcpy(data, srcPtr, dataLength);
754}
755
756
757void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
758                                                const Element *elem,
759                                                const uint32_t *dims, size_t dimLength) {
760    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
761    if (!destPtr) {
762        //ALOGV("Calling setVar on slot = %i which is null", slot);
763        return;
764    }
765
766    // We want to look at dimension in terms of integer components,
767    // but dimLength is given in terms of bytes.
768    dimLength /= sizeof(int);
769
770    // Only a single dimension is currently supported.
771    rsAssert(dimLength == 1);
772    if (dimLength == 1) {
773        // First do the increment loop.
774        size_t stride = elem->getSizeBytes();
775        const char *cVal = reinterpret_cast<const char *>(data);
776        for (uint32_t i = 0; i < dims[0]; i++) {
777            elem->incRefs(cVal);
778            cVal += stride;
779        }
780
781        // Decrement loop comes after (to prevent race conditions).
782        char *oldVal = reinterpret_cast<char *>(destPtr);
783        for (uint32_t i = 0; i < dims[0]; i++) {
784            elem->decRefs(oldVal);
785            oldVal += stride;
786        }
787    }
788
789    memcpy(destPtr, data, dataLength);
790}
791
792void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
793
794    //rsAssert(!script->mFieldIsObject[slot]);
795    //ALOGE("setGlobalBind %i %p", slot, data);
796
797    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
798    if (!destPtr) {
799        //ALOGV("Calling setVar on slot = %i which is null", slot);
800        return;
801    }
802
803    void *ptr = nullptr;
804    mBoundAllocs[slot] = data;
805    if (data) {
806        ptr = data->mHal.drvState.lod[0].mallocPtr;
807    }
808    memcpy(destPtr, &ptr, sizeof(void *));
809}
810
811void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
812
813    //rsAssert(script->mFieldIsObject[slot]);
814    //ALOGE("setGlobalObj %i %p", slot, data);
815
816    int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
817    if (!destPtr) {
818        //ALOGV("Calling setVar on slot = %i which is null", slot);
819        return;
820    }
821
822    rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
823}
824
825const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const {
826    return mScriptExec->getFieldName(slot);
827}
828
829RsdCpuScriptImpl::~RsdCpuScriptImpl() {
830#ifndef RS_COMPATIBILITY_LIB
831    if (mCompilerDriver) {
832        delete mCompilerDriver;
833    }
834#endif
835
836    if (mScriptExec != nullptr) {
837        delete mScriptExec;
838    }
839    if (mBoundAllocs) delete[] mBoundAllocs;
840    if (mScriptSO) {
841        dlclose(mScriptSO);
842    }
843}
844
845Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
846    if (!ptr) {
847        return nullptr;
848    }
849
850    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
851        Allocation *a = mBoundAllocs[ct];
852        if (!a) continue;
853        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
854            return a;
855        }
856    }
857    ALOGE("rsGetAllocation, failed to find %p", ptr);
858    return nullptr;
859}
860
861void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
862                                 uint32_t inLen, Allocation * aout,
863                                 const void * usr, uint32_t usrLen,
864                                 const RsScriptCall *sc) {}
865
866void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
867                                  uint32_t inLen, Allocation * aout,
868                                  const void * usr, uint32_t usrLen,
869                                  const RsScriptCall *sc) {}
870
871
872}
873}
874