rsdBcc.cpp revision ebee9480507562109314cca00753a09002a91e7d
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20
21#include <bcinfo/MetadataExtractor.h>
22
23#include "rsContext.h"
24#include "rsElement.h"
25#include "rsScriptC.h"
26
27#include "utils/Timers.h"
28#include "utils/StopWatch.h"
29
30using namespace android;
31using namespace android::renderscript;
32
33struct DrvScript {
34    int (*mRoot)();
35    int (*mRootExpand)();
36    void (*mInit)();
37    void (*mFreeChildren)();
38
39    BCCScriptRef mBccScript;
40
41    bcinfo::MetadataExtractor *ME;
42
43    InvokeFunc_t *mInvokeFunctions;
44    ForEachFunc_t *mForEachFunctions;
45    void ** mFieldAddress;
46    bool * mFieldIsObject;
47    const uint32_t *mExportForEachSignatureList;
48
49    const uint8_t * mScriptText;
50    uint32_t mScriptTextLength;
51};
52
53typedef void (*outer_foreach_t)(
54    const android::renderscript::RsForEachStubParamStruct *,
55    uint32_t x1, uint32_t x2,
56    uint32_t instep, uint32_t outstep);
57
58static Script * setTLS(Script *sc) {
59    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
60    rsAssert(tls);
61    Script *old = tls->mScript;
62    tls->mScript = sc;
63    return old;
64}
65
66
67bool rsdScriptInit(const Context *rsc,
68                     ScriptC *script,
69                     char const *resName,
70                     char const *cacheDir,
71                     uint8_t const *bitcode,
72                     size_t bitcodeSize,
73                     uint32_t flags) {
74    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
75
76    pthread_mutex_lock(&rsdgInitMutex);
77
78    size_t exportFuncCount = 0;
79    size_t exportVarCount = 0;
80    size_t objectSlotCount = 0;
81    size_t exportForEachSignatureCount = 0;
82
83    const char* coreLib = "/system/lib/libclcore.bc";
84    bcinfo::RSFloatPrecision prec;
85
86    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
87    if (drv == NULL) {
88        goto error;
89    }
90    script->mHal.drv = drv;
91
92    drv->mBccScript = bccCreateScript();
93    script->mHal.info.isThreadable = true;
94    drv->mScriptText = bitcode;
95    drv->mScriptTextLength = bitcodeSize;
96
97
98    drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
99                                            drv->mScriptTextLength);
100    if (!drv->ME->extract()) {
101      ALOGE("bcinfo: failed to read script metadata");
102      goto error;
103    }
104
105    //ALOGE("mBccScript %p", script->mBccScript);
106
107    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
108        ALOGE("bcc: FAILS to register symbol callback");
109        goto error;
110    }
111
112    if (bccReadBC(drv->mBccScript,
113                  resName,
114                  (char const *)drv->mScriptText,
115                  drv->mScriptTextLength, 0) != 0) {
116        ALOGE("bcc: FAILS to read bitcode");
117        goto error;
118    }
119
120    // NEON-capable devices can use an accelerated math library for all
121    // reduced precision scripts.
122#if defined(ARCH_ARM_HAVE_NEON)
123    prec = drv->ME->getRSFloatPrecision();
124    if (prec != bcinfo::RS_FP_Full) {
125        coreLib = "/system/lib/libclcore_neon.bc";
126    }
127#endif
128
129    if (bccLinkFile(drv->mBccScript, coreLib, 0) != 0) {
130        ALOGE("bcc: FAILS to link bitcode");
131        goto error;
132    }
133
134    if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
135        ALOGE("bcc: FAILS to prepare executable");
136        goto error;
137    }
138
139    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
140    drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand"));
141    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
142    drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor"));
143
144    exportFuncCount = drv->ME->getExportFuncCount();
145    if (exportFuncCount > 0) {
146        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
147                                                       sizeof(InvokeFunc_t));
148        bccGetExportFuncList(drv->mBccScript, exportFuncCount,
149                             (void **) drv->mInvokeFunctions);
150    } else {
151        drv->mInvokeFunctions = NULL;
152    }
153
154    exportVarCount = drv->ME->getExportVarCount();
155    if (exportVarCount > 0) {
156        drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
157        drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
158        bccGetExportVarList(drv->mBccScript, exportVarCount,
159                            (void **) drv->mFieldAddress);
160    } else {
161        drv->mFieldAddress = NULL;
162        drv->mFieldIsObject = NULL;
163    }
164
165    objectSlotCount = drv->ME->getObjectSlotCount();
166    if (objectSlotCount > 0) {
167        const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
168        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
169            drv->mFieldIsObject[objectSlotList[ct]] = true;
170        }
171    }
172
173    exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
174    drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
175    if (exportForEachSignatureCount > 0) {
176        drv->mForEachFunctions =
177            (ForEachFunc_t*) calloc(exportForEachSignatureCount,
178                                    sizeof(ForEachFunc_t));
179        bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount,
180                                (void **) drv->mForEachFunctions);
181    } else {
182        drv->mForEachFunctions = NULL;
183    }
184
185    // Copy info over to runtime
186    script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
187    script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
188    script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
189    script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
190    script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
191
192    if (drv->mRootExpand) {
193      script->mHal.info.root = drv->mRootExpand;
194    } else {
195      script->mHal.info.root = drv->mRoot;
196    }
197
198    pthread_mutex_unlock(&rsdgInitMutex);
199    return true;
200
201error:
202
203    pthread_mutex_unlock(&rsdgInitMutex);
204    if (drv->ME) {
205        delete drv->ME;
206        drv->ME = NULL;
207    }
208    free(drv);
209    return false;
210
211}
212
213typedef struct {
214    Context *rsc;
215    Script *script;
216    ForEachFunc_t kernel;
217    uint32_t sig;
218    const Allocation * ain;
219    Allocation * aout;
220    const void * usr;
221    size_t usrLen;
222
223    uint32_t mSliceSize;
224    volatile int mSliceNum;
225
226    const uint8_t *ptrIn;
227    uint32_t eStrideIn;
228    uint8_t *ptrOut;
229    uint32_t eStrideOut;
230
231    uint32_t yStrideIn;
232    uint32_t yStrideOut;
233
234    uint32_t xStart;
235    uint32_t xEnd;
236    uint32_t yStart;
237    uint32_t yEnd;
238    uint32_t zStart;
239    uint32_t zEnd;
240    uint32_t arrayStart;
241    uint32_t arrayEnd;
242
243    uint32_t dimX;
244    uint32_t dimY;
245    uint32_t dimZ;
246    uint32_t dimArray;
247} MTLaunchStruct;
// Function pointer type taking three data pointers followed by four 32-bit
// unsigned values. It is not referenced in the visible part of this file.
typedef void (*rs_t)(const void *,
                     void *,
                     const void *,
                     uint32_t,
                     uint32_t,
                     uint32_t,
                     uint32_t);
249
250static void wc_xy(void *usr, uint32_t idx) {
251    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
252    RsForEachStubParamStruct p;
253    memset(&p, 0, sizeof(p));
254    p.usr = mtls->usr;
255    p.usr_len = mtls->usrLen;
256    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
257    uint32_t sig = mtls->sig;
258
259    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
260    while (1) {
261        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
262        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
263        uint32_t yEnd = yStart + mtls->mSliceSize;
264        yEnd = rsMin(yEnd, mtls->yEnd);
265        if (yEnd <= yStart) {
266            return;
267        }
268
269        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
270        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
271        for (p.y = yStart; p.y < yEnd; p.y++) {
272            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
273            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
274            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
275        }
276    }
277}
278
279static void wc_x(void *usr, uint32_t idx) {
280    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
281    RsForEachStubParamStruct p;
282    memset(&p, 0, sizeof(p));
283    p.usr = mtls->usr;
284    p.usr_len = mtls->usrLen;
285    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
286    uint32_t sig = mtls->sig;
287
288    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
289    while (1) {
290        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
291        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
292        uint32_t xEnd = xStart + mtls->mSliceSize;
293        xEnd = rsMin(xEnd, mtls->xEnd);
294        if (xEnd <= xStart) {
295            return;
296        }
297
298        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
299        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
300
301        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
302        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
303        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
304    }
305}
306
307void rsdScriptInvokeForEach(const Context *rsc,
308                            Script *s,
309                            uint32_t slot,
310                            const Allocation * ain,
311                            Allocation * aout,
312                            const void * usr,
313                            uint32_t usrLen,
314                            const RsScriptCall *sc) {
315
316    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
317
318    MTLaunchStruct mtls;
319    memset(&mtls, 0, sizeof(mtls));
320
321    DrvScript *drv = (DrvScript *)s->mHal.drv;
322    mtls.kernel = drv->mForEachFunctions[slot];
323    rsAssert(mtls.kernel != NULL);
324    mtls.sig = 0x1f;  // temp fix for old apps, full table in slang_rs_export_foreach.cpp
325    if (drv->mExportForEachSignatureList) {
326        mtls.sig = drv->mExportForEachSignatureList[slot];
327    }
328    if (ain) {
329        mtls.dimX = ain->getType()->getDimX();
330        mtls.dimY = ain->getType()->getDimY();
331        mtls.dimZ = ain->getType()->getDimZ();
332        //mtls.dimArray = ain->getType()->getDimArray();
333    } else if (aout) {
334        mtls.dimX = aout->getType()->getDimX();
335        mtls.dimY = aout->getType()->getDimY();
336        mtls.dimZ = aout->getType()->getDimZ();
337        //mtls.dimArray = aout->getType()->getDimArray();
338    } else {
339        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
340        return;
341    }
342
343    if (!sc || (sc->xEnd == 0)) {
344        mtls.xEnd = mtls.dimX;
345    } else {
346        rsAssert(sc->xStart < mtls.dimX);
347        rsAssert(sc->xEnd <= mtls.dimX);
348        rsAssert(sc->xStart < sc->xEnd);
349        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
350        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
351        if (mtls.xStart >= mtls.xEnd) return;
352    }
353
354    if (!sc || (sc->yEnd == 0)) {
355        mtls.yEnd = mtls.dimY;
356    } else {
357        rsAssert(sc->yStart < mtls.dimY);
358        rsAssert(sc->yEnd <= mtls.dimY);
359        rsAssert(sc->yStart < sc->yEnd);
360        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
361        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
362        if (mtls.yStart >= mtls.yEnd) return;
363    }
364
365    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
366    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
367    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
368    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
369
370    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
371
372    Context *mrsc = (Context *)rsc;
373    Script * oldTLS = setTLS(s);
374
375    mtls.rsc = mrsc;
376    mtls.ain = ain;
377    mtls.aout = aout;
378    mtls.script = s;
379    mtls.usr = usr;
380    mtls.usrLen = usrLen;
381    mtls.mSliceSize = 10;
382    mtls.mSliceNum = 0;
383
384    mtls.ptrIn = NULL;
385    mtls.eStrideIn = 0;
386    if (ain) {
387        mtls.ptrIn = (const uint8_t *)ain->getPtr();
388        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
389        mtls.yStrideIn = ain->mHal.drvState.stride;
390    }
391
392    mtls.ptrOut = NULL;
393    mtls.eStrideOut = 0;
394    if (aout) {
395        mtls.ptrOut = (uint8_t *)aout->getPtr();
396        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
397        mtls.yStrideOut = aout->mHal.drvState.stride;
398    }
399
400    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
401        if (mtls.dimY > 1) {
402            rsdLaunchThreads(mrsc, wc_xy, &mtls);
403        } else {
404            rsdLaunchThreads(mrsc, wc_x, &mtls);
405        }
406
407        //ALOGE("launch 1");
408    } else {
409        RsForEachStubParamStruct p;
410        memset(&p, 0, sizeof(p));
411        p.usr = mtls.usr;
412        p.usr_len = mtls.usrLen;
413        uint32_t sig = mtls.sig;
414
415        //ALOGE("launch 3");
416        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
417        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
418            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
419                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
420                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
421                                      mtls.dimX * mtls.dimY * p.z +
422                                      mtls.dimX * p.y;
423                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
424                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
425                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
426                       mtls.eStrideOut);
427                }
428            }
429        }
430    }
431
432    setTLS(oldTLS);
433}
434
435
436int rsdScriptInvokeRoot(const Context *dc, Script *script) {
437    DrvScript *drv = (DrvScript *)script->mHal.drv;
438
439    Script * oldTLS = setTLS(script);
440    int ret = drv->mRoot();
441    setTLS(oldTLS);
442
443    return ret;
444}
445
446void rsdScriptInvokeInit(const Context *dc, Script *script) {
447    DrvScript *drv = (DrvScript *)script->mHal.drv;
448
449    if (drv->mInit) {
450        drv->mInit();
451    }
452}
453
454void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
455    DrvScript *drv = (DrvScript *)script->mHal.drv;
456
457    if (drv->mFreeChildren) {
458        drv->mFreeChildren();
459    }
460}
461
462void rsdScriptInvokeFunction(const Context *dc, Script *script,
463                            uint32_t slot,
464                            const void *params,
465                            size_t paramLength) {
466    DrvScript *drv = (DrvScript *)script->mHal.drv;
467    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
468
469    Script * oldTLS = setTLS(script);
470    ((void (*)(const void *, uint32_t))
471        drv->mInvokeFunctions[slot])(params, paramLength);
472    setTLS(oldTLS);
473}
474
475void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
476                           uint32_t slot, void *data, size_t dataLength) {
477    DrvScript *drv = (DrvScript *)script->mHal.drv;
478    //rsAssert(!script->mFieldIsObject[slot]);
479    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
480
481    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
482    if (!destPtr) {
483        //ALOGV("Calling setVar on slot = %i which is null", slot);
484        return;
485    }
486
487    memcpy(destPtr, data, dataLength);
488}
489
490void rsdScriptSetGlobalVarWithElemDims(
491        const android::renderscript::Context *dc,
492        const android::renderscript::Script *script,
493        uint32_t slot, void *data, size_t dataLength,
494        const android::renderscript::Element *elem,
495        const size_t *dims, size_t dimLength) {
496    DrvScript *drv = (DrvScript *)script->mHal.drv;
497
498    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
499    if (!destPtr) {
500        //ALOGV("Calling setVar on slot = %i which is null", slot);
501        return;
502    }
503
504    // We want to look at dimension in terms of integer components,
505    // but dimLength is given in terms of bytes.
506    dimLength /= sizeof(int);
507
508    // Only a single dimension is currently supported.
509    rsAssert(dimLength == 1);
510    if (dimLength == 1) {
511        // First do the increment loop.
512        size_t stride = elem->getSizeBytes();
513        char *cVal = reinterpret_cast<char *>(data);
514        for (size_t i = 0; i < dims[0]; i++) {
515            elem->incRefs(cVal);
516            cVal += stride;
517        }
518
519        // Decrement loop comes after (to prevent race conditions).
520        char *oldVal = reinterpret_cast<char *>(destPtr);
521        for (size_t i = 0; i < dims[0]; i++) {
522            elem->decRefs(oldVal);
523            oldVal += stride;
524        }
525    }
526
527    memcpy(destPtr, data, dataLength);
528}
529
530void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
531    DrvScript *drv = (DrvScript *)script->mHal.drv;
532    //rsAssert(!script->mFieldIsObject[slot]);
533    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
534
535    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
536    if (!destPtr) {
537        //ALOGV("Calling setVar on slot = %i which is null", slot);
538        return;
539    }
540
541    memcpy(destPtr, &data, sizeof(void *));
542}
543
544void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
545    DrvScript *drv = (DrvScript *)script->mHal.drv;
546    //rsAssert(script->mFieldIsObject[slot]);
547    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
548
549    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
550    if (!destPtr) {
551        //ALOGV("Calling setVar on slot = %i which is null", slot);
552        return;
553    }
554
555    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
556}
557
558void rsdScriptDestroy(const Context *dc, Script *script) {
559    DrvScript *drv = (DrvScript *)script->mHal.drv;
560
561    if (drv->mFieldAddress) {
562        size_t exportVarCount = drv->ME->getExportVarCount();
563        for (size_t ct = 0; ct < exportVarCount; ct++) {
564            if (drv->mFieldIsObject[ct]) {
565                // The field address can be NULL if the script-side has
566                // optimized the corresponding global variable away.
567                if (drv->mFieldAddress[ct]) {
568                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
569                }
570            }
571        }
572        free(drv->mFieldAddress);
573        drv->mFieldAddress = NULL;
574        free(drv->mFieldIsObject);
575        drv->mFieldIsObject = NULL;    }
576
577    if (drv->mInvokeFunctions) {
578        free(drv->mInvokeFunctions);
579        drv->mInvokeFunctions = NULL;
580    }
581
582    if (drv->mForEachFunctions) {
583        free(drv->mForEachFunctions);
584        drv->mForEachFunctions = NULL;
585    }
586
587    delete drv->ME;
588    drv->ME = NULL;
589
590    free(drv);
591    script->mHal.drv = NULL;
592
593}
594
595
596