rsdBcc.cpp revision 0ab9f9f6b1fb31cda536ae4aeaed258f78ee1447
/*
 * Copyright (C) 2011-2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20
21#include <bcinfo/MetadataExtractor.h>
22
23#include "rsContext.h"
24#include "rsElement.h"
25#include "rsScriptC.h"
26
27#include "utils/Timers.h"
28#include "utils/StopWatch.h"
29
30using namespace android;
31using namespace android::renderscript;
32
33struct DrvScript {
34    int (*mRoot)();
35    int (*mRootExpand)();
36    void (*mInit)();
37    void (*mFreeChildren)();
38
39    BCCScriptRef mBccScript;
40
41    bcinfo::MetadataExtractor *ME;
42
43    InvokeFunc_t *mInvokeFunctions;
44    ForEachFunc_t *mForEachFunctions;
45    void ** mFieldAddress;
46    bool * mFieldIsObject;
47    const uint32_t *mExportForEachSignatureList;
48
49    const uint8_t * mScriptText;
50    uint32_t mScriptTextLength;
51};
52
53typedef void (*outer_foreach_t)(
54    const android::renderscript::RsForEachStubParamStruct *,
55    uint32_t x1, uint32_t x2,
56    uint32_t instep, uint32_t outstep);
57
58static Script * setTLS(Script *sc) {
59    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
60    rsAssert(tls);
61    Script *old = tls->mScript;
62    tls->mScript = sc;
63    return old;
64}
65
66
67bool rsdScriptInit(const Context *rsc,
68                     ScriptC *script,
69                     char const *resName,
70                     char const *cacheDir,
71                     uint8_t const *bitcode,
72                     size_t bitcodeSize,
73                     uint32_t flags) {
74    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
75    //ALOGE("rsdScriptInit %p %p", rsc, script);
76
77    pthread_mutex_lock(&rsdgInitMutex);
78
79    size_t exportFuncCount = 0;
80    size_t exportVarCount = 0;
81    size_t objectSlotCount = 0;
82    size_t exportForEachSignatureCount = 0;
83
84    const char* coreLib = "/system/lib/libclcore.bc";
85    bcinfo::RSFloatPrecision prec;
86
87    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
88    if (drv == NULL) {
89        goto error;
90    }
91    script->mHal.drv = drv;
92
93    drv->mBccScript = bccCreateScript();
94    script->mHal.info.isThreadable = true;
95    drv->mScriptText = bitcode;
96    drv->mScriptTextLength = bitcodeSize;
97
98
99    drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
100                                            drv->mScriptTextLength);
101    if (!drv->ME->extract()) {
102      ALOGE("bcinfo: failed to read script metadata");
103      goto error;
104    }
105
106    //ALOGE("mBccScript %p", script->mBccScript);
107
108    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
109        ALOGE("bcc: FAILS to register symbol callback");
110        goto error;
111    }
112
113    if (bccReadBC(drv->mBccScript,
114                  resName,
115                  (char const *)drv->mScriptText,
116                  drv->mScriptTextLength, 0) != 0) {
117        ALOGE("bcc: FAILS to read bitcode");
118        goto error;
119    }
120
121    // NEON-capable devices can use an accelerated math library for all
122    // reduced precision scripts.
123#if defined(ARCH_ARM_HAVE_NEON)
124    prec = drv->ME->getRSFloatPrecision();
125    if (prec != bcinfo::RS_FP_Full) {
126        coreLib = "/system/lib/libclcore_neon.bc";
127    }
128#endif
129
130    if (bccLinkFile(drv->mBccScript, coreLib, 0) != 0) {
131        ALOGE("bcc: FAILS to link bitcode");
132        goto error;
133    }
134
135    if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
136        ALOGE("bcc: FAILS to prepare executable");
137        goto error;
138    }
139
140    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
141    drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand"));
142    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
143    drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor"));
144
145    exportFuncCount = drv->ME->getExportFuncCount();
146    if (exportFuncCount > 0) {
147        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
148                                                       sizeof(InvokeFunc_t));
149        bccGetExportFuncList(drv->mBccScript, exportFuncCount,
150                             (void **) drv->mInvokeFunctions);
151    } else {
152        drv->mInvokeFunctions = NULL;
153    }
154
155    exportVarCount = drv->ME->getExportVarCount();
156    if (exportVarCount > 0) {
157        drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
158        drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
159        bccGetExportVarList(drv->mBccScript, exportVarCount,
160                            (void **) drv->mFieldAddress);
161    } else {
162        drv->mFieldAddress = NULL;
163        drv->mFieldIsObject = NULL;
164    }
165
166    objectSlotCount = drv->ME->getObjectSlotCount();
167    if (objectSlotCount > 0) {
168        const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
169        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
170            drv->mFieldIsObject[objectSlotList[ct]] = true;
171        }
172    }
173
174    exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
175    drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
176    if (exportForEachSignatureCount > 0) {
177        drv->mForEachFunctions =
178            (ForEachFunc_t*) calloc(exportForEachSignatureCount,
179                                    sizeof(ForEachFunc_t));
180        bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount,
181                                (void **) drv->mForEachFunctions);
182    } else {
183        drv->mForEachFunctions = NULL;
184    }
185
186    // Copy info over to runtime
187    script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
188    script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
189    script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
190    script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
191    script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
192
193    if (drv->mRootExpand) {
194      script->mHal.info.root = drv->mRootExpand;
195    } else {
196      script->mHal.info.root = drv->mRoot;
197    }
198
199    pthread_mutex_unlock(&rsdgInitMutex);
200    return true;
201
202error:
203
204    pthread_mutex_unlock(&rsdgInitMutex);
205    if (drv->ME) {
206        delete drv->ME;
207        drv->ME = NULL;
208    }
209    free(drv);
210    return false;
211
212}
213
214typedef struct {
215    Context *rsc;
216    Script *script;
217    ForEachFunc_t kernel;
218    uint32_t sig;
219    const Allocation * ain;
220    Allocation * aout;
221    const void * usr;
222    size_t usrLen;
223
224    uint32_t mSliceSize;
225    volatile int mSliceNum;
226
227    const uint8_t *ptrIn;
228    uint32_t eStrideIn;
229    uint8_t *ptrOut;
230    uint32_t eStrideOut;
231
232    uint32_t yStrideIn;
233    uint32_t yStrideOut;
234
235    uint32_t xStart;
236    uint32_t xEnd;
237    uint32_t yStart;
238    uint32_t yEnd;
239    uint32_t zStart;
240    uint32_t zEnd;
241    uint32_t arrayStart;
242    uint32_t arrayEnd;
243
244    uint32_t dimX;
245    uint32_t dimY;
246    uint32_t dimZ;
247    uint32_t dimArray;
248} MTLaunchStruct;
249typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
250
251static void wc_xy(void *usr, uint32_t idx) {
252    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
253    RsForEachStubParamStruct p;
254    memset(&p, 0, sizeof(p));
255    p.usr = mtls->usr;
256    p.usr_len = mtls->usrLen;
257    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
258    uint32_t sig = mtls->sig;
259
260    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
261    while (1) {
262        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
263        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
264        uint32_t yEnd = yStart + mtls->mSliceSize;
265        yEnd = rsMin(yEnd, mtls->yEnd);
266        if (yEnd <= yStart) {
267            return;
268        }
269
270        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
271        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
272        for (p.y = yStart; p.y < yEnd; p.y++) {
273            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
274            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
275            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
276        }
277    }
278}
279
280static void wc_x(void *usr, uint32_t idx) {
281    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
282    RsForEachStubParamStruct p;
283    memset(&p, 0, sizeof(p));
284    p.usr = mtls->usr;
285    p.usr_len = mtls->usrLen;
286    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
287    uint32_t sig = mtls->sig;
288
289    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
290    while (1) {
291        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
292        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
293        uint32_t xEnd = xStart + mtls->mSliceSize;
294        xEnd = rsMin(xEnd, mtls->xEnd);
295        if (xEnd <= xStart) {
296            return;
297        }
298
299        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
300        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
301
302        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
303        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
304        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
305    }
306}
307
308void rsdScriptInvokeForEach(const Context *rsc,
309                            Script *s,
310                            uint32_t slot,
311                            const Allocation * ain,
312                            Allocation * aout,
313                            const void * usr,
314                            uint32_t usrLen,
315                            const RsScriptCall *sc) {
316
317    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
318
319    MTLaunchStruct mtls;
320    memset(&mtls, 0, sizeof(mtls));
321
322    //ALOGE("for each script %p  in %p   out %p", s, ain, aout);
323
324    DrvScript *drv = (DrvScript *)s->mHal.drv;
325    mtls.kernel = drv->mForEachFunctions[slot];
326    rsAssert(mtls.kernel != NULL);
327    mtls.sig = 0x1f;  // temp fix for old apps, full table in slang_rs_export_foreach.cpp
328    if (drv->mExportForEachSignatureList) {
329        mtls.sig = drv->mExportForEachSignatureList[slot];
330    }
331    if (ain) {
332        mtls.dimX = ain->getType()->getDimX();
333        mtls.dimY = ain->getType()->getDimY();
334        mtls.dimZ = ain->getType()->getDimZ();
335        //mtls.dimArray = ain->getType()->getDimArray();
336    } else if (aout) {
337        mtls.dimX = aout->getType()->getDimX();
338        mtls.dimY = aout->getType()->getDimY();
339        mtls.dimZ = aout->getType()->getDimZ();
340        //mtls.dimArray = aout->getType()->getDimArray();
341    } else {
342        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
343        return;
344    }
345
346    if (!sc || (sc->xEnd == 0)) {
347        mtls.xEnd = mtls.dimX;
348    } else {
349        rsAssert(sc->xStart < mtls.dimX);
350        rsAssert(sc->xEnd <= mtls.dimX);
351        rsAssert(sc->xStart < sc->xEnd);
352        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
353        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
354        if (mtls.xStart >= mtls.xEnd) return;
355    }
356
357    if (!sc || (sc->yEnd == 0)) {
358        mtls.yEnd = mtls.dimY;
359    } else {
360        rsAssert(sc->yStart < mtls.dimY);
361        rsAssert(sc->yEnd <= mtls.dimY);
362        rsAssert(sc->yStart < sc->yEnd);
363        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
364        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
365        if (mtls.yStart >= mtls.yEnd) return;
366    }
367
368    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
369    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
370    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
371    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
372
373    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
374
375    Context *mrsc = (Context *)rsc;
376    Script * oldTLS = setTLS(s);
377
378    mtls.rsc = mrsc;
379    mtls.ain = ain;
380    mtls.aout = aout;
381    mtls.script = s;
382    mtls.usr = usr;
383    mtls.usrLen = usrLen;
384    mtls.mSliceSize = 10;
385    mtls.mSliceNum = 0;
386
387    mtls.ptrIn = NULL;
388    mtls.eStrideIn = 0;
389    if (ain) {
390        mtls.ptrIn = (const uint8_t *)ain->getPtr();
391        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
392        mtls.yStrideIn = ain->mHal.drvState.stride;
393    }
394
395    mtls.ptrOut = NULL;
396    mtls.eStrideOut = 0;
397    if (aout) {
398        mtls.ptrOut = (uint8_t *)aout->getPtr();
399        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
400        mtls.yStrideOut = aout->mHal.drvState.stride;
401    }
402
403    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) {
404        dc->mInForEach = true;
405        if (mtls.dimY > 1) {
406            mtls.mSliceSize = mtls.dimY / (dc->mWorkers.mCount * 4);
407            if(mtls.mSliceSize < 1) {
408                mtls.mSliceSize = 1;
409            }
410
411            rsdLaunchThreads(mrsc, wc_xy, &mtls);
412        } else {
413            mtls.mSliceSize = mtls.dimX / (dc->mWorkers.mCount * 4);
414            if(mtls.mSliceSize < 1) {
415                mtls.mSliceSize = 1;
416            }
417
418            rsdLaunchThreads(mrsc, wc_x, &mtls);
419        }
420        dc->mInForEach = false;
421
422        //ALOGE("launch 1");
423    } else {
424        RsForEachStubParamStruct p;
425        memset(&p, 0, sizeof(p));
426        p.usr = mtls.usr;
427        p.usr_len = mtls.usrLen;
428        uint32_t sig = mtls.sig;
429
430        //ALOGE("launch 3");
431        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
432        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
433            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
434                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
435                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
436                                      mtls.dimX * mtls.dimY * p.z +
437                                      mtls.dimX * p.y;
438                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
439                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
440                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
441                       mtls.eStrideOut);
442                }
443            }
444        }
445    }
446
447    setTLS(oldTLS);
448}
449
450
451int rsdScriptInvokeRoot(const Context *dc, Script *script) {
452    DrvScript *drv = (DrvScript *)script->mHal.drv;
453
454    Script * oldTLS = setTLS(script);
455    int ret = drv->mRoot();
456    setTLS(oldTLS);
457
458    return ret;
459}
460
461void rsdScriptInvokeInit(const Context *dc, Script *script) {
462    DrvScript *drv = (DrvScript *)script->mHal.drv;
463
464    if (drv->mInit) {
465        drv->mInit();
466    }
467}
468
469void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
470    DrvScript *drv = (DrvScript *)script->mHal.drv;
471
472    if (drv->mFreeChildren) {
473        drv->mFreeChildren();
474    }
475}
476
477void rsdScriptInvokeFunction(const Context *dc, Script *script,
478                            uint32_t slot,
479                            const void *params,
480                            size_t paramLength) {
481    DrvScript *drv = (DrvScript *)script->mHal.drv;
482    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
483
484    Script * oldTLS = setTLS(script);
485    ((void (*)(const void *, uint32_t))
486        drv->mInvokeFunctions[slot])(params, paramLength);
487    setTLS(oldTLS);
488}
489
490void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
491                           uint32_t slot, void *data, size_t dataLength) {
492    DrvScript *drv = (DrvScript *)script->mHal.drv;
493    //rsAssert(!script->mFieldIsObject[slot]);
494    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
495
496    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
497    if (!destPtr) {
498        //ALOGV("Calling setVar on slot = %i which is null", slot);
499        return;
500    }
501
502    memcpy(destPtr, data, dataLength);
503}
504
505void rsdScriptSetGlobalVarWithElemDims(
506        const android::renderscript::Context *dc,
507        const android::renderscript::Script *script,
508        uint32_t slot, void *data, size_t dataLength,
509        const android::renderscript::Element *elem,
510        const size_t *dims, size_t dimLength) {
511    DrvScript *drv = (DrvScript *)script->mHal.drv;
512
513    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
514    if (!destPtr) {
515        //ALOGV("Calling setVar on slot = %i which is null", slot);
516        return;
517    }
518
519    // We want to look at dimension in terms of integer components,
520    // but dimLength is given in terms of bytes.
521    dimLength /= sizeof(int);
522
523    // Only a single dimension is currently supported.
524    rsAssert(dimLength == 1);
525    if (dimLength == 1) {
526        // First do the increment loop.
527        size_t stride = elem->getSizeBytes();
528        char *cVal = reinterpret_cast<char *>(data);
529        for (size_t i = 0; i < dims[0]; i++) {
530            elem->incRefs(cVal);
531            cVal += stride;
532        }
533
534        // Decrement loop comes after (to prevent race conditions).
535        char *oldVal = reinterpret_cast<char *>(destPtr);
536        for (size_t i = 0; i < dims[0]; i++) {
537            elem->decRefs(oldVal);
538            oldVal += stride;
539        }
540    }
541
542    memcpy(destPtr, data, dataLength);
543}
544
545void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
546    DrvScript *drv = (DrvScript *)script->mHal.drv;
547    //rsAssert(!script->mFieldIsObject[slot]);
548    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
549
550    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
551    if (!destPtr) {
552        //ALOGV("Calling setVar on slot = %i which is null", slot);
553        return;
554    }
555
556    memcpy(destPtr, &data, sizeof(void *));
557}
558
559void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
560    DrvScript *drv = (DrvScript *)script->mHal.drv;
561    //rsAssert(script->mFieldIsObject[slot]);
562    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
563
564    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
565    if (!destPtr) {
566        //ALOGV("Calling setVar on slot = %i which is null", slot);
567        return;
568    }
569
570    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
571}
572
573void rsdScriptDestroy(const Context *dc, Script *script) {
574    DrvScript *drv = (DrvScript *)script->mHal.drv;
575
576    if (drv->mFieldAddress) {
577        size_t exportVarCount = drv->ME->getExportVarCount();
578        for (size_t ct = 0; ct < exportVarCount; ct++) {
579            if (drv->mFieldIsObject[ct]) {
580                // The field address can be NULL if the script-side has
581                // optimized the corresponding global variable away.
582                if (drv->mFieldAddress[ct]) {
583                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
584                }
585            }
586        }
587        free(drv->mFieldAddress);
588        drv->mFieldAddress = NULL;
589        free(drv->mFieldIsObject);
590        drv->mFieldIsObject = NULL;    }
591
592    if (drv->mInvokeFunctions) {
593        free(drv->mInvokeFunctions);
594        drv->mInvokeFunctions = NULL;
595    }
596
597    if (drv->mForEachFunctions) {
598        free(drv->mForEachFunctions);
599        drv->mForEachFunctions = NULL;
600    }
601
602    delete drv->ME;
603    drv->ME = NULL;
604
605    free(drv);
606    script->mHal.drv = NULL;
607
608}
609
610
611