rsdBcc.cpp revision 298691a87d1b30d1221e303a6788d5272c223971
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20
21#include <bcinfo/MetadataExtractor.h>
22
23#include "rsContext.h"
24#include "rsElement.h"
25#include "rsScriptC.h"
26
27#include "utils/Timers.h"
28#include "utils/StopWatch.h"
29
30using namespace android;
31using namespace android::renderscript;
32
33struct DrvScript {
34    int (*mRoot)();
35    int (*mRootExpand)();
36    void (*mInit)();
37    void (*mFreeChildren)();
38
39    BCCScriptRef mBccScript;
40
41    bcinfo::MetadataExtractor *ME;
42
43    InvokeFunc_t *mInvokeFunctions;
44    ForEachFunc_t *mForEachFunctions;
45    void ** mFieldAddress;
46    bool * mFieldIsObject;
47    const uint32_t *mExportForEachSignatureList;
48
49    const uint8_t * mScriptText;
50    uint32_t mScriptTextLength;
51};
52
53typedef void (*outer_foreach_t)(
54    const android::renderscript::RsForEachStubParamStruct *,
55    uint32_t x1, uint32_t x2,
56    uint32_t instep, uint32_t outstep);
57
58static Script * setTLS(Script *sc) {
59    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
60    rsAssert(tls);
61    Script *old = tls->mScript;
62    tls->mScript = sc;
63    return old;
64}
65
66
67bool rsdScriptInit(const Context *rsc,
68                     ScriptC *script,
69                     char const *resName,
70                     char const *cacheDir,
71                     uint8_t const *bitcode,
72                     size_t bitcodeSize,
73                     uint32_t flags) {
74    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
75
76    pthread_mutex_lock(&rsdgInitMutex);
77
78    size_t exportFuncCount = 0;
79    size_t exportVarCount = 0;
80    size_t objectSlotCount = 0;
81    size_t exportForEachSignatureCount = 0;
82
83    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
84    if (drv == NULL) {
85        goto error;
86    }
87    script->mHal.drv = drv;
88
89    drv->mBccScript = bccCreateScript();
90    script->mHal.info.isThreadable = true;
91    drv->mScriptText = bitcode;
92    drv->mScriptTextLength = bitcodeSize;
93
94
95    drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
96                                            drv->mScriptTextLength);
97    if (!drv->ME->extract()) {
98      ALOGE("bcinfo: failed to read script metadata");
99      goto error;
100    }
101
102    //ALOGE("mBccScript %p", script->mBccScript);
103
104    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
105        ALOGE("bcc: FAILS to register symbol callback");
106        goto error;
107    }
108
109    if (bccReadBC(drv->mBccScript,
110                  resName,
111                  (char const *)drv->mScriptText,
112                  drv->mScriptTextLength, 0) != 0) {
113        ALOGE("bcc: FAILS to read bitcode");
114        goto error;
115    }
116
117    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
118        ALOGE("bcc: FAILS to link bitcode");
119        goto error;
120    }
121
122    if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
123        ALOGE("bcc: FAILS to prepare executable");
124        goto error;
125    }
126
127    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
128    drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand"));
129    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
130    drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor"));
131
132    exportFuncCount = drv->ME->getExportFuncCount();
133    if (exportFuncCount > 0) {
134        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
135                                                       sizeof(InvokeFunc_t));
136        bccGetExportFuncList(drv->mBccScript, exportFuncCount,
137                             (void **) drv->mInvokeFunctions);
138    } else {
139        drv->mInvokeFunctions = NULL;
140    }
141
142    exportVarCount = drv->ME->getExportVarCount();
143    if (exportVarCount > 0) {
144        drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
145        drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
146        bccGetExportVarList(drv->mBccScript, exportVarCount,
147                            (void **) drv->mFieldAddress);
148    } else {
149        drv->mFieldAddress = NULL;
150        drv->mFieldIsObject = NULL;
151    }
152
153    objectSlotCount = drv->ME->getObjectSlotCount();
154    if (objectSlotCount > 0) {
155        const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
156        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
157            drv->mFieldIsObject[objectSlotList[ct]] = true;
158        }
159    }
160
161    exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
162    drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
163    if (exportForEachSignatureCount > 0) {
164        drv->mForEachFunctions =
165            (ForEachFunc_t*) calloc(exportForEachSignatureCount,
166                                    sizeof(ForEachFunc_t));
167        bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount,
168                                (void **) drv->mForEachFunctions);
169    } else {
170        drv->mForEachFunctions = NULL;
171    }
172
173    // Copy info over to runtime
174    script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
175    script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
176    script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
177    script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
178    script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
179
180    if (drv->mRootExpand) {
181      script->mHal.info.root = drv->mRootExpand;
182    } else {
183      script->mHal.info.root = drv->mRoot;
184    }
185
186    pthread_mutex_unlock(&rsdgInitMutex);
187    return true;
188
189error:
190
191    pthread_mutex_unlock(&rsdgInitMutex);
192    if (drv->ME) {
193        delete drv->ME;
194        drv->ME = NULL;
195    }
196    free(drv);
197    return false;
198
199}
200
201typedef struct {
202    Context *rsc;
203    Script *script;
204    ForEachFunc_t kernel;
205    uint32_t sig;
206    const Allocation * ain;
207    Allocation * aout;
208    const void * usr;
209    size_t usrLen;
210
211    uint32_t mSliceSize;
212    volatile int mSliceNum;
213
214    const uint8_t *ptrIn;
215    uint32_t eStrideIn;
216    uint8_t *ptrOut;
217    uint32_t eStrideOut;
218
219    uint32_t yStrideIn;
220    uint32_t yStrideOut;
221
222    uint32_t xStart;
223    uint32_t xEnd;
224    uint32_t yStart;
225    uint32_t yEnd;
226    uint32_t zStart;
227    uint32_t zEnd;
228    uint32_t arrayStart;
229    uint32_t arrayEnd;
230
231    uint32_t dimX;
232    uint32_t dimY;
233    uint32_t dimZ;
234    uint32_t dimArray;
235} MTLaunchStruct;
236typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
237
238static void wc_xy(void *usr, uint32_t idx) {
239    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
240    RsForEachStubParamStruct p;
241    memset(&p, 0, sizeof(p));
242    p.usr = mtls->usr;
243    p.usr_len = mtls->usrLen;
244    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
245    uint32_t sig = mtls->sig;
246
247    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
248    while (1) {
249        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
250        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
251        uint32_t yEnd = yStart + mtls->mSliceSize;
252        yEnd = rsMin(yEnd, mtls->yEnd);
253        if (yEnd <= yStart) {
254            return;
255        }
256
257        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
258        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
259        for (p.y = yStart; p.y < yEnd; p.y++) {
260            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
261            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
262            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
263        }
264    }
265}
266
267static void wc_x(void *usr, uint32_t idx) {
268    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
269    RsForEachStubParamStruct p;
270    memset(&p, 0, sizeof(p));
271    p.usr = mtls->usr;
272    p.usr_len = mtls->usrLen;
273    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
274    uint32_t sig = mtls->sig;
275
276    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
277    while (1) {
278        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
279        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
280        uint32_t xEnd = xStart + mtls->mSliceSize;
281        xEnd = rsMin(xEnd, mtls->xEnd);
282        if (xEnd <= xStart) {
283            return;
284        }
285
286        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
287        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
288
289        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
290        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
291        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
292    }
293}
294
295void rsdScriptInvokeForEach(const Context *rsc,
296                            Script *s,
297                            uint32_t slot,
298                            const Allocation * ain,
299                            Allocation * aout,
300                            const void * usr,
301                            uint32_t usrLen,
302                            const RsScriptCall *sc) {
303
304    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
305
306    MTLaunchStruct mtls;
307    memset(&mtls, 0, sizeof(mtls));
308
309    DrvScript *drv = (DrvScript *)s->mHal.drv;
310    mtls.kernel = drv->mForEachFunctions[slot];
311    rsAssert(mtls.kernel != NULL);
312    mtls.sig = 0x1f;  // temp fix for old apps, full table in slang_rs_export_foreach.cpp
313    if (drv->mExportForEachSignatureList) {
314        mtls.sig = drv->mExportForEachSignatureList[slot];
315    }
316    if (ain) {
317        mtls.dimX = ain->getType()->getDimX();
318        mtls.dimY = ain->getType()->getDimY();
319        mtls.dimZ = ain->getType()->getDimZ();
320        //mtls.dimArray = ain->getType()->getDimArray();
321    } else if (aout) {
322        mtls.dimX = aout->getType()->getDimX();
323        mtls.dimY = aout->getType()->getDimY();
324        mtls.dimZ = aout->getType()->getDimZ();
325        //mtls.dimArray = aout->getType()->getDimArray();
326    } else {
327        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
328        return;
329    }
330
331    if (!sc || (sc->xEnd == 0)) {
332        mtls.xEnd = mtls.dimX;
333    } else {
334        rsAssert(sc->xStart < mtls.dimX);
335        rsAssert(sc->xEnd <= mtls.dimX);
336        rsAssert(sc->xStart < sc->xEnd);
337        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
338        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
339        if (mtls.xStart >= mtls.xEnd) return;
340    }
341
342    if (!sc || (sc->yEnd == 0)) {
343        mtls.yEnd = mtls.dimY;
344    } else {
345        rsAssert(sc->yStart < mtls.dimY);
346        rsAssert(sc->yEnd <= mtls.dimY);
347        rsAssert(sc->yStart < sc->yEnd);
348        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
349        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
350        if (mtls.yStart >= mtls.yEnd) return;
351    }
352
353    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
354    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
355    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
356    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
357
358    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
359
360    Context *mrsc = (Context *)rsc;
361    Script * oldTLS = setTLS(s);
362
363    mtls.rsc = mrsc;
364    mtls.ain = ain;
365    mtls.aout = aout;
366    mtls.script = s;
367    mtls.usr = usr;
368    mtls.usrLen = usrLen;
369    mtls.mSliceSize = 10;
370    mtls.mSliceNum = 0;
371
372    mtls.ptrIn = NULL;
373    mtls.eStrideIn = 0;
374    if (ain) {
375        mtls.ptrIn = (const uint8_t *)ain->getPtr();
376        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
377        mtls.yStrideIn = ain->mHal.drvState.stride;
378    }
379
380    mtls.ptrOut = NULL;
381    mtls.eStrideOut = 0;
382    if (aout) {
383        mtls.ptrOut = (uint8_t *)aout->getPtr();
384        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
385        mtls.yStrideOut = aout->mHal.drvState.stride;
386    }
387
388    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
389        if (mtls.dimY > 1) {
390            rsdLaunchThreads(mrsc, wc_xy, &mtls);
391        } else {
392            rsdLaunchThreads(mrsc, wc_x, &mtls);
393        }
394
395        //ALOGE("launch 1");
396    } else {
397        RsForEachStubParamStruct p;
398        memset(&p, 0, sizeof(p));
399        p.usr = mtls.usr;
400        p.usr_len = mtls.usrLen;
401        uint32_t sig = mtls.sig;
402
403        //ALOGE("launch 3");
404        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
405        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
406            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
407                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
408                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
409                                      mtls.dimX * mtls.dimY * p.z +
410                                      mtls.dimX * p.y;
411                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
412                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
413                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
414                       mtls.eStrideOut);
415                }
416            }
417        }
418    }
419
420    setTLS(oldTLS);
421}
422
423
424int rsdScriptInvokeRoot(const Context *dc, Script *script) {
425    DrvScript *drv = (DrvScript *)script->mHal.drv;
426
427    Script * oldTLS = setTLS(script);
428    int ret = drv->mRoot();
429    setTLS(oldTLS);
430
431    return ret;
432}
433
434void rsdScriptInvokeInit(const Context *dc, Script *script) {
435    DrvScript *drv = (DrvScript *)script->mHal.drv;
436
437    if (drv->mInit) {
438        drv->mInit();
439    }
440}
441
442void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
443    DrvScript *drv = (DrvScript *)script->mHal.drv;
444
445    if (drv->mFreeChildren) {
446        drv->mFreeChildren();
447    }
448}
449
450void rsdScriptInvokeFunction(const Context *dc, Script *script,
451                            uint32_t slot,
452                            const void *params,
453                            size_t paramLength) {
454    DrvScript *drv = (DrvScript *)script->mHal.drv;
455    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
456
457    Script * oldTLS = setTLS(script);
458    ((void (*)(const void *, uint32_t))
459        drv->mInvokeFunctions[slot])(params, paramLength);
460    setTLS(oldTLS);
461}
462
463void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
464                           uint32_t slot, void *data, size_t dataLength) {
465    DrvScript *drv = (DrvScript *)script->mHal.drv;
466    //rsAssert(!script->mFieldIsObject[slot]);
467    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
468
469    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
470    if (!destPtr) {
471        //ALOGV("Calling setVar on slot = %i which is null", slot);
472        return;
473    }
474
475    memcpy(destPtr, data, dataLength);
476}
477
478void rsdScriptSetGlobalVarWithElemDims(
479        const android::renderscript::Context *dc,
480        const android::renderscript::Script *script,
481        uint32_t slot, void *data, size_t dataLength,
482        const android::renderscript::Element *elem,
483        const size_t *dims, size_t dimLength) {
484    DrvScript *drv = (DrvScript *)script->mHal.drv;
485
486    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
487    if (!destPtr) {
488        //ALOGV("Calling setVar on slot = %i which is null", slot);
489        return;
490    }
491
492    // We want to look at dimension in terms of integer components,
493    // but dimLength is given in terms of bytes.
494    dimLength /= sizeof(int);
495
496    // Only a single dimension is currently supported.
497    rsAssert(dimLength == 1);
498    if (dimLength == 1) {
499        // First do the increment loop.
500        size_t stride = elem->getSizeBytes();
501        char *cVal = reinterpret_cast<char *>(data);
502        for (size_t i = 0; i < dims[0]; i++) {
503            elem->incRefs(cVal);
504            cVal += stride;
505        }
506
507        // Decrement loop comes after (to prevent race conditions).
508        char *oldVal = reinterpret_cast<char *>(destPtr);
509        for (size_t i = 0; i < dims[0]; i++) {
510            elem->decRefs(oldVal);
511            oldVal += stride;
512        }
513    }
514
515    memcpy(destPtr, data, dataLength);
516}
517
518void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
519    DrvScript *drv = (DrvScript *)script->mHal.drv;
520    //rsAssert(!script->mFieldIsObject[slot]);
521    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
522
523    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
524    if (!destPtr) {
525        //ALOGV("Calling setVar on slot = %i which is null", slot);
526        return;
527    }
528
529    memcpy(destPtr, &data, sizeof(void *));
530}
531
532void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
533    DrvScript *drv = (DrvScript *)script->mHal.drv;
534    //rsAssert(script->mFieldIsObject[slot]);
535    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
536
537    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
538    if (!destPtr) {
539        //ALOGV("Calling setVar on slot = %i which is null", slot);
540        return;
541    }
542
543    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
544}
545
546void rsdScriptDestroy(const Context *dc, Script *script) {
547    DrvScript *drv = (DrvScript *)script->mHal.drv;
548
549    if (drv->mFieldAddress) {
550        size_t exportVarCount = drv->ME->getExportVarCount();
551        for (size_t ct = 0; ct < exportVarCount; ct++) {
552            if (drv->mFieldIsObject[ct]) {
553                // The field address can be NULL if the script-side has
554                // optimized the corresponding global variable away.
555                if (drv->mFieldAddress[ct]) {
556                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
557                }
558            }
559        }
560        free(drv->mFieldAddress);
561        drv->mFieldAddress = NULL;
562        free(drv->mFieldIsObject);
563        drv->mFieldIsObject = NULL;    }
564
565    if (drv->mInvokeFunctions) {
566        free(drv->mInvokeFunctions);
567        drv->mInvokeFunctions = NULL;
568    }
569
570    if (drv->mForEachFunctions) {
571        free(drv->mForEachFunctions);
572        drv->mForEachFunctions = NULL;
573    }
574
575    delete drv->ME;
576    drv->ME = NULL;
577
578    free(drv);
579    script->mHal.drv = NULL;
580
581}
582
583
584