rsdBcc.cpp revision f22c8ace148b69847aaf5ad1829e9ec95a44df6c
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20
21#include <bcinfo/MetadataExtractor.h>
22
23#include "rsContext.h"
24#include "rsScriptC.h"
25
26#include "utils/Timers.h"
27#include "utils/StopWatch.h"
28
29using namespace android;
30using namespace android::renderscript;
31
32struct DrvScript {
33    int (*mRoot)();
34    int (*mRootExpand)();
35    void (*mInit)();
36    void (*mFreeChildren)();
37
38    BCCScriptRef mBccScript;
39
40    bcinfo::MetadataExtractor *ME;
41
42    InvokeFunc_t *mInvokeFunctions;
43    ForEachFunc_t *mForEachFunctions;
44    void ** mFieldAddress;
45    bool * mFieldIsObject;
46    const uint32_t *mExportForEachSignatureList;
47
48    const uint8_t * mScriptText;
49    uint32_t mScriptTextLength;
50};
51
52typedef void (*outer_foreach_t)(
53    const android::renderscript::RsForEachStubParamStruct *,
54    uint32_t x1, uint32_t x2,
55    uint32_t instep, uint32_t outstep);
56
57static Script * setTLS(Script *sc) {
58    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
59    rsAssert(tls);
60    Script *old = tls->mScript;
61    tls->mScript = sc;
62    return old;
63}
64
65
66bool rsdScriptInit(const Context *rsc,
67                     ScriptC *script,
68                     char const *resName,
69                     char const *cacheDir,
70                     uint8_t const *bitcode,
71                     size_t bitcodeSize,
72                     uint32_t flags) {
73    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
74
75    pthread_mutex_lock(&rsdgInitMutex);
76
77    size_t exportFuncCount = 0;
78    size_t exportVarCount = 0;
79    size_t objectSlotCount = 0;
80    size_t exportForEachSignatureCount = 0;
81
82    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
83    if (drv == NULL) {
84        goto error;
85    }
86    script->mHal.drv = drv;
87
88    drv->mBccScript = bccCreateScript();
89    script->mHal.info.isThreadable = true;
90    drv->mScriptText = bitcode;
91    drv->mScriptTextLength = bitcodeSize;
92
93
94    drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
95                                            drv->mScriptTextLength);
96    if (!drv->ME->extract()) {
97      ALOGE("bcinfo: failed to read script metadata");
98      goto error;
99    }
100
101    //ALOGE("mBccScript %p", script->mBccScript);
102
103    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
104        ALOGE("bcc: FAILS to register symbol callback");
105        goto error;
106    }
107
108    if (bccReadBC(drv->mBccScript,
109                  resName,
110                  (char const *)drv->mScriptText,
111                  drv->mScriptTextLength, 0) != 0) {
112        ALOGE("bcc: FAILS to read bitcode");
113        goto error;
114    }
115
116    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
117        ALOGE("bcc: FAILS to link bitcode");
118        goto error;
119    }
120
121    if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
122        ALOGE("bcc: FAILS to prepare executable");
123        goto error;
124    }
125
126    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
127    drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand"));
128    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
129    drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor"));
130
131    exportFuncCount = drv->ME->getExportFuncCount();
132    if (exportFuncCount > 0) {
133        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
134                                                       sizeof(InvokeFunc_t));
135        bccGetExportFuncList(drv->mBccScript, exportFuncCount,
136                             (void **) drv->mInvokeFunctions);
137    } else {
138        drv->mInvokeFunctions = NULL;
139    }
140
141    exportVarCount = drv->ME->getExportVarCount();
142    if (exportVarCount > 0) {
143        drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
144        drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
145        bccGetExportVarList(drv->mBccScript, exportVarCount,
146                            (void **) drv->mFieldAddress);
147    } else {
148        drv->mFieldAddress = NULL;
149        drv->mFieldIsObject = NULL;
150    }
151
152    objectSlotCount = drv->ME->getObjectSlotCount();
153    if (objectSlotCount > 0) {
154        const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
155        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
156            drv->mFieldIsObject[objectSlotList[ct]] = true;
157        }
158    }
159
160    exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
161    drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
162    if (exportForEachSignatureCount > 0) {
163        drv->mForEachFunctions =
164            (ForEachFunc_t*) calloc(exportForEachSignatureCount,
165                                    sizeof(ForEachFunc_t));
166        bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount,
167                                (void **) drv->mForEachFunctions);
168    } else {
169        drv->mForEachFunctions = NULL;
170    }
171
172    // Copy info over to runtime
173    script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
174    script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
175    script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
176    script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
177    script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
178
179    if (drv->mRootExpand) {
180      script->mHal.info.root = drv->mRootExpand;
181    } else {
182      script->mHal.info.root = drv->mRoot;
183    }
184
185    pthread_mutex_unlock(&rsdgInitMutex);
186    return true;
187
188error:
189
190    pthread_mutex_unlock(&rsdgInitMutex);
191    if (drv->ME) {
192        delete drv->ME;
193        drv->ME = NULL;
194    }
195    free(drv);
196    return false;
197
198}
199
200typedef struct {
201    Context *rsc;
202    Script *script;
203    ForEachFunc_t kernel;
204    uint32_t sig;
205    const Allocation * ain;
206    Allocation * aout;
207    const void * usr;
208    size_t usrLen;
209
210    uint32_t mSliceSize;
211    volatile int mSliceNum;
212
213    const uint8_t *ptrIn;
214    uint32_t eStrideIn;
215    uint8_t *ptrOut;
216    uint32_t eStrideOut;
217
218    uint32_t yStrideIn;
219    uint32_t yStrideOut;
220
221    uint32_t xStart;
222    uint32_t xEnd;
223    uint32_t yStart;
224    uint32_t yEnd;
225    uint32_t zStart;
226    uint32_t zEnd;
227    uint32_t arrayStart;
228    uint32_t arrayEnd;
229
230    uint32_t dimX;
231    uint32_t dimY;
232    uint32_t dimZ;
233    uint32_t dimArray;
234} MTLaunchStruct;
// Function-pointer type taking three untyped pointers followed by four
// 32-bit values. NOTE(review): not referenced anywhere in the visible part
// of this file - confirm whether it is still needed.
typedef void (*rs_t)(const void *,
                     void *,
                     const void *,
                     uint32_t,
                     uint32_t,
                     uint32_t,
                     uint32_t);
236
237static void wc_xy(void *usr, uint32_t idx) {
238    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
239    RsForEachStubParamStruct p;
240    memset(&p, 0, sizeof(p));
241    p.usr = mtls->usr;
242    p.usr_len = mtls->usrLen;
243    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
244    uint32_t sig = mtls->sig;
245
246    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
247    while (1) {
248        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
249        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
250        uint32_t yEnd = yStart + mtls->mSliceSize;
251        yEnd = rsMin(yEnd, mtls->yEnd);
252        if (yEnd <= yStart) {
253            return;
254        }
255
256        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
257        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
258        for (p.y = yStart; p.y < yEnd; p.y++) {
259            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
260            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
261            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
262        }
263    }
264}
265
266static void wc_x(void *usr, uint32_t idx) {
267    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
268    RsForEachStubParamStruct p;
269    memset(&p, 0, sizeof(p));
270    p.usr = mtls->usr;
271    p.usr_len = mtls->usrLen;
272    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
273    uint32_t sig = mtls->sig;
274
275    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
276    while (1) {
277        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
278        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
279        uint32_t xEnd = xStart + mtls->mSliceSize;
280        xEnd = rsMin(xEnd, mtls->xEnd);
281        if (xEnd <= xStart) {
282            return;
283        }
284
285        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
286        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
287
288        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
289        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
290        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
291    }
292}
293
294void rsdScriptInvokeForEach(const Context *rsc,
295                            Script *s,
296                            uint32_t slot,
297                            const Allocation * ain,
298                            Allocation * aout,
299                            const void * usr,
300                            uint32_t usrLen,
301                            const RsScriptCall *sc) {
302
303    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
304
305    MTLaunchStruct mtls;
306    memset(&mtls, 0, sizeof(mtls));
307
308    DrvScript *drv = (DrvScript *)s->mHal.drv;
309    mtls.kernel = drv->mForEachFunctions[slot];
310    rsAssert(mtls.kernel != NULL);
311    mtls.sig = 0x1f;  // temp fix for old apps, full table in slang_rs_export_foreach.cpp
312    if (drv->mExportForEachSignatureList) {
313        mtls.sig = drv->mExportForEachSignatureList[slot];
314    }
315    if (ain) {
316        mtls.dimX = ain->getType()->getDimX();
317        mtls.dimY = ain->getType()->getDimY();
318        mtls.dimZ = ain->getType()->getDimZ();
319        //mtls.dimArray = ain->getType()->getDimArray();
320    } else if (aout) {
321        mtls.dimX = aout->getType()->getDimX();
322        mtls.dimY = aout->getType()->getDimY();
323        mtls.dimZ = aout->getType()->getDimZ();
324        //mtls.dimArray = aout->getType()->getDimArray();
325    } else {
326        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
327        return;
328    }
329
330    if (!sc || (sc->xEnd == 0)) {
331        mtls.xEnd = mtls.dimX;
332    } else {
333        rsAssert(sc->xStart < mtls.dimX);
334        rsAssert(sc->xEnd <= mtls.dimX);
335        rsAssert(sc->xStart < sc->xEnd);
336        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
337        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
338        if (mtls.xStart >= mtls.xEnd) return;
339    }
340
341    if (!sc || (sc->yEnd == 0)) {
342        mtls.yEnd = mtls.dimY;
343    } else {
344        rsAssert(sc->yStart < mtls.dimY);
345        rsAssert(sc->yEnd <= mtls.dimY);
346        rsAssert(sc->yStart < sc->yEnd);
347        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
348        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
349        if (mtls.yStart >= mtls.yEnd) return;
350    }
351
352    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
353    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
354    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
355    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
356
357    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
358
359    Context *mrsc = (Context *)rsc;
360    Script * oldTLS = setTLS(s);
361
362    mtls.rsc = mrsc;
363    mtls.ain = ain;
364    mtls.aout = aout;
365    mtls.script = s;
366    mtls.usr = usr;
367    mtls.usrLen = usrLen;
368    mtls.mSliceSize = 10;
369    mtls.mSliceNum = 0;
370
371    mtls.ptrIn = NULL;
372    mtls.eStrideIn = 0;
373    if (ain) {
374        mtls.ptrIn = (const uint8_t *)ain->getPtr();
375        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
376        mtls.yStrideIn = ain->mHal.drvState.stride;
377    }
378
379    mtls.ptrOut = NULL;
380    mtls.eStrideOut = 0;
381    if (aout) {
382        mtls.ptrOut = (uint8_t *)aout->getPtr();
383        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
384        mtls.yStrideOut = aout->mHal.drvState.stride;
385    }
386
387    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
388        if (mtls.dimY > 1) {
389            rsdLaunchThreads(mrsc, wc_xy, &mtls);
390        } else {
391            rsdLaunchThreads(mrsc, wc_x, &mtls);
392        }
393
394        //ALOGE("launch 1");
395    } else {
396        RsForEachStubParamStruct p;
397        memset(&p, 0, sizeof(p));
398        p.usr = mtls.usr;
399        p.usr_len = mtls.usrLen;
400        uint32_t sig = mtls.sig;
401
402        //ALOGE("launch 3");
403        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
404        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
405            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
406                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
407                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
408                                      mtls.dimX * mtls.dimY * p.z +
409                                      mtls.dimX * p.y;
410                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
411                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
412                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
413                       mtls.eStrideOut);
414                }
415            }
416        }
417    }
418
419    setTLS(oldTLS);
420}
421
422
423int rsdScriptInvokeRoot(const Context *dc, Script *script) {
424    DrvScript *drv = (DrvScript *)script->mHal.drv;
425
426    Script * oldTLS = setTLS(script);
427    int ret = drv->mRoot();
428    setTLS(oldTLS);
429
430    return ret;
431}
432
433void rsdScriptInvokeInit(const Context *dc, Script *script) {
434    DrvScript *drv = (DrvScript *)script->mHal.drv;
435
436    if (drv->mInit) {
437        drv->mInit();
438    }
439}
440
441void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
442    DrvScript *drv = (DrvScript *)script->mHal.drv;
443
444    if (drv->mFreeChildren) {
445        drv->mFreeChildren();
446    }
447}
448
449void rsdScriptInvokeFunction(const Context *dc, Script *script,
450                            uint32_t slot,
451                            const void *params,
452                            size_t paramLength) {
453    DrvScript *drv = (DrvScript *)script->mHal.drv;
454    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
455
456    Script * oldTLS = setTLS(script);
457    ((void (*)(const void *, uint32_t))
458        drv->mInvokeFunctions[slot])(params, paramLength);
459    setTLS(oldTLS);
460}
461
462void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
463                           uint32_t slot, void *data, size_t dataLength) {
464    DrvScript *drv = (DrvScript *)script->mHal.drv;
465    //rsAssert(!script->mFieldIsObject[slot]);
466    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
467
468    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
469    if (!destPtr) {
470        //ALOGV("Calling setVar on slot = %i which is null", slot);
471        return;
472    }
473
474    memcpy(destPtr, data, dataLength);
475}
476
477void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
478    DrvScript *drv = (DrvScript *)script->mHal.drv;
479    //rsAssert(!script->mFieldIsObject[slot]);
480    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
481
482    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
483    if (!destPtr) {
484        //ALOGV("Calling setVar on slot = %i which is null", slot);
485        return;
486    }
487
488    memcpy(destPtr, &data, sizeof(void *));
489}
490
491void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
492    DrvScript *drv = (DrvScript *)script->mHal.drv;
493    //rsAssert(script->mFieldIsObject[slot]);
494    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
495
496    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
497    if (!destPtr) {
498        //ALOGV("Calling setVar on slot = %i which is null", slot);
499        return;
500    }
501
502    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
503}
504
505void rsdScriptDestroy(const Context *dc, Script *script) {
506    DrvScript *drv = (DrvScript *)script->mHal.drv;
507
508    if (drv->mFieldAddress) {
509        size_t exportVarCount = drv->ME->getExportVarCount();
510        for (size_t ct = 0; ct < exportVarCount; ct++) {
511            if (drv->mFieldIsObject[ct]) {
512                // The field address can be NULL if the script-side has
513                // optimized the corresponding global variable away.
514                if (drv->mFieldAddress[ct]) {
515                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
516                }
517            }
518        }
519        free(drv->mFieldAddress);
520        drv->mFieldAddress = NULL;
521        free(drv->mFieldIsObject);
522        drv->mFieldIsObject = NULL;    }
523
524    if (drv->mInvokeFunctions) {
525        free(drv->mInvokeFunctions);
526        drv->mInvokeFunctions = NULL;
527    }
528
529    if (drv->mForEachFunctions) {
530        free(drv->mForEachFunctions);
531        drv->mForEachFunctions = NULL;
532    }
533
534    delete drv->ME;
535    drv->ME = NULL;
536
537    free(drv);
538    script->mHal.drv = NULL;
539
540}
541
542
543