rsdBcc.cpp revision 3815badf95a7dca8aa278e3e12f07a3924a82319
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsdCore.h"
19#include "rsdBcc.h"
20#include "rsdRuntime.h"
21
22#include <bcinfo/MetadataExtractor.h>
23
24#include "rsContext.h"
25#include "rsScriptC.h"
26
27#include "utils/Timers.h"
28#include "utils/StopWatch.h"
29extern "C" {
30#include "libdex/ZipArchive.h"
31}
32
33
34using namespace android;
35using namespace android::renderscript;
36
37struct DrvScript {
38    int (*mRoot)();
39    void (*mInit)();
40
41    BCCScriptRef mBccScript;
42
43    bcinfo::MetadataExtractor *ME;
44
45    InvokeFunc_t *mInvokeFunctions;
46    void ** mFieldAddress;
47    bool * mFieldIsObject;
48    const uint32_t *mExportForEachSignatureList;
49
50    const uint8_t * mScriptText;
51    uint32_t mScriptTextLength;
52};
53
54
55static Script * setTLS(Script *sc) {
56    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
57    rsAssert(tls);
58    Script *old = tls->mScript;
59    tls->mScript = sc;
60    return old;
61}
62
63
64bool rsdScriptInit(const Context *rsc,
65                     ScriptC *script,
66                     char const *resName,
67                     char const *cacheDir,
68                     uint8_t const *bitcode,
69                     size_t bitcodeSize,
70                     uint32_t flags) {
71    //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
72
73    pthread_mutex_lock(&rsdgInitMutex);
74    char *cachePath = NULL;
75    size_t exportFuncCount = 0;
76    size_t exportVarCount = 0;
77    size_t objectSlotCount = 0;
78    size_t exportForEachSignatureCount = 0;
79
80    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
81    if (drv == NULL) {
82        goto error;
83    }
84    script->mHal.drv = drv;
85
86    drv->mBccScript = bccCreateScript();
87    script->mHal.info.isThreadable = true;
88    drv->mScriptText = bitcode;
89    drv->mScriptTextLength = bitcodeSize;
90
91
92    drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
93                                            drv->mScriptTextLength);
94    if (!drv->ME->extract()) {
95      LOGE("bcinfo: failed to read script metadata");
96      goto error;
97    }
98
99    //LOGE("mBccScript %p", script->mBccScript);
100
101    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
102        LOGE("bcc: FAILS to register symbol callback");
103        goto error;
104    }
105
106    if (bccReadBC(drv->mBccScript,
107                  resName,
108                  (char const *)drv->mScriptText,
109                  drv->mScriptTextLength, 0) != 0) {
110        LOGE("bcc: FAILS to read bitcode");
111        goto error;
112    }
113
114    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
115        LOGE("bcc: FAILS to link bitcode");
116        goto error;
117    }
118
119    if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
120        LOGE("bcc: FAILS to prepare executable");
121        goto error;
122    }
123
124    free(cachePath);
125
126    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
127    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
128
129    exportFuncCount = drv->ME->getExportFuncCount();
130    if (exportFuncCount > 0) {
131        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
132                                                       sizeof(InvokeFunc_t));
133        bccGetExportFuncList(drv->mBccScript, exportFuncCount,
134                             (void **) drv->mInvokeFunctions);
135    } else {
136        drv->mInvokeFunctions = NULL;
137    }
138
139    exportVarCount = drv->ME->getExportVarCount();
140    if (exportVarCount > 0) {
141        drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
142        drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
143        bccGetExportVarList(drv->mBccScript, exportVarCount,
144                            (void **) drv->mFieldAddress);
145    } else {
146        drv->mFieldAddress = NULL;
147        drv->mFieldIsObject = NULL;
148    }
149
150    objectSlotCount = drv->ME->getObjectSlotCount();
151    if (objectSlotCount > 0) {
152        const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
153        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
154            drv->mFieldIsObject[objectSlotList[ct]] = true;
155        }
156    }
157
158    exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
159    rsAssert(exportForEachSignatureCount <= 1);
160    drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
161
162    // Copy info over to runtime
163    script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
164    script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
165    script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
166    script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
167    script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
168    script->mHal.info.root = drv->mRoot;
169
170    pthread_mutex_unlock(&rsdgInitMutex);
171    return true;
172
173error:
174
175    pthread_mutex_unlock(&rsdgInitMutex);
176    if (drv->ME) {
177        delete drv->ME;
178        drv->ME = NULL;
179    }
180    free(drv);
181    return false;
182
183}
184
185typedef struct {
186    Context *rsc;
187    Script *script;
188    uint32_t sig;
189    const Allocation * ain;
190    Allocation * aout;
191    const void * usr;
192    size_t usrLen;
193
194    uint32_t mSliceSize;
195    volatile int mSliceNum;
196
197    const uint8_t *ptrIn;
198    uint32_t eStrideIn;
199    uint8_t *ptrOut;
200    uint32_t eStrideOut;
201
202    uint32_t xStart;
203    uint32_t xEnd;
204    uint32_t yStart;
205    uint32_t yEnd;
206    uint32_t zStart;
207    uint32_t zEnd;
208    uint32_t arrayStart;
209    uint32_t arrayEnd;
210
211    uint32_t dimX;
212    uint32_t dimY;
213    uint32_t dimZ;
214    uint32_t dimArray;
215} MTLaunchStruct;
216typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
217
218static void wc_xy(void *usr, uint32_t idx) {
219    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
220    RsForEachStubParamStruct p;
221    memset(&p, 0, sizeof(p));
222    p.usr = mtls->usr;
223    p.usr_len = mtls->usrLen;
224    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
225    uint32_t sig = mtls->sig;
226
227    while (1) {
228        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
229        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
230        uint32_t yEnd = yStart + mtls->mSliceSize;
231        yEnd = rsMin(yEnd, mtls->yEnd);
232        if (yEnd <= yStart) {
233            return;
234        }
235
236        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
237        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
238        for (p.y = yStart; p.y < yEnd; p.y++) {
239            uint32_t offset = mtls->dimX * p.y;
240            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
241            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
242
243            for (p.x = mtls->xStart; p.x < mtls->xEnd; p.x++) {
244                p.in = xPtrIn;
245                p.out = xPtrOut;
246                dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
247                xPtrIn += mtls->eStrideIn;
248                xPtrOut += mtls->eStrideOut;
249            }
250        }
251    }
252}
253
254static void wc_x(void *usr, uint32_t idx) {
255    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
256    RsForEachStubParamStruct p;
257    memset(&p, 0, sizeof(p));
258    p.usr = mtls->usr;
259    p.usr_len = mtls->usrLen;
260    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
261    uint32_t sig = mtls->sig;
262
263    while (1) {
264        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
265        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
266        uint32_t xEnd = xStart + mtls->mSliceSize;
267        xEnd = rsMin(xEnd, mtls->xEnd);
268        if (xEnd <= xStart) {
269            return;
270        }
271
272        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
273        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
274        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
275        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
276        for (p.x = xStart; p.x < xEnd; p.x++) {
277            p.in = xPtrIn;
278            p.out = xPtrOut;
279            dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
280            xPtrIn += mtls->eStrideIn;
281            xPtrOut += mtls->eStrideOut;
282        }
283    }
284}
285
286void rsdScriptInvokeForEach(const Context *rsc,
287                            Script *s,
288                            uint32_t slot,
289                            const Allocation * ain,
290                            Allocation * aout,
291                            const void * usr,
292                            uint32_t usrLen,
293                            const RsScriptCall *sc) {
294
295    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
296
297    MTLaunchStruct mtls;
298    memset(&mtls, 0, sizeof(mtls));
299
300    DrvScript *drv = (DrvScript *)s->mHal.drv;
301    // We only support slot 0 (root) at this point in time.
302    rsAssert(slot == 0);
303    mtls.sig = drv->mExportForEachSignatureList[slot];
304    if (ain) {
305        mtls.dimX = ain->getType()->getDimX();
306        mtls.dimY = ain->getType()->getDimY();
307        mtls.dimZ = ain->getType()->getDimZ();
308        //mtls.dimArray = ain->getType()->getDimArray();
309    } else if (aout) {
310        mtls.dimX = aout->getType()->getDimX();
311        mtls.dimY = aout->getType()->getDimY();
312        mtls.dimZ = aout->getType()->getDimZ();
313        //mtls.dimArray = aout->getType()->getDimArray();
314    } else {
315        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
316        return;
317    }
318
319    if (!sc || (sc->xEnd == 0)) {
320        mtls.xEnd = mtls.dimX;
321    } else {
322        rsAssert(sc->xStart < mtls.dimX);
323        rsAssert(sc->xEnd <= mtls.dimX);
324        rsAssert(sc->xStart < sc->xEnd);
325        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
326        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
327        if (mtls.xStart >= mtls.xEnd) return;
328    }
329
330    if (!sc || (sc->yEnd == 0)) {
331        mtls.yEnd = mtls.dimY;
332    } else {
333        rsAssert(sc->yStart < mtls.dimY);
334        rsAssert(sc->yEnd <= mtls.dimY);
335        rsAssert(sc->yStart < sc->yEnd);
336        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
337        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
338        if (mtls.yStart >= mtls.yEnd) return;
339    }
340
341    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
342    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
343    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
344    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
345
346    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
347
348    Context *mrsc = (Context *)rsc;
349    Script * oldTLS = setTLS(s);
350
351    mtls.rsc = mrsc;
352    mtls.ain = ain;
353    mtls.aout = aout;
354    mtls.script = s;
355    mtls.usr = usr;
356    mtls.usrLen = usrLen;
357    mtls.mSliceSize = 10;
358    mtls.mSliceNum = 0;
359
360    mtls.ptrIn = NULL;
361    mtls.eStrideIn = 0;
362    if (ain) {
363        mtls.ptrIn = (const uint8_t *)ain->getPtr();
364        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
365    }
366
367    mtls.ptrOut = NULL;
368    mtls.eStrideOut = 0;
369    if (aout) {
370        mtls.ptrOut = (uint8_t *)aout->getPtr();
371        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
372    }
373
374    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
375        if (mtls.dimY > 1) {
376            rsdLaunchThreads(mrsc, wc_xy, &mtls);
377        } else {
378            rsdLaunchThreads(mrsc, wc_x, &mtls);
379        }
380
381        //LOGE("launch 1");
382    } else {
383        RsForEachStubParamStruct p;
384        memset(&p, 0, sizeof(p));
385        p.usr = mtls.usr;
386        p.usr_len = mtls.usrLen;
387        uint32_t sig = mtls.sig;
388
389        //LOGE("launch 3");
390        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
391            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
392                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
393                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
394                                      mtls.dimX * mtls.dimY * p.z +
395                                      mtls.dimX * p.y;
396                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
397                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
398
399                    for (p.x = mtls.xStart; p.x < mtls.xEnd; p.x++) {
400                        p.in = xPtrIn;
401                        p.out = xPtrOut;
402                        dc->mForEachLaunch[sig](&s->mHal.info.root, &p);
403                        xPtrIn += mtls.eStrideIn;
404                        xPtrOut += mtls.eStrideOut;
405                    }
406                }
407            }
408        }
409    }
410
411    setTLS(oldTLS);
412}
413
414
415int rsdScriptInvokeRoot(const Context *dc, Script *script) {
416    DrvScript *drv = (DrvScript *)script->mHal.drv;
417
418    Script * oldTLS = setTLS(script);
419    int ret = drv->mRoot();
420    setTLS(oldTLS);
421
422    return ret;
423}
424
425void rsdScriptInvokeInit(const Context *dc, Script *script) {
426    DrvScript *drv = (DrvScript *)script->mHal.drv;
427
428    if (drv->mInit) {
429        drv->mInit();
430    }
431}
432
433
434void rsdScriptInvokeFunction(const Context *dc, Script *script,
435                            uint32_t slot,
436                            const void *params,
437                            size_t paramLength) {
438    DrvScript *drv = (DrvScript *)script->mHal.drv;
439    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
440
441    Script * oldTLS = setTLS(script);
442    ((void (*)(const void *, uint32_t))
443        drv->mInvokeFunctions[slot])(params, paramLength);
444    setTLS(oldTLS);
445}
446
447void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
448                           uint32_t slot, void *data, size_t dataLength) {
449    DrvScript *drv = (DrvScript *)script->mHal.drv;
450    //rsAssert(!script->mFieldIsObject[slot]);
451    //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
452
453    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
454    if (!destPtr) {
455        //LOGV("Calling setVar on slot = %i which is null", slot);
456        return;
457    }
458
459    memcpy(destPtr, data, dataLength);
460}
461
462void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
463    DrvScript *drv = (DrvScript *)script->mHal.drv;
464    //rsAssert(!script->mFieldIsObject[slot]);
465    //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
466
467    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
468    if (!destPtr) {
469        //LOGV("Calling setVar on slot = %i which is null", slot);
470        return;
471    }
472
473    memcpy(destPtr, &data, sizeof(void *));
474}
475
476void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
477    DrvScript *drv = (DrvScript *)script->mHal.drv;
478    //rsAssert(script->mFieldIsObject[slot]);
479    //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
480
481    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
482    if (!destPtr) {
483        //LOGV("Calling setVar on slot = %i which is null", slot);
484        return;
485    }
486
487    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
488}
489
490void rsdScriptDestroy(const Context *dc, Script *script) {
491    DrvScript *drv = (DrvScript *)script->mHal.drv;
492
493    if (drv->mFieldAddress) {
494        size_t exportVarCount = drv->ME->getExportVarCount();
495        for (size_t ct = 0; ct < exportVarCount; ct++) {
496            if (drv->mFieldIsObject[ct]) {
497                // The field address can be NULL if the script-side has
498                // optimized the corresponding global variable away.
499                if (drv->mFieldAddress[ct]) {
500                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
501                }
502            }
503        }
504        free(drv->mFieldAddress);
505        drv->mFieldAddress = NULL;
506        free(drv->mFieldIsObject);
507        drv->mFieldIsObject = NULL;    }
508
509    if (drv->mInvokeFunctions) {
510        free(drv->mInvokeFunctions);
511        drv->mInvokeFunctions = NULL;
512    }
513
514    delete drv->ME;
515    drv->ME = NULL;
516
517    free(drv);
518    script->mHal.drv = NULL;
519
520}
521
522
523