rsdBcc.cpp revision 4ff21865d7e6c24adf8e4159783c848530ecc4a2
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsdCore.h"
19#include "rsdBcc.h"
20#include "rsdRuntime.h"
21
22#include <bcinfo/MetadataExtractor.h>
23
24#include "rsContext.h"
25#include "rsScriptC.h"
26
27#include "utils/Timers.h"
28#include "utils/StopWatch.h"
29extern "C" {
30#include "libdex/ZipArchive.h"
31}
32
33
34using namespace android;
35using namespace android::renderscript;
36
37struct DrvScript {
38    int (*mRoot)();
39    void (*mInit)();
40    void (*mFreeChildren)();
41
42    BCCScriptRef mBccScript;
43
44    bcinfo::MetadataExtractor *ME;
45
46    InvokeFunc_t *mInvokeFunctions;
47    void ** mFieldAddress;
48    bool * mFieldIsObject;
49    const uint32_t *mExportForEachSignatureList;
50
51    const uint8_t * mScriptText;
52    uint32_t mScriptTextLength;
53};
54
55
56static Script * setTLS(Script *sc) {
57    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
58    rsAssert(tls);
59    Script *old = tls->mScript;
60    tls->mScript = sc;
61    return old;
62}
63
64
65bool rsdScriptInit(const Context *rsc,
66                     ScriptC *script,
67                     char const *resName,
68                     char const *cacheDir,
69                     uint8_t const *bitcode,
70                     size_t bitcodeSize,
71                     uint32_t flags) {
72    //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
73
74    pthread_mutex_lock(&rsdgInitMutex);
75    char *cachePath = NULL;
76    size_t exportFuncCount = 0;
77    size_t exportVarCount = 0;
78    size_t objectSlotCount = 0;
79    size_t exportForEachSignatureCount = 0;
80
81    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
82    if (drv == NULL) {
83        goto error;
84    }
85    script->mHal.drv = drv;
86
87    drv->mBccScript = bccCreateScript();
88    script->mHal.info.isThreadable = true;
89    drv->mScriptText = bitcode;
90    drv->mScriptTextLength = bitcodeSize;
91
92
93    drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
94                                            drv->mScriptTextLength);
95    if (!drv->ME->extract()) {
96      LOGE("bcinfo: failed to read script metadata");
97      goto error;
98    }
99
100    //LOGE("mBccScript %p", script->mBccScript);
101
102    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
103        LOGE("bcc: FAILS to register symbol callback");
104        goto error;
105    }
106
107    if (bccReadBC(drv->mBccScript,
108                  resName,
109                  (char const *)drv->mScriptText,
110                  drv->mScriptTextLength, 0) != 0) {
111        LOGE("bcc: FAILS to read bitcode");
112        goto error;
113    }
114
115    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
116        LOGE("bcc: FAILS to link bitcode");
117        goto error;
118    }
119
120    if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
121        LOGE("bcc: FAILS to prepare executable");
122        goto error;
123    }
124
125    free(cachePath);
126
127    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
128    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
129    drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor"));
130
131    exportFuncCount = drv->ME->getExportFuncCount();
132    if (exportFuncCount > 0) {
133        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
134                                                       sizeof(InvokeFunc_t));
135        bccGetExportFuncList(drv->mBccScript, exportFuncCount,
136                             (void **) drv->mInvokeFunctions);
137    } else {
138        drv->mInvokeFunctions = NULL;
139    }
140
141    exportVarCount = drv->ME->getExportVarCount();
142    if (exportVarCount > 0) {
143        drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
144        drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
145        bccGetExportVarList(drv->mBccScript, exportVarCount,
146                            (void **) drv->mFieldAddress);
147    } else {
148        drv->mFieldAddress = NULL;
149        drv->mFieldIsObject = NULL;
150    }
151
152    objectSlotCount = drv->ME->getObjectSlotCount();
153    if (objectSlotCount > 0) {
154        const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
155        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
156            drv->mFieldIsObject[objectSlotList[ct]] = true;
157        }
158    }
159
160    exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
161    rsAssert(exportForEachSignatureCount <= 1);
162    drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
163
164    // Copy info over to runtime
165    script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
166    script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
167    script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
168    script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
169    script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
170    script->mHal.info.root = drv->mRoot;
171
172    pthread_mutex_unlock(&rsdgInitMutex);
173    return true;
174
175error:
176
177    pthread_mutex_unlock(&rsdgInitMutex);
178    if (drv->ME) {
179        delete drv->ME;
180        drv->ME = NULL;
181    }
182    free(drv);
183    return false;
184
185}
186
187typedef struct {
188    Context *rsc;
189    Script *script;
190    uint32_t sig;
191    const Allocation * ain;
192    Allocation * aout;
193    const void * usr;
194    size_t usrLen;
195
196    uint32_t mSliceSize;
197    volatile int mSliceNum;
198
199    const uint8_t *ptrIn;
200    uint32_t eStrideIn;
201    uint8_t *ptrOut;
202    uint32_t eStrideOut;
203
204    uint32_t xStart;
205    uint32_t xEnd;
206    uint32_t yStart;
207    uint32_t yEnd;
208    uint32_t zStart;
209    uint32_t zEnd;
210    uint32_t arrayStart;
211    uint32_t arrayEnd;
212
213    uint32_t dimX;
214    uint32_t dimY;
215    uint32_t dimZ;
216    uint32_t dimArray;
217} MTLaunchStruct;
218typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
219
220static void wc_xy(void *usr, uint32_t idx) {
221    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
222    RsForEachStubParamStruct p;
223    memset(&p, 0, sizeof(p));
224    p.usr = mtls->usr;
225    p.usr_len = mtls->usrLen;
226    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
227    uint32_t sig = mtls->sig;
228
229    while (1) {
230        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
231        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
232        uint32_t yEnd = yStart + mtls->mSliceSize;
233        yEnd = rsMin(yEnd, mtls->yEnd);
234        if (yEnd <= yStart) {
235            return;
236        }
237
238        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
239        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
240        for (p.y = yStart; p.y < yEnd; p.y++) {
241            uint32_t offset = mtls->dimX * p.y;
242            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
243            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
244
245            for (p.x = mtls->xStart; p.x < mtls->xEnd; p.x++) {
246                p.in = xPtrIn;
247                p.out = xPtrOut;
248                dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
249                xPtrIn += mtls->eStrideIn;
250                xPtrOut += mtls->eStrideOut;
251            }
252        }
253    }
254}
255
256static void wc_x(void *usr, uint32_t idx) {
257    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
258    RsForEachStubParamStruct p;
259    memset(&p, 0, sizeof(p));
260    p.usr = mtls->usr;
261    p.usr_len = mtls->usrLen;
262    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
263    uint32_t sig = mtls->sig;
264
265    while (1) {
266        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
267        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
268        uint32_t xEnd = xStart + mtls->mSliceSize;
269        xEnd = rsMin(xEnd, mtls->xEnd);
270        if (xEnd <= xStart) {
271            return;
272        }
273
274        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
275        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
276        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
277        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
278        for (p.x = xStart; p.x < xEnd; p.x++) {
279            p.in = xPtrIn;
280            p.out = xPtrOut;
281            dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
282            xPtrIn += mtls->eStrideIn;
283            xPtrOut += mtls->eStrideOut;
284        }
285    }
286}
287
288void rsdScriptInvokeForEach(const Context *rsc,
289                            Script *s,
290                            uint32_t slot,
291                            const Allocation * ain,
292                            Allocation * aout,
293                            const void * usr,
294                            uint32_t usrLen,
295                            const RsScriptCall *sc) {
296
297    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
298
299    MTLaunchStruct mtls;
300    memset(&mtls, 0, sizeof(mtls));
301
302    DrvScript *drv = (DrvScript *)s->mHal.drv;
303    // We only support slot 0 (root) at this point in time.
304    rsAssert(slot == 0);
305    mtls.sig = 0x1f;  // temp fix for old apps, full table in slang_rs_export_foreach.cpp
306    if (drv->mExportForEachSignatureList) {
307        mtls.sig = drv->mExportForEachSignatureList[slot];
308    }
309    if (ain) {
310        mtls.dimX = ain->getType()->getDimX();
311        mtls.dimY = ain->getType()->getDimY();
312        mtls.dimZ = ain->getType()->getDimZ();
313        //mtls.dimArray = ain->getType()->getDimArray();
314    } else if (aout) {
315        mtls.dimX = aout->getType()->getDimX();
316        mtls.dimY = aout->getType()->getDimY();
317        mtls.dimZ = aout->getType()->getDimZ();
318        //mtls.dimArray = aout->getType()->getDimArray();
319    } else {
320        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
321        return;
322    }
323
324    if (!sc || (sc->xEnd == 0)) {
325        mtls.xEnd = mtls.dimX;
326    } else {
327        rsAssert(sc->xStart < mtls.dimX);
328        rsAssert(sc->xEnd <= mtls.dimX);
329        rsAssert(sc->xStart < sc->xEnd);
330        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
331        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
332        if (mtls.xStart >= mtls.xEnd) return;
333    }
334
335    if (!sc || (sc->yEnd == 0)) {
336        mtls.yEnd = mtls.dimY;
337    } else {
338        rsAssert(sc->yStart < mtls.dimY);
339        rsAssert(sc->yEnd <= mtls.dimY);
340        rsAssert(sc->yStart < sc->yEnd);
341        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
342        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
343        if (mtls.yStart >= mtls.yEnd) return;
344    }
345
346    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
347    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
348    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
349    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
350
351    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
352
353    Context *mrsc = (Context *)rsc;
354    Script * oldTLS = setTLS(s);
355
356    mtls.rsc = mrsc;
357    mtls.ain = ain;
358    mtls.aout = aout;
359    mtls.script = s;
360    mtls.usr = usr;
361    mtls.usrLen = usrLen;
362    mtls.mSliceSize = 10;
363    mtls.mSliceNum = 0;
364
365    mtls.ptrIn = NULL;
366    mtls.eStrideIn = 0;
367    if (ain) {
368        mtls.ptrIn = (const uint8_t *)ain->getPtr();
369        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
370    }
371
372    mtls.ptrOut = NULL;
373    mtls.eStrideOut = 0;
374    if (aout) {
375        mtls.ptrOut = (uint8_t *)aout->getPtr();
376        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
377    }
378
379    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
380        if (mtls.dimY > 1) {
381            rsdLaunchThreads(mrsc, wc_xy, &mtls);
382        } else {
383            rsdLaunchThreads(mrsc, wc_x, &mtls);
384        }
385
386        //LOGE("launch 1");
387    } else {
388        RsForEachStubParamStruct p;
389        memset(&p, 0, sizeof(p));
390        p.usr = mtls.usr;
391        p.usr_len = mtls.usrLen;
392        uint32_t sig = mtls.sig;
393
394        //LOGE("launch 3");
395        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
396            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
397                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
398                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
399                                      mtls.dimX * mtls.dimY * p.z +
400                                      mtls.dimX * p.y;
401                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
402                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
403
404                    for (p.x = mtls.xStart; p.x < mtls.xEnd; p.x++) {
405                        p.in = xPtrIn;
406                        p.out = xPtrOut;
407                        dc->mForEachLaunch[sig](&s->mHal.info.root, &p);
408                        xPtrIn += mtls.eStrideIn;
409                        xPtrOut += mtls.eStrideOut;
410                    }
411                }
412            }
413        }
414    }
415
416    setTLS(oldTLS);
417}
418
419
420int rsdScriptInvokeRoot(const Context *dc, Script *script) {
421    DrvScript *drv = (DrvScript *)script->mHal.drv;
422
423    Script * oldTLS = setTLS(script);
424    int ret = drv->mRoot();
425    setTLS(oldTLS);
426
427    return ret;
428}
429
430void rsdScriptInvokeInit(const Context *dc, Script *script) {
431    DrvScript *drv = (DrvScript *)script->mHal.drv;
432
433    if (drv->mInit) {
434        drv->mInit();
435    }
436}
437
438void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
439    DrvScript *drv = (DrvScript *)script->mHal.drv;
440
441    if (drv->mFreeChildren) {
442        drv->mFreeChildren();
443    }
444}
445
446void rsdScriptInvokeFunction(const Context *dc, Script *script,
447                            uint32_t slot,
448                            const void *params,
449                            size_t paramLength) {
450    DrvScript *drv = (DrvScript *)script->mHal.drv;
451    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
452
453    Script * oldTLS = setTLS(script);
454    ((void (*)(const void *, uint32_t))
455        drv->mInvokeFunctions[slot])(params, paramLength);
456    setTLS(oldTLS);
457}
458
459void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
460                           uint32_t slot, void *data, size_t dataLength) {
461    DrvScript *drv = (DrvScript *)script->mHal.drv;
462    //rsAssert(!script->mFieldIsObject[slot]);
463    //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
464
465    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
466    if (!destPtr) {
467        //LOGV("Calling setVar on slot = %i which is null", slot);
468        return;
469    }
470
471    memcpy(destPtr, data, dataLength);
472}
473
474void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
475    DrvScript *drv = (DrvScript *)script->mHal.drv;
476    //rsAssert(!script->mFieldIsObject[slot]);
477    //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
478
479    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
480    if (!destPtr) {
481        //LOGV("Calling setVar on slot = %i which is null", slot);
482        return;
483    }
484
485    memcpy(destPtr, &data, sizeof(void *));
486}
487
488void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
489    DrvScript *drv = (DrvScript *)script->mHal.drv;
490    //rsAssert(script->mFieldIsObject[slot]);
491    //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
492
493    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
494    if (!destPtr) {
495        //LOGV("Calling setVar on slot = %i which is null", slot);
496        return;
497    }
498
499    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
500}
501
502void rsdScriptDestroy(const Context *dc, Script *script) {
503    DrvScript *drv = (DrvScript *)script->mHal.drv;
504
505    if (drv->mFieldAddress) {
506        size_t exportVarCount = drv->ME->getExportVarCount();
507        for (size_t ct = 0; ct < exportVarCount; ct++) {
508            if (drv->mFieldIsObject[ct]) {
509                // The field address can be NULL if the script-side has
510                // optimized the corresponding global variable away.
511                if (drv->mFieldAddress[ct]) {
512                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
513                }
514            }
515        }
516        free(drv->mFieldAddress);
517        drv->mFieldAddress = NULL;
518        free(drv->mFieldIsObject);
519        drv->mFieldIsObject = NULL;    }
520
521    if (drv->mInvokeFunctions) {
522        free(drv->mInvokeFunctions);
523        drv->mInvokeFunctions = NULL;
524    }
525
526    delete drv->ME;
527    drv->ME = NULL;
528
529    free(drv);
530    script->mHal.drv = NULL;
531
532}
533
534
535