rsdBcc.cpp revision 451cf2e16555c96eaaf9d8180e29dd97a5d6cbd7
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsdCore.h"
19#include "rsdBcc.h"
20#include "rsdRuntime.h"
21
22#include <bcinfo/MetadataExtractor.h>
23
24#include "rsContext.h"
25#include "rsScriptC.h"
26
27#include "utils/Timers.h"
28#include "utils/StopWatch.h"
29extern "C" {
30#include "libdex/ZipArchive.h"
31}
32
33
34using namespace android;
35using namespace android::renderscript;
36
37struct DrvScript {
38    int (*mRoot)();
39    void (*mInit)();
40
41    BCCScriptRef mBccScript;
42
43    bcinfo::MetadataExtractor *ME;
44
45    InvokeFunc_t *mInvokeFunctions;
46    void ** mFieldAddress;
47    bool * mFieldIsObject;
48
49    const uint8_t * mScriptText;
50    uint32_t mScriptTextLength;
51};
52
53
54static Script * setTLS(Script *sc) {
55    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
56    rsAssert(tls);
57    Script *old = tls->mScript;
58    tls->mScript = sc;
59    return old;
60}
61
62
63bool rsdScriptInit(const Context *rsc,
64                     ScriptC *script,
65                     char const *resName,
66                     char const *cacheDir,
67                     uint8_t const *bitcode,
68                     size_t bitcodeSize,
69                     uint32_t flags) {
70    //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
71
72    pthread_mutex_lock(&rsdgInitMutex);
73    char *cachePath = NULL;
74    size_t exportFuncCount = 0;
75    size_t exportVarCount = 0;
76    size_t objectSlotCount = 0;
77
78    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
79    if (drv == NULL) {
80        goto error;
81    }
82    script->mHal.drv = drv;
83
84    drv->mBccScript = bccCreateScript();
85    script->mHal.info.isThreadable = true;
86    drv->mScriptText = bitcode;
87    drv->mScriptTextLength = bitcodeSize;
88
89
90    drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
91                                            drv->mScriptTextLength);
92    if (!drv->ME->extract()) {
93      LOGE("bcinfo: failed to read script metadata");
94      goto error;
95    }
96
97    //LOGE("mBccScript %p", script->mBccScript);
98
99    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
100        LOGE("bcc: FAILS to register symbol callback");
101        goto error;
102    }
103
104    if (bccReadBC(drv->mBccScript,
105                  resName,
106                  (char const *)drv->mScriptText,
107                  drv->mScriptTextLength, 0) != 0) {
108        LOGE("bcc: FAILS to read bitcode");
109        goto error;
110    }
111
112    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
113        LOGE("bcc: FAILS to link bitcode");
114        goto error;
115    }
116
117    if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
118        LOGE("bcc: FAILS to prepare executable");
119        goto error;
120    }
121
122    free(cachePath);
123
124    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
125    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
126
127    exportFuncCount = drv->ME->getExportFuncCount();
128    if (exportFuncCount > 0) {
129        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
130                                                       sizeof(InvokeFunc_t));
131        bccGetExportFuncList(drv->mBccScript, exportFuncCount,
132                             (void **) drv->mInvokeFunctions);
133    } else {
134        drv->mInvokeFunctions = NULL;
135    }
136
137    exportVarCount = drv->ME->getExportVarCount();
138    if (exportVarCount > 0) {
139        drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
140        drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
141        bccGetExportVarList(drv->mBccScript, exportVarCount,
142                            (void **) drv->mFieldAddress);
143    } else {
144        drv->mFieldAddress = NULL;
145        drv->mFieldIsObject = NULL;
146    }
147
148    objectSlotCount = drv->ME->getObjectSlotCount();
149    if (objectSlotCount > 0) {
150        const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
151        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
152            drv->mFieldIsObject[objectSlotList[ct]] = true;
153        }
154    }
155
156    // Copy info over to runtime
157    script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
158    script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
159    script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
160    script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
161    script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
162    script->mHal.info.root = drv->mRoot;
163
164    pthread_mutex_unlock(&rsdgInitMutex);
165    return true;
166
167error:
168
169    pthread_mutex_unlock(&rsdgInitMutex);
170    if (drv->ME) {
171        delete drv->ME;
172        drv->ME = NULL;
173    }
174    free(drv);
175    return false;
176
177}
178
179typedef struct {
180    Context *rsc;
181    Script *script;
182    const Allocation * ain;
183    Allocation * aout;
184    const void * usr;
185    size_t usrLen;
186
187    uint32_t mSliceSize;
188    volatile int mSliceNum;
189
190    const uint8_t *ptrIn;
191    uint32_t eStrideIn;
192    uint8_t *ptrOut;
193    uint32_t eStrideOut;
194
195    uint32_t xStart;
196    uint32_t xEnd;
197    uint32_t yStart;
198    uint32_t yEnd;
199    uint32_t zStart;
200    uint32_t zEnd;
201    uint32_t arrayStart;
202    uint32_t arrayEnd;
203
204    uint32_t dimX;
205    uint32_t dimY;
206    uint32_t dimZ;
207    uint32_t dimArray;
208} MTLaunchStruct;
209typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
210
211static void wc_xy(void *usr, uint32_t idx) {
212    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
213    RsForEachStubParamStruct p;
214    memset(&p, 0, sizeof(p));
215    p.usr = mtls->usr;
216    p.usr_len = mtls->usrLen;
217
218    while (1) {
219        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
220        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
221        uint32_t yEnd = yStart + mtls->mSliceSize;
222        yEnd = rsMin(yEnd, mtls->yEnd);
223        if (yEnd <= yStart) {
224            return;
225        }
226
227        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
228        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
229        for (p.y = yStart; p.y < yEnd; p.y++) {
230            uint32_t offset = mtls->dimX * p.y;
231            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
232            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
233
234            for (p.x = mtls->xStart; p.x < mtls->xEnd; p.x++) {
235                p.in = xPtrIn;
236                p.out = xPtrOut;
237                ((rs_t)mtls->script->mHal.info.root) (p.in, p.out, p.usr, p.x, p.y, 0, 0);
238                xPtrIn += mtls->eStrideIn;
239                xPtrOut += mtls->eStrideOut;
240            }
241        }
242    }
243}
244
245static void wc_x(void *usr, uint32_t idx) {
246    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
247    RsForEachStubParamStruct p;
248    memset(&p, 0, sizeof(p));
249    p.usr = mtls->usr;
250    p.usr_len = mtls->usrLen;
251
252    while (1) {
253        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
254        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
255        uint32_t xEnd = xStart + mtls->mSliceSize;
256        xEnd = rsMin(xEnd, mtls->xEnd);
257        if (xEnd <= xStart) {
258            return;
259        }
260
261        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
262        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
263        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
264        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
265        for (p.x = xStart; p.x < xEnd; p.x++) {
266            p.in = xPtrIn;
267            p.out = xPtrOut;
268            ((rs_t)mtls->script->mHal.info.root) (p.in, p.out, p.usr, p.x, 0, 0, 0);
269            xPtrIn += mtls->eStrideIn;
270            xPtrOut += mtls->eStrideOut;
271        }
272    }
273}
274
275void rsdScriptInvokeForEach(const Context *rsc,
276                            Script *s,
277                            uint32_t slot,
278                            const Allocation * ain,
279                            Allocation * aout,
280                            const void * usr,
281                            uint32_t usrLen,
282                            const RsScriptCall *sc) {
283
284    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
285
286    MTLaunchStruct mtls;
287    memset(&mtls, 0, sizeof(mtls));
288
289    if (ain) {
290        mtls.dimX = ain->getType()->getDimX();
291        mtls.dimY = ain->getType()->getDimY();
292        mtls.dimZ = ain->getType()->getDimZ();
293        //mtls.dimArray = ain->getType()->getDimArray();
294    } else if (aout) {
295        mtls.dimX = aout->getType()->getDimX();
296        mtls.dimY = aout->getType()->getDimY();
297        mtls.dimZ = aout->getType()->getDimZ();
298        //mtls.dimArray = aout->getType()->getDimArray();
299    } else {
300        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
301        return;
302    }
303
304    if (!sc || (sc->xEnd == 0)) {
305        mtls.xEnd = mtls.dimX;
306    } else {
307        rsAssert(sc->xStart < mtls.dimX);
308        rsAssert(sc->xEnd <= mtls.dimX);
309        rsAssert(sc->xStart < sc->xEnd);
310        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
311        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
312        if (mtls.xStart >= mtls.xEnd) return;
313    }
314
315    if (!sc || (sc->yEnd == 0)) {
316        mtls.yEnd = mtls.dimY;
317    } else {
318        rsAssert(sc->yStart < mtls.dimY);
319        rsAssert(sc->yEnd <= mtls.dimY);
320        rsAssert(sc->yStart < sc->yEnd);
321        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
322        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
323        if (mtls.yStart >= mtls.yEnd) return;
324    }
325
326    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
327    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
328    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
329    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
330
331    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
332
333    Context *mrsc = (Context *)rsc;
334    Script * oldTLS = setTLS(s);
335
336    mtls.rsc = mrsc;
337    mtls.ain = ain;
338    mtls.aout = aout;
339    mtls.script = s;
340    mtls.usr = usr;
341    mtls.usrLen = usrLen;
342    mtls.mSliceSize = 10;
343    mtls.mSliceNum = 0;
344
345    mtls.ptrIn = NULL;
346    mtls.eStrideIn = 0;
347    if (ain) {
348        mtls.ptrIn = (const uint8_t *)ain->getPtr();
349        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
350    }
351
352    mtls.ptrOut = NULL;
353    mtls.eStrideOut = 0;
354    if (aout) {
355        mtls.ptrOut = (uint8_t *)aout->getPtr();
356        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
357    }
358
359    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
360        if (mtls.dimY > 1) {
361            rsdLaunchThreads(mrsc, wc_xy, &mtls);
362        } else {
363            rsdLaunchThreads(mrsc, wc_x, &mtls);
364        }
365
366        //LOGE("launch 1");
367    } else {
368        RsForEachStubParamStruct p;
369        memset(&p, 0, sizeof(p));
370        p.usr = mtls.usr;
371        p.usr_len = mtls.usrLen;
372
373        //LOGE("launch 3");
374        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
375            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
376                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
377                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
378                                      mtls.dimX * mtls.dimY * p.z +
379                                      mtls.dimX * p.y;
380                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
381                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
382
383                    for (p.x = mtls.xStart; p.x < mtls.xEnd; p.x++) {
384                        p.in = xPtrIn;
385                        p.out = xPtrOut;
386                        ((rs_t)s->mHal.info.root) (p.in, p.out, p.usr, p.x, p.y, p.z, p.ar[0]);
387                        xPtrIn += mtls.eStrideIn;
388                        xPtrOut += mtls.eStrideOut;
389                    }
390                }
391            }
392        }
393    }
394
395    setTLS(oldTLS);
396}
397
398
399int rsdScriptInvokeRoot(const Context *dc, Script *script) {
400    DrvScript *drv = (DrvScript *)script->mHal.drv;
401
402    Script * oldTLS = setTLS(script);
403    int ret = drv->mRoot();
404    setTLS(oldTLS);
405
406    return ret;
407}
408
409void rsdScriptInvokeInit(const Context *dc, Script *script) {
410    DrvScript *drv = (DrvScript *)script->mHal.drv;
411
412    if (drv->mInit) {
413        drv->mInit();
414    }
415}
416
417
418void rsdScriptInvokeFunction(const Context *dc, Script *script,
419                            uint32_t slot,
420                            const void *params,
421                            size_t paramLength) {
422    DrvScript *drv = (DrvScript *)script->mHal.drv;
423    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
424
425    Script * oldTLS = setTLS(script);
426    ((void (*)(const void *, uint32_t))
427        drv->mInvokeFunctions[slot])(params, paramLength);
428    setTLS(oldTLS);
429}
430
431void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
432                           uint32_t slot, void *data, size_t dataLength) {
433    DrvScript *drv = (DrvScript *)script->mHal.drv;
434    //rsAssert(!script->mFieldIsObject[slot]);
435    //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
436
437    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
438    if (!destPtr) {
439        //LOGV("Calling setVar on slot = %i which is null", slot);
440        return;
441    }
442
443    memcpy(destPtr, data, dataLength);
444}
445
446void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
447    DrvScript *drv = (DrvScript *)script->mHal.drv;
448    //rsAssert(!script->mFieldIsObject[slot]);
449    //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
450
451    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
452    if (!destPtr) {
453        //LOGV("Calling setVar on slot = %i which is null", slot);
454        return;
455    }
456
457    memcpy(destPtr, &data, sizeof(void *));
458}
459
460void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
461    DrvScript *drv = (DrvScript *)script->mHal.drv;
462    //rsAssert(script->mFieldIsObject[slot]);
463    //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
464
465    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
466    if (!destPtr) {
467        //LOGV("Calling setVar on slot = %i which is null", slot);
468        return;
469    }
470
471    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
472}
473
474void rsdScriptDestroy(const Context *dc, Script *script) {
475    DrvScript *drv = (DrvScript *)script->mHal.drv;
476
477    if (drv->mFieldAddress) {
478        size_t exportVarCount = drv->ME->getExportVarCount();
479        for (size_t ct = 0; ct < exportVarCount; ct++) {
480            if (drv->mFieldIsObject[ct]) {
481                // The field address can be NULL if the script-side has
482                // optimized the corresponding global variable away.
483                if (drv->mFieldAddress[ct]) {
484                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
485                }
486            }
487        }
488        free(drv->mFieldAddress);
489        drv->mFieldAddress = NULL;
490        free(drv->mFieldIsObject);
491        drv->mFieldIsObject = NULL;    }
492
493    if (drv->mInvokeFunctions) {
494        free(drv->mInvokeFunctions);
495        drv->mInvokeFunctions = NULL;
496    }
497
498    delete drv->ME;
499    drv->ME = NULL;
500
501    free(drv);
502    script->mHal.drv = NULL;
503
504}
505
506
507