rsdBcc.cpp revision abfaab8983d9727d3d5ca34e2a0ff3e80ba2eb24
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsdCore.h"
19#include "rsdBcc.h"
20#include "rsdRuntime.h"
21
22#include "rsContext.h"
23#include "rsScriptC.h"
24
25#include "utils/Timers.h"
26#include "utils/StopWatch.h"
27extern "C" {
28#include "libdex/ZipArchive.h"
29}
30
31
32using namespace android;
33using namespace android::renderscript;
34
35struct DrvScript {
36    int (*mRoot)();
37    void (*mInit)();
38
39    BCCScriptRef mBccScript;
40
41    uint32_t mInvokeFunctionCount;
42    InvokeFunc_t *mInvokeFunctions;
43    uint32_t mFieldCount;
44    void ** mFieldAddress;
45    bool * mFieldIsObject;
46
47    const uint8_t * mScriptText;
48    uint32_t mScriptTextLength;
49
50    //uint32_t * mObjectSlots;
51    //uint32_t mObjectSlotCount;
52
53    uint32_t mPragmaCount;
54    const char ** mPragmaKeys;
55    const char ** mPragmaValues;
56
57};
58
59
60static Script * setTLS(Script *sc) {
61    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
62    rsAssert(tls);
63    Script *old = tls->mScript;
64    tls->mScript = sc;
65    return old;
66}
67
68
69bool rsdScriptInit(const Context *rsc,
70                     ScriptC *script,
71                     char const *resName,
72                     char const *cacheDir,
73                     uint8_t const *bitcode,
74                     size_t bitcodeSize,
75                     uint32_t flags) {
76    //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
77
78    pthread_mutex_lock(&rsdgInitMutex);
79    char *cachePath = NULL;
80    uint32_t objectSlotCount = 0;
81
82    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
83    if (drv == NULL) {
84        goto error;
85    }
86    script->mHal.drv = drv;
87
88    drv->mBccScript = bccCreateScript();
89    script->mHal.info.isThreadable = true;
90    drv->mScriptText = bitcode;
91    drv->mScriptTextLength = bitcodeSize;
92
93    //LOGE("mBccScript %p", script->mBccScript);
94
95    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
96        LOGE("bcc: FAILS to register symbol callback");
97        goto error;
98    }
99
100    if (bccReadBC(drv->mBccScript,
101                  resName,
102                  (char const *)drv->mScriptText,
103                  drv->mScriptTextLength, 0) != 0) {
104        LOGE("bcc: FAILS to read bitcode");
105        goto error;
106    }
107
108    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
109        LOGE("bcc: FAILS to link bitcode");
110        goto error;
111    }
112
113    if (bccPrepareExecutableEx(drv->mBccScript, cacheDir, resName, 0) != 0) {
114        LOGE("bcc: FAILS to prepare executable");
115        goto error;
116    }
117
118    free(cachePath);
119
120    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
121    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
122
123    drv->mInvokeFunctionCount = bccGetExportFuncCount(drv->mBccScript);
124    if (drv->mInvokeFunctionCount <= 0)
125        drv->mInvokeFunctions = NULL;
126    else {
127        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(drv->mInvokeFunctionCount, sizeof(InvokeFunc_t));
128        bccGetExportFuncList(drv->mBccScript, drv->mInvokeFunctionCount, (void **) drv->mInvokeFunctions);
129    }
130
131    drv->mFieldCount = bccGetExportVarCount(drv->mBccScript);
132    if (drv->mFieldCount <= 0) {
133        drv->mFieldAddress = NULL;
134        drv->mFieldIsObject = NULL;
135    } else {
136        drv->mFieldAddress = (void **) calloc(drv->mFieldCount, sizeof(void *));
137        drv->mFieldIsObject = (bool *) calloc(drv->mFieldCount, sizeof(bool));
138        bccGetExportVarList(drv->mBccScript, drv->mFieldCount, (void **) drv->mFieldAddress);
139    }
140
141    objectSlotCount = bccGetObjectSlotCount(drv->mBccScript);
142    if (objectSlotCount) {
143        uint32_t * slots = new uint32_t[objectSlotCount];
144        bccGetObjectSlotList(drv->mBccScript, objectSlotCount, slots);
145        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
146            drv->mFieldIsObject[slots[ct]] = true;
147        }
148        delete [] slots;
149    }
150
151    uint32_t mPragmaCount;
152    const char ** mPragmaKeys;
153    const char ** mPragmaValues;
154
155    drv->mPragmaCount = bccGetPragmaCount(drv->mBccScript);
156    if (drv->mPragmaCount <= 0) {
157        drv->mPragmaKeys = NULL;
158        drv->mPragmaValues = NULL;
159    } else {
160        drv->mPragmaKeys = (const char **) calloc(drv->mPragmaCount, sizeof(const char *));
161        drv->mPragmaValues = (const char **) calloc(drv->mPragmaCount, sizeof(const char *));
162        bccGetPragmaList(drv->mBccScript, drv->mPragmaCount, drv->mPragmaKeys, drv->mPragmaValues);
163    }
164
165
166
167    // Copy info over to runtime
168    script->mHal.info.exportedFunctionCount = drv->mInvokeFunctionCount;
169    script->mHal.info.exportedVariableCount = drv->mFieldCount;
170    script->mHal.info.exportedPragmaCount = drv->mPragmaCount;
171    script->mHal.info.exportedPragmaKeyList = drv->mPragmaKeys;
172    script->mHal.info.exportedPragmaValueList = drv->mPragmaValues;
173    script->mHal.info.root = drv->mRoot;
174
175
176    pthread_mutex_unlock(&rsdgInitMutex);
177    return true;
178
179error:
180
181    pthread_mutex_unlock(&rsdgInitMutex);
182    free(drv);
183    return false;
184
185}
186
187typedef struct {
188    Context *rsc;
189    Script *script;
190    const Allocation * ain;
191    Allocation * aout;
192    const void * usr;
193
194    uint32_t mSliceSize;
195    volatile int mSliceNum;
196
197    const uint8_t *ptrIn;
198    uint32_t eStrideIn;
199    uint8_t *ptrOut;
200    uint32_t eStrideOut;
201
202    uint32_t xStart;
203    uint32_t xEnd;
204    uint32_t yStart;
205    uint32_t yEnd;
206    uint32_t zStart;
207    uint32_t zEnd;
208    uint32_t arrayStart;
209    uint32_t arrayEnd;
210
211    uint32_t dimX;
212    uint32_t dimY;
213    uint32_t dimZ;
214    uint32_t dimArray;
215} MTLaunchStruct;
216typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
217
218static void wc_xy(void *usr, uint32_t idx) {
219    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
220
221    while (1) {
222        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
223        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
224        uint32_t yEnd = yStart + mtls->mSliceSize;
225        yEnd = rsMin(yEnd, mtls->yEnd);
226        if (yEnd <= yStart) {
227            return;
228        }
229
230        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
231        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
232        for (uint32_t y = yStart; y < yEnd; y++) {
233            uint32_t offset = mtls->dimX * y;
234            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
235            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
236
237            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
238                ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
239                xPtrIn += mtls->eStrideIn;
240                xPtrOut += mtls->eStrideOut;
241            }
242        }
243    }
244}
245
246static void wc_x(void *usr, uint32_t idx) {
247    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
248
249    while (1) {
250        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
251        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
252        uint32_t xEnd = xStart + mtls->mSliceSize;
253        xEnd = rsMin(xEnd, mtls->xEnd);
254        if (xEnd <= xStart) {
255            return;
256        }
257
258        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
259        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
260        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
261        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
262        for (uint32_t x = xStart; x < xEnd; x++) {
263            ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0);
264            xPtrIn += mtls->eStrideIn;
265            xPtrOut += mtls->eStrideOut;
266        }
267    }
268}
269
270void rsdScriptInvokeForEach(const Context *rsc,
271                            Script *s,
272                            const Allocation * ain,
273                            Allocation * aout,
274                            const void * usr,
275                            uint32_t usrLen,
276                            const RsScriptCall *sc) {
277
278    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
279
280    MTLaunchStruct mtls;
281    memset(&mtls, 0, sizeof(mtls));
282
283    if (ain) {
284        mtls.dimX = ain->getType()->getDimX();
285        mtls.dimY = ain->getType()->getDimY();
286        mtls.dimZ = ain->getType()->getDimZ();
287        //mtls.dimArray = ain->getType()->getDimArray();
288    } else if (aout) {
289        mtls.dimX = aout->getType()->getDimX();
290        mtls.dimY = aout->getType()->getDimY();
291        mtls.dimZ = aout->getType()->getDimZ();
292        //mtls.dimArray = aout->getType()->getDimArray();
293    } else {
294        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
295        return;
296    }
297
298    if (!sc || (sc->xEnd == 0)) {
299        mtls.xEnd = mtls.dimX;
300    } else {
301        rsAssert(sc->xStart < mtls.dimX);
302        rsAssert(sc->xEnd <= mtls.dimX);
303        rsAssert(sc->xStart < sc->xEnd);
304        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
305        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
306        if (mtls.xStart >= mtls.xEnd) return;
307    }
308
309    if (!sc || (sc->yEnd == 0)) {
310        mtls.yEnd = mtls.dimY;
311    } else {
312        rsAssert(sc->yStart < mtls.dimY);
313        rsAssert(sc->yEnd <= mtls.dimY);
314        rsAssert(sc->yStart < sc->yEnd);
315        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
316        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
317        if (mtls.yStart >= mtls.yEnd) return;
318    }
319
320    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
321    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
322    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
323    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
324
325    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
326
327    Context *mrsc = (Context *)rsc;
328    Script * oldTLS = setTLS(s);
329
330    mtls.rsc = mrsc;
331    mtls.ain = ain;
332    mtls.aout = aout;
333    mtls.script = s;
334    mtls.usr = usr;
335    mtls.mSliceSize = 10;
336    mtls.mSliceNum = 0;
337
338    mtls.ptrIn = NULL;
339    mtls.eStrideIn = 0;
340    if (ain) {
341        mtls.ptrIn = (const uint8_t *)ain->getPtr();
342        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
343    }
344
345    mtls.ptrOut = NULL;
346    mtls.eStrideOut = 0;
347    if (aout) {
348        mtls.ptrOut = (uint8_t *)aout->getPtr();
349        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
350    }
351
352    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
353        if (mtls.dimY > 1) {
354            rsdLaunchThreads(mrsc, wc_xy, &mtls);
355        } else {
356            rsdLaunchThreads(mrsc, wc_x, &mtls);
357        }
358
359        //LOGE("launch 1");
360    } else {
361        //LOGE("launch 3");
362        for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
363            for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
364                for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
365                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
366                                      mtls.dimX * mtls.dimY * z +
367                                      mtls.dimX * y;
368                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
369                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
370
371                    for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
372                        ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar);
373                        xPtrIn += mtls.eStrideIn;
374                        xPtrOut += mtls.eStrideOut;
375                    }
376                }
377            }
378        }
379    }
380
381    setTLS(oldTLS);
382}
383
384
385int rsdScriptInvokeRoot(const Context *dc, Script *script) {
386    DrvScript *drv = (DrvScript *)script->mHal.drv;
387
388    Script * oldTLS = setTLS(script);
389    int ret = drv->mRoot();
390    setTLS(oldTLS);
391
392    return ret;
393}
394
395void rsdScriptInvokeInit(const Context *dc, Script *script) {
396    DrvScript *drv = (DrvScript *)script->mHal.drv;
397
398    if (drv->mInit) {
399        drv->mInit();
400    }
401}
402
403
404void rsdScriptInvokeFunction(const Context *dc, Script *script,
405                            uint32_t slot,
406                            const void *params,
407                            size_t paramLength) {
408    DrvScript *drv = (DrvScript *)script->mHal.drv;
409    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
410
411    Script * oldTLS = setTLS(script);
412    ((void (*)(const void *, uint32_t))
413        drv->mInvokeFunctions[slot])(params, paramLength);
414    setTLS(oldTLS);
415}
416
417void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
418                           uint32_t slot, void *data, size_t dataLength) {
419    DrvScript *drv = (DrvScript *)script->mHal.drv;
420    //rsAssert(!script->mFieldIsObject[slot]);
421    //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
422
423    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
424    if (!destPtr) {
425        //LOGV("Calling setVar on slot = %i which is null", slot);
426        return;
427    }
428
429    memcpy(destPtr, data, dataLength);
430}
431
432void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
433    DrvScript *drv = (DrvScript *)script->mHal.drv;
434    //rsAssert(!script->mFieldIsObject[slot]);
435    //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
436
437    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
438    if (!destPtr) {
439        //LOGV("Calling setVar on slot = %i which is null", slot);
440        return;
441    }
442
443    memcpy(destPtr, &data, sizeof(void *));
444}
445
446void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
447    DrvScript *drv = (DrvScript *)script->mHal.drv;
448    //rsAssert(script->mFieldIsObject[slot]);
449    //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
450
451    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
452    if (!destPtr) {
453        //LOGV("Calling setVar on slot = %i which is null", slot);
454        return;
455    }
456
457    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
458}
459
460void rsdScriptDestroy(const Context *dc, Script *script) {
461    DrvScript *drv = (DrvScript *)script->mHal.drv;
462
463    if (drv->mFieldAddress) {
464        for (size_t ct=0; ct < drv->mFieldCount; ct++) {
465            if (drv->mFieldIsObject[ct]) {
466                // The field address can be NULL if the script-side has
467                // optimized the corresponding global variable away.
468                if (drv->mFieldAddress[ct]) {
469                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
470                }
471            }
472        }
473        delete [] drv->mFieldAddress;
474        delete [] drv->mFieldIsObject;
475        drv->mFieldAddress = NULL;
476        drv->mFieldIsObject = NULL;
477        drv->mFieldCount = 0;
478    }
479
480    if (drv->mInvokeFunctions) {
481        delete [] drv->mInvokeFunctions;
482        drv->mInvokeFunctions = NULL;
483        drv->mInvokeFunctionCount = 0;
484    }
485    free(drv);
486    script->mHal.drv = NULL;
487
488}
489
490
491