rsdBcc.cpp revision 8d43eafdd2684b7bcb2554e65676dc914b7eabd7
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsdCore.h"
19#include "rsdBcc.h"
20
21#include "rsContext.h"
22#include "rsScriptC.h"
23
24#include "utils/Timers.h"
25#include "utils/StopWatch.h"
26extern "C" {
27#include "libdex/ZipArchive.h"
28}
29
30
31using namespace android;
32using namespace android::renderscript;
33
34struct DrvScript {
35    int (*mRoot)();
36    void (*mInit)();
37
38    BCCScriptRef mBccScript;
39
40    uint32_t mInvokeFunctionCount;
41    InvokeFunc_t *mInvokeFunctions;
42    uint32_t mFieldCount;
43    void ** mFieldAddress;
44    bool * mFieldIsObject;
45
46    const uint8_t * mScriptText;
47    uint32_t mScriptTextLength;
48
49    //uint32_t * mObjectSlots;
50    //uint32_t mObjectSlotCount;
51
52    uint32_t mPragmaCount;
53    const char ** mPragmaKeys;
54    const char ** mPragmaValues;
55
56};
57
58static Script * setTLS(Script *sc) {
59    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey);
60    rsAssert(tls);
61    Script *old = tls->mScript;
62    tls->mScript = sc;
63    return old;
64}
65
66
67// Input: cacheDir
68// Input: resName
69// Input: extName
70//
71// Note: cacheFile = resName + extName
72//
73// Output: Returns cachePath == cacheDir + cacheFile
74static char *genCacheFileName(const char *cacheDir,
75                              const char *resName,
76                              const char *extName) {
77    char cachePath[512];
78    char cacheFile[sizeof(cachePath)];
79    const size_t kBufLen = sizeof(cachePath) - 1;
80
81    cacheFile[0] = '\0';
82    // Note: resName today is usually something like
83    //       "/com.android.fountain:raw/fountain"
84    if (resName[0] != '/') {
85        // Get the absolute path of the raw/***.bc file.
86
87        // Generate the absolute path.  This doesn't do everything it
88        // should, e.g. if resName is "./out/whatever" it doesn't crunch
89        // the leading "./" out because this if-block is not triggered,
90        // but it'll make do.
91        //
92        if (getcwd(cacheFile, kBufLen) == NULL) {
93            LOGE("Can't get CWD while opening raw/***.bc file\n");
94            return NULL;
95        }
96        // Append "/" at the end of cacheFile so far.
97        strncat(cacheFile, "/", kBufLen);
98    }
99
100    // cacheFile = resName + extName
101    //
102    strncat(cacheFile, resName, kBufLen);
103    if (extName != NULL) {
104        // TODO(srhines): strncat() is a bit dangerous
105        strncat(cacheFile, extName, kBufLen);
106    }
107
108    // Turn the path into a flat filename by replacing
109    // any slashes after the first one with '@' characters.
110    char *cp = cacheFile + 1;
111    while (*cp != '\0') {
112        if (*cp == '/') {
113            *cp = '@';
114        }
115        cp++;
116    }
117
118    // Tack on the file name for the actual cache file path.
119    strncpy(cachePath, cacheDir, kBufLen);
120    strncat(cachePath, cacheFile, kBufLen);
121
122    LOGV("Cache file for '%s' '%s' is '%s'\n", resName, extName, cachePath);
123    return strdup(cachePath);
124}
125
126bool rsdScriptInit(const Context *rsc,
127                     ScriptC *script,
128                     char const *resName,
129                     char const *cacheDir,
130                     uint8_t const *bitcode,
131                     size_t bitcodeSize,
132                     uint32_t flags,
133                     RsHalSymbolLookupFunc lookupFunc) {
134    //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
135
136    char *cachePath = NULL;
137    uint32_t objectSlotCount = 0;
138
139    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
140    if (drv == NULL) {
141        return false;
142    }
143    script->mHal.drv = drv;
144
145    drv->mBccScript = bccCreateScript();
146    script->mHal.info.isThreadable = true;
147    drv->mScriptText = bitcode;
148    drv->mScriptTextLength = bitcodeSize;
149
150    //LOGE("mBccScript %p", script->mBccScript);
151
152    if (bccRegisterSymbolCallback(drv->mBccScript, lookupFunc, script) != 0) {
153        LOGE("bcc: FAILS to register symbol callback");
154        goto error;
155    }
156
157    if (bccReadBC(drv->mBccScript,
158                  resName,
159                  (char const *)drv->mScriptText,
160                  drv->mScriptTextLength, 0) != 0) {
161        LOGE("bcc: FAILS to read bitcode");
162        return NULL;
163    }
164
165#if 1
166    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
167        LOGE("bcc: FAILS to link bitcode");
168        return NULL;
169    }
170#endif
171    cachePath = genCacheFileName(cacheDir, resName, ".oBCC");
172
173    if (bccPrepareExecutable(drv->mBccScript, cachePath, 0) != 0) {
174        LOGE("bcc: FAILS to prepare executable");
175        return NULL;
176    }
177
178    free(cachePath);
179
180    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
181    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
182
183    drv->mInvokeFunctionCount = bccGetExportFuncCount(drv->mBccScript);
184    if (drv->mInvokeFunctionCount <= 0)
185        drv->mInvokeFunctions = NULL;
186    else {
187        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(drv->mInvokeFunctionCount, sizeof(InvokeFunc_t));
188        bccGetExportFuncList(drv->mBccScript, drv->mInvokeFunctionCount, (void **) drv->mInvokeFunctions);
189    }
190
191    drv->mFieldCount = bccGetExportVarCount(drv->mBccScript);
192    if (drv->mFieldCount <= 0) {
193        drv->mFieldAddress = NULL;
194        drv->mFieldIsObject = NULL;
195    } else {
196        drv->mFieldAddress = (void **) calloc(drv->mFieldCount, sizeof(void *));
197        drv->mFieldIsObject = (bool *) calloc(drv->mFieldCount, sizeof(bool));
198        bccGetExportVarList(drv->mBccScript, drv->mFieldCount, (void **) drv->mFieldAddress);
199    }
200
201    objectSlotCount = bccGetObjectSlotCount(drv->mBccScript);
202    if (objectSlotCount) {
203        uint32_t * slots = new uint32_t[objectSlotCount];
204        bccGetObjectSlotList(drv->mBccScript, objectSlotCount, slots);
205        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
206            drv->mFieldIsObject[slots[ct]] = true;
207        }
208        delete [] slots;
209    }
210
211    uint32_t mPragmaCount;
212    const char ** mPragmaKeys;
213    const char ** mPragmaValues;
214
215    const static int pragmaMax = 16;
216    drv->mPragmaCount = bccGetPragmaCount(drv->mBccScript);
217    if (drv->mPragmaCount <= 0) {
218        drv->mPragmaKeys = NULL;
219        drv->mPragmaValues = NULL;
220    } else {
221        drv->mPragmaKeys = (const char **) calloc(drv->mFieldCount, sizeof(const char *));
222        drv->mPragmaValues = (const char **) calloc(drv->mFieldCount, sizeof(const char *));
223        bccGetPragmaList(drv->mBccScript, drv->mPragmaCount, drv->mPragmaKeys, drv->mPragmaValues);
224    }
225
226
227
228    // Copy info over to runtime
229    script->mHal.info.exportedFunctionCount = drv->mInvokeFunctionCount;
230    script->mHal.info.exportedVariableCount = drv->mFieldCount;
231    script->mHal.info.exportedPragmaCount = drv->mPragmaCount;
232    script->mHal.info.exportedPragmaKeyList = drv->mPragmaKeys;
233    script->mHal.info.exportedPragmaValueList = drv->mPragmaValues;
234    script->mHal.info.root = drv->mRoot;
235
236
237    return true;
238
239error:
240
241    free(drv);
242    return false;
243
244}
245
246typedef struct {
247    Context *rsc;
248    Script *script;
249    const Allocation * ain;
250    Allocation * aout;
251    const void * usr;
252
253    uint32_t mSliceSize;
254    volatile int mSliceNum;
255
256    const uint8_t *ptrIn;
257    uint32_t eStrideIn;
258    uint8_t *ptrOut;
259    uint32_t eStrideOut;
260
261    uint32_t xStart;
262    uint32_t xEnd;
263    uint32_t yStart;
264    uint32_t yEnd;
265    uint32_t zStart;
266    uint32_t zEnd;
267    uint32_t arrayStart;
268    uint32_t arrayEnd;
269
270    uint32_t dimX;
271    uint32_t dimY;
272    uint32_t dimZ;
273    uint32_t dimArray;
274} MTLaunchStruct;
275typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
276
277static void wc_xy(void *usr, uint32_t idx) {
278    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
279
280    while (1) {
281        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
282        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
283        uint32_t yEnd = yStart + mtls->mSliceSize;
284        yEnd = rsMin(yEnd, mtls->yEnd);
285        if (yEnd <= yStart) {
286            return;
287        }
288
289        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
290        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
291        for (uint32_t y = yStart; y < yEnd; y++) {
292            uint32_t offset = mtls->dimX * y;
293            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
294            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
295
296            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
297                ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
298                xPtrIn += mtls->eStrideIn;
299                xPtrOut += mtls->eStrideOut;
300            }
301        }
302    }
303}
304
305static void wc_x(void *usr, uint32_t idx) {
306    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
307
308    while (1) {
309        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
310        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
311        uint32_t xEnd = xStart + mtls->mSliceSize;
312        xEnd = rsMin(xEnd, mtls->xEnd);
313        if (xEnd <= xStart) {
314            return;
315        }
316
317        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
318        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
319        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
320        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
321        for (uint32_t x = xStart; x < xEnd; x++) {
322            ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0);
323            xPtrIn += mtls->eStrideIn;
324            xPtrOut += mtls->eStrideOut;
325        }
326    }
327}
328
329void rsdScriptInvokeForEach(const Context *rsc,
330                            Script *s,
331                            const Allocation * ain,
332                            Allocation * aout,
333                            const void * usr,
334                            uint32_t usrLen,
335                            const RsScriptCall *sc) {
336
337    RsHal * dc = (RsHal *)rsc->mHal.drv;
338
339    MTLaunchStruct mtls;
340    memset(&mtls, 0, sizeof(mtls));
341
342    if (ain) {
343        mtls.dimX = ain->getType()->getDimX();
344        mtls.dimY = ain->getType()->getDimY();
345        mtls.dimZ = ain->getType()->getDimZ();
346        //mtls.dimArray = ain->getType()->getDimArray();
347    } else if (aout) {
348        mtls.dimX = aout->getType()->getDimX();
349        mtls.dimY = aout->getType()->getDimY();
350        mtls.dimZ = aout->getType()->getDimZ();
351        //mtls.dimArray = aout->getType()->getDimArray();
352    } else {
353        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
354        return;
355    }
356
357    if (!sc || (sc->xEnd == 0)) {
358        mtls.xEnd = mtls.dimX;
359    } else {
360        rsAssert(sc->xStart < mtls.dimX);
361        rsAssert(sc->xEnd <= mtls.dimX);
362        rsAssert(sc->xStart < sc->xEnd);
363        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
364        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
365        if (mtls.xStart >= mtls.xEnd) return;
366    }
367
368    if (!sc || (sc->yEnd == 0)) {
369        mtls.yEnd = mtls.dimY;
370    } else {
371        rsAssert(sc->yStart < mtls.dimY);
372        rsAssert(sc->yEnd <= mtls.dimY);
373        rsAssert(sc->yStart < sc->yEnd);
374        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
375        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
376        if (mtls.yStart >= mtls.yEnd) return;
377    }
378
379    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
380    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
381    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
382    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
383
384    rsAssert(ain->getType()->getDimZ() == 0);
385
386    Context *mrsc = (Context *)rsc;
387    Script * oldTLS = setTLS(s);
388
389    mtls.rsc = mrsc;
390    mtls.ain = ain;
391    mtls.aout = aout;
392    mtls.script = s;
393    mtls.usr = usr;
394    mtls.mSliceSize = 10;
395    mtls.mSliceNum = 0;
396
397    mtls.ptrIn = NULL;
398    mtls.eStrideIn = 0;
399    if (ain) {
400        mtls.ptrIn = (const uint8_t *)ain->getPtr();
401        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
402    }
403
404    mtls.ptrOut = NULL;
405    mtls.eStrideOut = 0;
406    if (aout) {
407        mtls.ptrOut = (uint8_t *)aout->getPtr();
408        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
409    }
410
411    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
412        if (mtls.dimY > 1) {
413            rsdLaunchThreads(mrsc, wc_xy, &mtls);
414        } else {
415            rsdLaunchThreads(mrsc, wc_x, &mtls);
416        }
417
418        //LOGE("launch 1");
419    } else {
420        //LOGE("launch 3");
421        for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
422            for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
423                for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
424                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
425                                      mtls.dimX * mtls.dimY * z +
426                                      mtls.dimX * y;
427                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
428                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
429
430                    for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
431                        ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar);
432                        xPtrIn += mtls.eStrideIn;
433                        xPtrOut += mtls.eStrideOut;
434                    }
435                }
436            }
437        }
438    }
439
440    setTLS(oldTLS);
441}
442
443
444int rsdScriptInvokeRoot(const Context *dc, Script *script) {
445    DrvScript *drv = (DrvScript *)script->mHal.drv;
446
447    Script * oldTLS = setTLS(script);
448    int ret = drv->mRoot();
449    setTLS(oldTLS);
450
451    return ret;
452}
453
454void rsdScriptInvokeInit(const Context *dc, Script *script) {
455    DrvScript *drv = (DrvScript *)script->mHal.drv;
456
457    if (drv->mInit) {
458        drv->mInit();
459    }
460}
461
462
463void rsdScriptInvokeFunction(const Context *dc, Script *script,
464                            uint32_t slot,
465                            const void *params,
466                            size_t paramLength) {
467    DrvScript *drv = (DrvScript *)script->mHal.drv;
468    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
469
470    Script * oldTLS = setTLS(script);
471    ((void (*)(const void *, uint32_t))
472        drv->mInvokeFunctions[slot])(params, paramLength);
473    setTLS(oldTLS);
474}
475
476void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
477                           uint32_t slot, void *data, size_t dataLength) {
478    DrvScript *drv = (DrvScript *)script->mHal.drv;
479    //rsAssert(!script->mFieldIsObject[slot]);
480    //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
481
482    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
483    if (!destPtr) {
484        //LOGV("Calling setVar on slot = %i which is null", slot);
485        return;
486    }
487
488    memcpy(destPtr, data, dataLength);
489}
490
491void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
492    DrvScript *drv = (DrvScript *)script->mHal.drv;
493    //rsAssert(!script->mFieldIsObject[slot]);
494    //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
495
496    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
497    if (!destPtr) {
498        //LOGV("Calling setVar on slot = %i which is null", slot);
499        return;
500    }
501
502    memcpy(destPtr, &data, sizeof(void *));
503}
504
505void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
506    DrvScript *drv = (DrvScript *)script->mHal.drv;
507    //rsAssert(script->mFieldIsObject[slot]);
508    //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
509
510    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
511    if (!destPtr) {
512        //LOGV("Calling setVar on slot = %i which is null", slot);
513        return;
514    }
515
516    rsiSetObject((ObjectBase **)destPtr, data);
517}
518
519void rsdScriptDestroy(const Context *dc, Script *script) {
520    DrvScript *drv = (DrvScript *)script->mHal.drv;
521
522    if (drv->mFieldAddress) {
523        for (size_t ct=0; ct < drv->mFieldCount; ct++) {
524            if (drv->mFieldIsObject[ct]) {
525                // The field address can be NULL if the script-side has
526                // optimized the corresponding global variable away.
527                if (drv->mFieldAddress[ct]) {
528                    rsiClearObject((ObjectBase **)drv->mFieldAddress[ct]);
529                }
530            }
531        }
532        delete [] drv->mFieldAddress;
533        delete [] drv->mFieldIsObject;
534        drv->mFieldAddress = NULL;
535        drv->mFieldIsObject = NULL;
536        drv->mFieldCount = 0;
537    }
538
539    if (drv->mInvokeFunctions) {
540        delete [] drv->mInvokeFunctions;
541        drv->mInvokeFunctions = NULL;
542        drv->mInvokeFunctionCount = 0;
543    }
544    free(drv);
545    script->mHal.drv = NULL;
546
547}
548
549
550