rsdBcc.cpp revision 35e429ebf817130d8590d11d441a77aa697bd7d4
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsdCore.h"
19#include "rsdBcc.h"
20#include "rsdRuntime.h"
21
22#include "rsContext.h"
23#include "rsScriptC.h"
24
25#include "utils/Timers.h"
26#include "utils/StopWatch.h"
27extern "C" {
28#include "libdex/ZipArchive.h"
29}
30
31
32using namespace android;
33using namespace android::renderscript;
34
35struct DrvScript {
36    int (*mRoot)();
37    void (*mInit)();
38
39    BCCScriptRef mBccScript;
40
41    uint32_t mInvokeFunctionCount;
42    InvokeFunc_t *mInvokeFunctions;
43    uint32_t mFieldCount;
44    void ** mFieldAddress;
45    bool * mFieldIsObject;
46
47    const uint8_t * mScriptText;
48    uint32_t mScriptTextLength;
49
50    //uint32_t * mObjectSlots;
51    //uint32_t mObjectSlotCount;
52
53    uint32_t mPragmaCount;
54    const char ** mPragmaKeys;
55    const char ** mPragmaValues;
56
57};
58
59
60static Script * setTLS(Script *sc) {
61    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
62    rsAssert(tls);
63    Script *old = tls->mScript;
64    tls->mScript = sc;
65    return old;
66}
67
68
// Appends src to the NUL-terminated string held in dst, whose total capacity
// (terminator included) is cap bytes. Returns false and leaves dst untouched
// when the result would not fit.
static bool appendBounded(char *dst, size_t cap, const char *src) {
    const size_t used = strlen(dst);
    const size_t extra = strlen(src);
    if (extra >= cap - used) {
        return false;
    }
    memcpy(dst + used, src, extra + 1);  // +1 copies the terminator too
    return true;
}

// Builds the path of the cache file for a script resource.
//
// Input: cacheDir  directory prefix, used verbatim (no separator is added)
// Input: resName   resource name; made absolute with the CWD if it does not
//                  start with '/'
// Input: extName   extension appended to resName, may be NULL
//
// Note: cacheFile = resName + extName, with every '/' after the first
//       character replaced by '@' so that it forms a flat file name.
//
// Output: Returns cachePath == cacheDir + cacheFile as a malloc()ed string
//         that the caller must free(). Returns NULL if cacheDir or resName is
//         NULL, if the CWD cannot be obtained, if the resulting path does not
//         fit in the internal 512-byte buffers, or if strdup() fails.
static char *genCacheFileName(const char *cacheDir,
                              const char *resName,
                              const char *extName) {
    char cachePath[512];
    char cacheFile[sizeof(cachePath)];

    if (cacheDir == NULL || resName == NULL) {
        LOGE("Can't generate a cache file name without cacheDir and resName\n");
        return NULL;
    }
    const char *ext = (extName != NULL) ? extName : "";

    cacheFile[0] = '\0';
    // Note: resName today is usually something like
    //       "/com.android.fountain:raw/fountain"
    if (resName[0] != '/') {
        // Generate the absolute path.  This doesn't do everything it
        // should, e.g. if resName is "./out/whatever" it doesn't crunch
        // the leading "./" out, but it'll make do.
        //
        // One byte is held back from getcwd() so the "/" below always fits.
        if (getcwd(cacheFile, sizeof(cacheFile) - 1) == NULL) {
            LOGE("Can't get CWD while opening raw/***.bc file\n");
            return NULL;
        }
        appendBounded(cacheFile, sizeof(cacheFile), "/");
    }

    // cacheFile = resName + extName
    //
    // The bounded appends replace strncat(dst, src, bufLen), whose length
    // argument limits the characters taken from src rather than the size of
    // dst and therefore did not protect the buffer.
    if (!appendBounded(cacheFile, sizeof(cacheFile), resName) ||
        !appendBounded(cacheFile, sizeof(cacheFile), ext)) {
        LOGE("Cache file name for '%s' is too long\n", resName);
        return NULL;
    }

    // Turn the path into a flat filename by replacing
    // any slashes after the first one with '@' characters.
    for (char *cp = cacheFile + 1; *cp != '\0'; cp++) {
        if (*cp == '/') {
            *cp = '@';
        }
    }

    // Tack on the file name for the actual cache file path.
    cachePath[0] = '\0';
    if (!appendBounded(cachePath, sizeof(cachePath), cacheDir) ||
        !appendBounded(cachePath, sizeof(cachePath), cacheFile)) {
        LOGE("Cache path for '%s' is too long\n", resName);
        return NULL;
    }

    LOGV("Cache file for '%s' '%s' is '%s'\n", resName, ext, cachePath);
    return strdup(cachePath);
}
127
128bool rsdScriptInit(const Context *rsc,
129                     ScriptC *script,
130                     char const *resName,
131                     char const *cacheDir,
132                     uint8_t const *bitcode,
133                     size_t bitcodeSize,
134                     uint32_t flags) {
135    //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
136
137    pthread_mutex_lock(&rsdgInitMutex);
138    char *cachePath = NULL;
139    uint32_t objectSlotCount = 0;
140
141    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
142    if (drv == NULL) {
143        goto error;
144    }
145    script->mHal.drv = drv;
146
147    drv->mBccScript = bccCreateScript();
148    script->mHal.info.isThreadable = true;
149    drv->mScriptText = bitcode;
150    drv->mScriptTextLength = bitcodeSize;
151
152    //LOGE("mBccScript %p", script->mBccScript);
153
154    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
155        LOGE("bcc: FAILS to register symbol callback");
156        goto error;
157    }
158
159    if (bccReadBC(drv->mBccScript,
160                  resName,
161                  (char const *)drv->mScriptText,
162                  drv->mScriptTextLength, 0) != 0) {
163        LOGE("bcc: FAILS to read bitcode");
164        goto error;
165    }
166
167#if 1
168    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
169        LOGE("bcc: FAILS to link bitcode");
170        goto error;
171    }
172#endif
173    cachePath = genCacheFileName(cacheDir, resName, ".oBCC");
174
175    if (bccPrepareExecutable(drv->mBccScript, cachePath, 0) != 0) {
176        LOGE("bcc: FAILS to prepare executable");
177        goto error;
178    }
179
180    free(cachePath);
181
182    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
183    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
184
185    drv->mInvokeFunctionCount = bccGetExportFuncCount(drv->mBccScript);
186    if (drv->mInvokeFunctionCount <= 0)
187        drv->mInvokeFunctions = NULL;
188    else {
189        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(drv->mInvokeFunctionCount, sizeof(InvokeFunc_t));
190        bccGetExportFuncList(drv->mBccScript, drv->mInvokeFunctionCount, (void **) drv->mInvokeFunctions);
191    }
192
193    drv->mFieldCount = bccGetExportVarCount(drv->mBccScript);
194    if (drv->mFieldCount <= 0) {
195        drv->mFieldAddress = NULL;
196        drv->mFieldIsObject = NULL;
197    } else {
198        drv->mFieldAddress = (void **) calloc(drv->mFieldCount, sizeof(void *));
199        drv->mFieldIsObject = (bool *) calloc(drv->mFieldCount, sizeof(bool));
200        bccGetExportVarList(drv->mBccScript, drv->mFieldCount, (void **) drv->mFieldAddress);
201    }
202
203    objectSlotCount = bccGetObjectSlotCount(drv->mBccScript);
204    if (objectSlotCount) {
205        uint32_t * slots = new uint32_t[objectSlotCount];
206        bccGetObjectSlotList(drv->mBccScript, objectSlotCount, slots);
207        for (uint32_t ct=0; ct < objectSlotCount; ct++) {
208            drv->mFieldIsObject[slots[ct]] = true;
209        }
210        delete [] slots;
211    }
212
213    uint32_t mPragmaCount;
214    const char ** mPragmaKeys;
215    const char ** mPragmaValues;
216
217    const static int pragmaMax = 16;
218    drv->mPragmaCount = bccGetPragmaCount(drv->mBccScript);
219    if (drv->mPragmaCount <= 0) {
220        drv->mPragmaKeys = NULL;
221        drv->mPragmaValues = NULL;
222    } else {
223        drv->mPragmaKeys = (const char **) calloc(drv->mFieldCount, sizeof(const char *));
224        drv->mPragmaValues = (const char **) calloc(drv->mFieldCount, sizeof(const char *));
225        bccGetPragmaList(drv->mBccScript, drv->mPragmaCount, drv->mPragmaKeys, drv->mPragmaValues);
226    }
227
228
229
230    // Copy info over to runtime
231    script->mHal.info.exportedFunctionCount = drv->mInvokeFunctionCount;
232    script->mHal.info.exportedVariableCount = drv->mFieldCount;
233    script->mHal.info.exportedPragmaCount = drv->mPragmaCount;
234    script->mHal.info.exportedPragmaKeyList = drv->mPragmaKeys;
235    script->mHal.info.exportedPragmaValueList = drv->mPragmaValues;
236    script->mHal.info.root = drv->mRoot;
237
238
239    pthread_mutex_unlock(&rsdgInitMutex);
240    return true;
241
242error:
243
244    pthread_mutex_unlock(&rsdgInitMutex);
245    free(drv);
246    return false;
247
248}
249
250typedef struct {
251    Context *rsc;
252    Script *script;
253    const Allocation * ain;
254    Allocation * aout;
255    const void * usr;
256
257    uint32_t mSliceSize;
258    volatile int mSliceNum;
259
260    const uint8_t *ptrIn;
261    uint32_t eStrideIn;
262    uint8_t *ptrOut;
263    uint32_t eStrideOut;
264
265    uint32_t xStart;
266    uint32_t xEnd;
267    uint32_t yStart;
268    uint32_t yEnd;
269    uint32_t zStart;
270    uint32_t zEnd;
271    uint32_t arrayStart;
272    uint32_t arrayEnd;
273
274    uint32_t dimX;
275    uint32_t dimY;
276    uint32_t dimZ;
277    uint32_t dimArray;
278} MTLaunchStruct;
279typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
280
281static void wc_xy(void *usr, uint32_t idx) {
282    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
283
284    while (1) {
285        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
286        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
287        uint32_t yEnd = yStart + mtls->mSliceSize;
288        yEnd = rsMin(yEnd, mtls->yEnd);
289        if (yEnd <= yStart) {
290            return;
291        }
292
293        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
294        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
295        for (uint32_t y = yStart; y < yEnd; y++) {
296            uint32_t offset = mtls->dimX * y;
297            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
298            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
299
300            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
301                ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
302                xPtrIn += mtls->eStrideIn;
303                xPtrOut += mtls->eStrideOut;
304            }
305        }
306    }
307}
308
309static void wc_x(void *usr, uint32_t idx) {
310    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
311
312    while (1) {
313        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
314        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
315        uint32_t xEnd = xStart + mtls->mSliceSize;
316        xEnd = rsMin(xEnd, mtls->xEnd);
317        if (xEnd <= xStart) {
318            return;
319        }
320
321        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
322        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
323        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
324        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
325        for (uint32_t x = xStart; x < xEnd; x++) {
326            ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0);
327            xPtrIn += mtls->eStrideIn;
328            xPtrOut += mtls->eStrideOut;
329        }
330    }
331}
332
333void rsdScriptInvokeForEach(const Context *rsc,
334                            Script *s,
335                            uint32_t slot,
336                            const Allocation * ain,
337                            Allocation * aout,
338                            const void * usr,
339                            uint32_t usrLen,
340                            const RsScriptCall *sc) {
341
342    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
343
344    MTLaunchStruct mtls;
345    memset(&mtls, 0, sizeof(mtls));
346
347    if (ain) {
348        mtls.dimX = ain->getType()->getDimX();
349        mtls.dimY = ain->getType()->getDimY();
350        mtls.dimZ = ain->getType()->getDimZ();
351        //mtls.dimArray = ain->getType()->getDimArray();
352    } else if (aout) {
353        mtls.dimX = aout->getType()->getDimX();
354        mtls.dimY = aout->getType()->getDimY();
355        mtls.dimZ = aout->getType()->getDimZ();
356        //mtls.dimArray = aout->getType()->getDimArray();
357    } else {
358        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
359        return;
360    }
361
362    if (!sc || (sc->xEnd == 0)) {
363        mtls.xEnd = mtls.dimX;
364    } else {
365        rsAssert(sc->xStart < mtls.dimX);
366        rsAssert(sc->xEnd <= mtls.dimX);
367        rsAssert(sc->xStart < sc->xEnd);
368        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
369        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
370        if (mtls.xStart >= mtls.xEnd) return;
371    }
372
373    if (!sc || (sc->yEnd == 0)) {
374        mtls.yEnd = mtls.dimY;
375    } else {
376        rsAssert(sc->yStart < mtls.dimY);
377        rsAssert(sc->yEnd <= mtls.dimY);
378        rsAssert(sc->yStart < sc->yEnd);
379        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
380        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
381        if (mtls.yStart >= mtls.yEnd) return;
382    }
383
384    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
385    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
386    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
387    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
388
389    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
390
391    Context *mrsc = (Context *)rsc;
392    Script * oldTLS = setTLS(s);
393
394    mtls.rsc = mrsc;
395    mtls.ain = ain;
396    mtls.aout = aout;
397    mtls.script = s;
398    mtls.usr = usr;
399    mtls.mSliceSize = 10;
400    mtls.mSliceNum = 0;
401
402    mtls.ptrIn = NULL;
403    mtls.eStrideIn = 0;
404    if (ain) {
405        mtls.ptrIn = (const uint8_t *)ain->getPtr();
406        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
407    }
408
409    mtls.ptrOut = NULL;
410    mtls.eStrideOut = 0;
411    if (aout) {
412        mtls.ptrOut = (uint8_t *)aout->getPtr();
413        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
414    }
415
416    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
417        if (mtls.dimY > 1) {
418            rsdLaunchThreads(mrsc, wc_xy, &mtls);
419        } else {
420            rsdLaunchThreads(mrsc, wc_x, &mtls);
421        }
422
423        //LOGE("launch 1");
424    } else {
425        //LOGE("launch 3");
426        for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
427            for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
428                for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
429                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
430                                      mtls.dimX * mtls.dimY * z +
431                                      mtls.dimX * y;
432                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
433                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
434
435                    for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
436                        ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar);
437                        xPtrIn += mtls.eStrideIn;
438                        xPtrOut += mtls.eStrideOut;
439                    }
440                }
441            }
442        }
443    }
444
445    setTLS(oldTLS);
446}
447
448
449int rsdScriptInvokeRoot(const Context *dc, Script *script) {
450    DrvScript *drv = (DrvScript *)script->mHal.drv;
451
452    Script * oldTLS = setTLS(script);
453    int ret = drv->mRoot();
454    setTLS(oldTLS);
455
456    return ret;
457}
458
459void rsdScriptInvokeInit(const Context *dc, Script *script) {
460    DrvScript *drv = (DrvScript *)script->mHal.drv;
461
462    if (drv->mInit) {
463        drv->mInit();
464    }
465}
466
467
468void rsdScriptInvokeFunction(const Context *dc, Script *script,
469                            uint32_t slot,
470                            const void *params,
471                            size_t paramLength) {
472    DrvScript *drv = (DrvScript *)script->mHal.drv;
473    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
474
475    Script * oldTLS = setTLS(script);
476    ((void (*)(const void *, uint32_t))
477        drv->mInvokeFunctions[slot])(params, paramLength);
478    setTLS(oldTLS);
479}
480
481void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
482                           uint32_t slot, void *data, size_t dataLength) {
483    DrvScript *drv = (DrvScript *)script->mHal.drv;
484    //rsAssert(!script->mFieldIsObject[slot]);
485    //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
486
487    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
488    if (!destPtr) {
489        //LOGV("Calling setVar on slot = %i which is null", slot);
490        return;
491    }
492
493    memcpy(destPtr, data, dataLength);
494}
495
496void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
497    DrvScript *drv = (DrvScript *)script->mHal.drv;
498    //rsAssert(!script->mFieldIsObject[slot]);
499    //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
500
501    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
502    if (!destPtr) {
503        //LOGV("Calling setVar on slot = %i which is null", slot);
504        return;
505    }
506
507    memcpy(destPtr, &data, sizeof(void *));
508}
509
510void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
511    DrvScript *drv = (DrvScript *)script->mHal.drv;
512    //rsAssert(script->mFieldIsObject[slot]);
513    //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
514
515    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
516    if (!destPtr) {
517        //LOGV("Calling setVar on slot = %i which is null", slot);
518        return;
519    }
520
521    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
522}
523
524void rsdScriptDestroy(const Context *dc, Script *script) {
525    DrvScript *drv = (DrvScript *)script->mHal.drv;
526
527    if (drv->mFieldAddress) {
528        for (size_t ct=0; ct < drv->mFieldCount; ct++) {
529            if (drv->mFieldIsObject[ct]) {
530                // The field address can be NULL if the script-side has
531                // optimized the corresponding global variable away.
532                if (drv->mFieldAddress[ct]) {
533                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
534                }
535            }
536        }
537        delete [] drv->mFieldAddress;
538        delete [] drv->mFieldIsObject;
539        drv->mFieldAddress = NULL;
540        drv->mFieldIsObject = NULL;
541        drv->mFieldCount = 0;
542    }
543
544    if (drv->mInvokeFunctions) {
545        delete [] drv->mInvokeFunctions;
546        drv->mInvokeFunctions = NULL;
547        drv->mInvokeFunctionCount = 0;
548    }
549    free(drv);
550    script->mHal.drv = NULL;
551
552}
553
554
555