rsdBcc.cpp revision 689821f178503af951a3d9dd558ef8eace6537cd
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsdCore.h"
19#include "rsdBcc.h"
20#include "rsdRuntime.h"
21
22#include <bcinfo/bcinfo.h>
23
24#include "rsContext.h"
25#include "rsScriptC.h"
26
27#include "utils/Timers.h"
28#include "utils/StopWatch.h"
29extern "C" {
30#include "libdex/ZipArchive.h"
31}
32
33
34using namespace android;
35using namespace android::renderscript;
36
37struct DrvScript {
38    int (*mRoot)();
39    void (*mInit)();
40
41    BCCScriptRef mBccScript;
42
43    struct BCScriptMetadata *mScriptMetadata;
44
45    InvokeFunc_t *mInvokeFunctions;
46    void ** mFieldAddress;
47    bool * mFieldIsObject;
48
49    const uint8_t * mScriptText;
50    uint32_t mScriptTextLength;
51};
52
53
54static Script * setTLS(Script *sc) {
55    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
56    rsAssert(tls);
57    Script *old = tls->mScript;
58    tls->mScript = sc;
59    return old;
60}
61
62
63bool rsdScriptInit(const Context *rsc,
64                     ScriptC *script,
65                     char const *resName,
66                     char const *cacheDir,
67                     uint8_t const *bitcode,
68                     size_t bitcodeSize,
69                     uint32_t flags) {
70    //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
71
72    pthread_mutex_lock(&rsdgInitMutex);
73    char *cachePath = NULL;
74    struct BCScriptMetadata *md = NULL;
75
76    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
77    if (drv == NULL) {
78        goto error;
79    }
80    script->mHal.drv = drv;
81
82    drv->mBccScript = bccCreateScript();
83    script->mHal.info.isThreadable = true;
84    drv->mScriptText = bitcode;
85    drv->mScriptTextLength = bitcodeSize;
86
87    md = bcinfoGetScriptMetadata((const char*)drv->mScriptText,
88                                 drv->mScriptTextLength, 0);
89    if (!md) {
90      LOGE("bcinfo: failed to read script metadata");
91      goto error;
92    }
93    drv->mScriptMetadata = md;
94
95    //LOGE("mBccScript %p", script->mBccScript);
96
97    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
98        LOGE("bcc: FAILS to register symbol callback");
99        goto error;
100    }
101
102    if (bccReadBC(drv->mBccScript,
103                  resName,
104                  (char const *)drv->mScriptText,
105                  drv->mScriptTextLength, 0) != 0) {
106        LOGE("bcc: FAILS to read bitcode");
107        goto error;
108    }
109
110    if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
111        LOGE("bcc: FAILS to link bitcode");
112        goto error;
113    }
114
115    if (bccPrepareExecutableEx(drv->mBccScript, cacheDir, resName, 0) != 0) {
116        LOGE("bcc: FAILS to prepare executable");
117        goto error;
118    }
119
120    free(cachePath);
121
122    drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
123    drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
124
125    if (md->exportFuncCount > 0) {
126        drv->mInvokeFunctions = (InvokeFunc_t*) calloc(md->exportFuncCount,
127                                                       sizeof(InvokeFunc_t));
128        bccGetExportFuncList(drv->mBccScript,
129                             md->exportFuncCount,
130                             (void **) drv->mInvokeFunctions);
131    } else {
132        drv->mInvokeFunctions = NULL;
133    }
134
135    if (md->exportVarCount > 0) {
136        drv->mFieldAddress = (void **) calloc(md->exportVarCount,
137                                              sizeof(void*));
138        drv->mFieldIsObject = (bool *) calloc(md->exportVarCount, sizeof(bool));
139        bccGetExportVarList(drv->mBccScript,
140                            md->exportVarCount,
141                            (void **) drv->mFieldAddress);
142    } else {
143        drv->mFieldAddress = NULL;
144        drv->mFieldIsObject = NULL;
145    }
146
147    if (md->objectSlotCount) {
148        for (uint32_t ct=0; ct < md->objectSlotCount; ct++) {
149            drv->mFieldIsObject[md->objectSlotList[ct]] = true;
150        }
151    }
152
153    // Copy info over to runtime
154    script->mHal.info.exportedFunctionCount = md->exportFuncCount;
155    script->mHal.info.exportedVariableCount = md->exportVarCount;
156    script->mHal.info.exportedPragmaCount = md->pragmaCount;
157    script->mHal.info.exportedPragmaKeyList = md->pragmaKeyList;
158    script->mHal.info.exportedPragmaValueList = md->pragmaValueList;
159    script->mHal.info.root = drv->mRoot;
160
161    pthread_mutex_unlock(&rsdgInitMutex);
162    return true;
163
164error:
165
166    pthread_mutex_unlock(&rsdgInitMutex);
167    free(drv);
168    return false;
169
170}
171
172typedef struct {
173    Context *rsc;
174    Script *script;
175    const Allocation * ain;
176    Allocation * aout;
177    const void * usr;
178
179    uint32_t mSliceSize;
180    volatile int mSliceNum;
181
182    const uint8_t *ptrIn;
183    uint32_t eStrideIn;
184    uint8_t *ptrOut;
185    uint32_t eStrideOut;
186
187    uint32_t xStart;
188    uint32_t xEnd;
189    uint32_t yStart;
190    uint32_t yEnd;
191    uint32_t zStart;
192    uint32_t zEnd;
193    uint32_t arrayStart;
194    uint32_t arrayEnd;
195
196    uint32_t dimX;
197    uint32_t dimY;
198    uint32_t dimZ;
199    uint32_t dimArray;
200} MTLaunchStruct;
201typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
202
203static void wc_xy(void *usr, uint32_t idx) {
204    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
205
206    while (1) {
207        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
208        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
209        uint32_t yEnd = yStart + mtls->mSliceSize;
210        yEnd = rsMin(yEnd, mtls->yEnd);
211        if (yEnd <= yStart) {
212            return;
213        }
214
215        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
216        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
217        for (uint32_t y = yStart; y < yEnd; y++) {
218            uint32_t offset = mtls->dimX * y;
219            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
220            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
221
222            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
223                ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
224                xPtrIn += mtls->eStrideIn;
225                xPtrOut += mtls->eStrideOut;
226            }
227        }
228    }
229}
230
231static void wc_x(void *usr, uint32_t idx) {
232    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
233
234    while (1) {
235        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
236        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
237        uint32_t xEnd = xStart + mtls->mSliceSize;
238        xEnd = rsMin(xEnd, mtls->xEnd);
239        if (xEnd <= xStart) {
240            return;
241        }
242
243        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
244        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
245        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
246        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
247        for (uint32_t x = xStart; x < xEnd; x++) {
248            ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0);
249            xPtrIn += mtls->eStrideIn;
250            xPtrOut += mtls->eStrideOut;
251        }
252    }
253}
254
255void rsdScriptInvokeForEach(const Context *rsc,
256                            Script *s,
257                            uint32_t slot,
258                            const Allocation * ain,
259                            Allocation * aout,
260                            const void * usr,
261                            uint32_t usrLen,
262                            const RsScriptCall *sc) {
263
264    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
265
266    MTLaunchStruct mtls;
267    memset(&mtls, 0, sizeof(mtls));
268
269    if (ain) {
270        mtls.dimX = ain->getType()->getDimX();
271        mtls.dimY = ain->getType()->getDimY();
272        mtls.dimZ = ain->getType()->getDimZ();
273        //mtls.dimArray = ain->getType()->getDimArray();
274    } else if (aout) {
275        mtls.dimX = aout->getType()->getDimX();
276        mtls.dimY = aout->getType()->getDimY();
277        mtls.dimZ = aout->getType()->getDimZ();
278        //mtls.dimArray = aout->getType()->getDimArray();
279    } else {
280        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
281        return;
282    }
283
284    if (!sc || (sc->xEnd == 0)) {
285        mtls.xEnd = mtls.dimX;
286    } else {
287        rsAssert(sc->xStart < mtls.dimX);
288        rsAssert(sc->xEnd <= mtls.dimX);
289        rsAssert(sc->xStart < sc->xEnd);
290        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
291        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
292        if (mtls.xStart >= mtls.xEnd) return;
293    }
294
295    if (!sc || (sc->yEnd == 0)) {
296        mtls.yEnd = mtls.dimY;
297    } else {
298        rsAssert(sc->yStart < mtls.dimY);
299        rsAssert(sc->yEnd <= mtls.dimY);
300        rsAssert(sc->yStart < sc->yEnd);
301        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
302        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
303        if (mtls.yStart >= mtls.yEnd) return;
304    }
305
306    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
307    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
308    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
309    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
310
311    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
312
313    Context *mrsc = (Context *)rsc;
314    Script * oldTLS = setTLS(s);
315
316    mtls.rsc = mrsc;
317    mtls.ain = ain;
318    mtls.aout = aout;
319    mtls.script = s;
320    mtls.usr = usr;
321    mtls.mSliceSize = 10;
322    mtls.mSliceNum = 0;
323
324    mtls.ptrIn = NULL;
325    mtls.eStrideIn = 0;
326    if (ain) {
327        mtls.ptrIn = (const uint8_t *)ain->getPtr();
328        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
329    }
330
331    mtls.ptrOut = NULL;
332    mtls.eStrideOut = 0;
333    if (aout) {
334        mtls.ptrOut = (uint8_t *)aout->getPtr();
335        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
336    }
337
338    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
339        if (mtls.dimY > 1) {
340            rsdLaunchThreads(mrsc, wc_xy, &mtls);
341        } else {
342            rsdLaunchThreads(mrsc, wc_x, &mtls);
343        }
344
345        //LOGE("launch 1");
346    } else {
347        //LOGE("launch 3");
348        for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
349            for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
350                for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
351                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
352                                      mtls.dimX * mtls.dimY * z +
353                                      mtls.dimX * y;
354                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
355                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
356
357                    for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
358                        ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar);
359                        xPtrIn += mtls.eStrideIn;
360                        xPtrOut += mtls.eStrideOut;
361                    }
362                }
363            }
364        }
365    }
366
367    setTLS(oldTLS);
368}
369
370
371int rsdScriptInvokeRoot(const Context *dc, Script *script) {
372    DrvScript *drv = (DrvScript *)script->mHal.drv;
373
374    Script * oldTLS = setTLS(script);
375    int ret = drv->mRoot();
376    setTLS(oldTLS);
377
378    return ret;
379}
380
381void rsdScriptInvokeInit(const Context *dc, Script *script) {
382    DrvScript *drv = (DrvScript *)script->mHal.drv;
383
384    if (drv->mInit) {
385        drv->mInit();
386    }
387}
388
389
390void rsdScriptInvokeFunction(const Context *dc, Script *script,
391                            uint32_t slot,
392                            const void *params,
393                            size_t paramLength) {
394    DrvScript *drv = (DrvScript *)script->mHal.drv;
395    //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
396
397    Script * oldTLS = setTLS(script);
398    ((void (*)(const void *, uint32_t))
399        drv->mInvokeFunctions[slot])(params, paramLength);
400    setTLS(oldTLS);
401}
402
403void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
404                           uint32_t slot, void *data, size_t dataLength) {
405    DrvScript *drv = (DrvScript *)script->mHal.drv;
406    //rsAssert(!script->mFieldIsObject[slot]);
407    //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
408
409    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
410    if (!destPtr) {
411        //LOGV("Calling setVar on slot = %i which is null", slot);
412        return;
413    }
414
415    memcpy(destPtr, data, dataLength);
416}
417
418void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
419    DrvScript *drv = (DrvScript *)script->mHal.drv;
420    //rsAssert(!script->mFieldIsObject[slot]);
421    //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
422
423    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
424    if (!destPtr) {
425        //LOGV("Calling setVar on slot = %i which is null", slot);
426        return;
427    }
428
429    memcpy(destPtr, &data, sizeof(void *));
430}
431
432void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
433    DrvScript *drv = (DrvScript *)script->mHal.drv;
434    //rsAssert(script->mFieldIsObject[slot]);
435    //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
436
437    int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
438    if (!destPtr) {
439        //LOGV("Calling setVar on slot = %i which is null", slot);
440        return;
441    }
442
443    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
444}
445
446void rsdScriptDestroy(const Context *dc, Script *script) {
447    DrvScript *drv = (DrvScript *)script->mHal.drv;
448    struct BCScriptMetadata *md = drv->mScriptMetadata;
449
450    if (drv->mFieldAddress) {
451        for (size_t ct = 0; ct < md->exportVarCount; ct++) {
452            if (drv->mFieldIsObject[ct]) {
453                // The field address can be NULL if the script-side has
454                // optimized the corresponding global variable away.
455                if (drv->mFieldAddress[ct]) {
456                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
457                }
458            }
459        }
460        free(drv->mFieldAddress);
461        drv->mFieldAddress = NULL;
462        free(drv->mFieldIsObject);
463        drv->mFieldIsObject = NULL;    }
464
465    if (drv->mInvokeFunctions) {
466        free(drv->mInvokeFunctions);
467        drv->mInvokeFunctions = NULL;
468    }
469
470    bcinfoReleaseScriptMetadata(&drv->mScriptMetadata);
471
472    free(drv);
473    script->mHal.drv = NULL;
474
475}
476
477
478