rsdBcc.cpp revision 586e2a41144dd520390b04897936b694647a1330
/*
 * Copyright (C) 2011-2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20
21#include <bcc/BCCContext.h>
22#include <bcc/RenderScript/RSCompilerDriver.h>
23#include <bcc/RenderScript/RSExecutable.h>
24#include <bcc/RenderScript/RSInfo.h>
25
26#include "rsContext.h"
27#include "rsElement.h"
28#include "rsScriptC.h"
29
30#include "utils/Vector.h"
31#include "utils/Timers.h"
32#include "utils/StopWatch.h"
33
34using namespace android;
35using namespace android::renderscript;
36
37struct DrvScript {
38    int (*mRoot)();
39    int (*mRootExpand)();
40    void (*mInit)();
41    void (*mFreeChildren)();
42
43    bcc::BCCContext *mCompilerContext;
44    bcc::RSCompilerDriver *mCompilerDriver;
45    bcc::RSExecutable *mExecutable;
46};
47
48typedef void (*outer_foreach_t)(
49    const android::renderscript::RsForEachStubParamStruct *,
50    uint32_t x1, uint32_t x2,
51    uint32_t instep, uint32_t outstep);
52
53static Script * setTLS(Script *sc) {
54    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
55    rsAssert(tls);
56    Script *old = tls->mScript;
57    tls->mScript = sc;
58    return old;
59}
60
61
62bool rsdScriptInit(const Context *rsc,
63                     ScriptC *script,
64                     char const *resName,
65                     char const *cacheDir,
66                     uint8_t const *bitcode,
67                     size_t bitcodeSize,
68                     uint32_t flags) {
69    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
70    //ALOGE("rsdScriptInit %p %p", rsc, script);
71
72    pthread_mutex_lock(&rsdgInitMutex);
73
74    const char* coreLib = "/system/lib/libclcore.bc";
75    bcc::RSInfo::FloatPrecision prec;
76    bcc::RSExecutable *exec;
77    const bcc::RSInfo *info;
78    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
79    if (drv == NULL) {
80        goto error;
81    }
82    script->mHal.drv = drv;
83
84    drv->mCompilerContext = NULL;
85    drv->mCompilerDriver = NULL;
86    drv->mExecutable = NULL;
87
88    drv->mCompilerContext = new bcc::BCCContext();
89    if (drv->mCompilerContext == NULL) {
90        ALOGE("bcc: FAILS to create compiler context (out of memory)");
91        goto error;
92    }
93
94    drv->mCompilerDriver = new bcc::RSCompilerDriver();
95    if (drv->mCompilerDriver == NULL) {
96        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
97        goto error;
98    }
99
100    script->mHal.info.isThreadable = true;
101
102    drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub);
103    drv->mCompilerDriver->setRSRuntimeLookupContext(script);
104
105    exec = drv->mCompilerDriver->build(*drv->mCompilerContext,
106                                       cacheDir, resName,
107                                       (const char *)bitcode, bitcodeSize);
108
109    if (exec == NULL) {
110        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
111        goto error;
112    }
113
114    drv->mExecutable = exec;
115
116    exec->setThreadable(script->mHal.info.isThreadable);
117    if (!exec->syncInfo()) {
118        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
119    }
120
121    drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
122    drv->mRootExpand =
123        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
124    drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
125    drv->mFreeChildren =
126        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
127
128    info = &drv->mExecutable->getInfo();
129    // Copy info over to runtime
130    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
131    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
132    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
133    script->mHal.info.exportedPragmaKeyList =
134        const_cast<const char**>(exec->getPragmaKeys().array());
135    script->mHal.info.exportedPragmaValueList =
136        const_cast<const char**>(exec->getPragmaValues().array());
137
138    if (drv->mRootExpand) {
139        script->mHal.info.root = drv->mRootExpand;
140    } else {
141        script->mHal.info.root = drv->mRoot;
142    }
143
144    pthread_mutex_unlock(&rsdgInitMutex);
145    return true;
146
147error:
148
149    pthread_mutex_unlock(&rsdgInitMutex);
150    if (drv) {
151        delete drv->mCompilerContext;
152        delete drv->mCompilerDriver;
153        delete drv->mExecutable;
154        free(drv);
155    }
156    script->mHal.drv = NULL;
157    return false;
158
159}
160
161typedef struct {
162    Context *rsc;
163    Script *script;
164    ForEachFunc_t kernel;
165    uint32_t sig;
166    const Allocation * ain;
167    Allocation * aout;
168    const void * usr;
169    size_t usrLen;
170
171    uint32_t mSliceSize;
172    volatile int mSliceNum;
173
174    const uint8_t *ptrIn;
175    uint32_t eStrideIn;
176    uint8_t *ptrOut;
177    uint32_t eStrideOut;
178
179    uint32_t yStrideIn;
180    uint32_t yStrideOut;
181
182    uint32_t xStart;
183    uint32_t xEnd;
184    uint32_t yStart;
185    uint32_t yEnd;
186    uint32_t zStart;
187    uint32_t zEnd;
188    uint32_t arrayStart;
189    uint32_t arrayEnd;
190
191    uint32_t dimX;
192    uint32_t dimY;
193    uint32_t dimZ;
194    uint32_t dimArray;
195} MTLaunchStruct;
// Function-pointer type taking three data pointers followed by four uint32_t
// values.
typedef void (*rs_t)(const void *, void *, const void *,
                     uint32_t, uint32_t, uint32_t, uint32_t);
197
198static void wc_xy(void *usr, uint32_t idx) {
199    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
200    RsForEachStubParamStruct p;
201    memset(&p, 0, sizeof(p));
202    p.usr = mtls->usr;
203    p.usr_len = mtls->usrLen;
204    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
205    uint32_t sig = mtls->sig;
206
207    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
208    while (1) {
209        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
210        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
211        uint32_t yEnd = yStart + mtls->mSliceSize;
212        yEnd = rsMin(yEnd, mtls->yEnd);
213        if (yEnd <= yStart) {
214            return;
215        }
216
217        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
218        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
219        for (p.y = yStart; p.y < yEnd; p.y++) {
220            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
221            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
222            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
223        }
224    }
225}
226
227static void wc_x(void *usr, uint32_t idx) {
228    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
229    RsForEachStubParamStruct p;
230    memset(&p, 0, sizeof(p));
231    p.usr = mtls->usr;
232    p.usr_len = mtls->usrLen;
233    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
234    uint32_t sig = mtls->sig;
235
236    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
237    while (1) {
238        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
239        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
240        uint32_t xEnd = xStart + mtls->mSliceSize;
241        xEnd = rsMin(xEnd, mtls->xEnd);
242        if (xEnd <= xStart) {
243            return;
244        }
245
246        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
247        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
248
249        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
250        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
251        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
252    }
253}
254
255void rsdScriptInvokeForEach(const Context *rsc,
256                            Script *s,
257                            uint32_t slot,
258                            const Allocation * ain,
259                            Allocation * aout,
260                            const void * usr,
261                            uint32_t usrLen,
262                            const RsScriptCall *sc) {
263
264    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
265
266    MTLaunchStruct mtls;
267    memset(&mtls, 0, sizeof(mtls));
268
269    //ALOGE("for each script %p  in %p   out %p", s, ain, aout);
270
271    DrvScript *drv = (DrvScript *)s->mHal.drv;
272    rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
273    mtls.kernel = reinterpret_cast<ForEachFunc_t>(
274                      drv->mExecutable->getExportForeachFuncAddrs()[slot]);
275    rsAssert(mtls.kernel != NULL);
276    mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
277
278    if (ain) {
279        mtls.dimX = ain->getType()->getDimX();
280        mtls.dimY = ain->getType()->getDimY();
281        mtls.dimZ = ain->getType()->getDimZ();
282        //mtls.dimArray = ain->getType()->getDimArray();
283    } else if (aout) {
284        mtls.dimX = aout->getType()->getDimX();
285        mtls.dimY = aout->getType()->getDimY();
286        mtls.dimZ = aout->getType()->getDimZ();
287        //mtls.dimArray = aout->getType()->getDimArray();
288    } else {
289        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
290        return;
291    }
292
293    if (!sc || (sc->xEnd == 0)) {
294        mtls.xEnd = mtls.dimX;
295    } else {
296        rsAssert(sc->xStart < mtls.dimX);
297        rsAssert(sc->xEnd <= mtls.dimX);
298        rsAssert(sc->xStart < sc->xEnd);
299        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
300        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
301        if (mtls.xStart >= mtls.xEnd) return;
302    }
303
304    if (!sc || (sc->yEnd == 0)) {
305        mtls.yEnd = mtls.dimY;
306    } else {
307        rsAssert(sc->yStart < mtls.dimY);
308        rsAssert(sc->yEnd <= mtls.dimY);
309        rsAssert(sc->yStart < sc->yEnd);
310        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
311        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
312        if (mtls.yStart >= mtls.yEnd) return;
313    }
314
315    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
316    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
317    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
318    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
319
320    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
321
322    Context *mrsc = (Context *)rsc;
323    Script * oldTLS = setTLS(s);
324
325    mtls.rsc = mrsc;
326    mtls.ain = ain;
327    mtls.aout = aout;
328    mtls.script = s;
329    mtls.usr = usr;
330    mtls.usrLen = usrLen;
331    mtls.mSliceSize = 10;
332    mtls.mSliceNum = 0;
333
334    mtls.ptrIn = NULL;
335    mtls.eStrideIn = 0;
336    if (ain) {
337        mtls.ptrIn = (const uint8_t *)ain->getPtr();
338        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
339        mtls.yStrideIn = ain->mHal.drvState.stride;
340    }
341
342    mtls.ptrOut = NULL;
343    mtls.eStrideOut = 0;
344    if (aout) {
345        mtls.ptrOut = (uint8_t *)aout->getPtr();
346        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
347        mtls.yStrideOut = aout->mHal.drvState.stride;
348    }
349
350    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) {
351        dc->mInForEach = true;
352        if (mtls.dimY > 1) {
353            mtls.mSliceSize = mtls.dimY / (dc->mWorkers.mCount * 4);
354            if(mtls.mSliceSize < 1) {
355                mtls.mSliceSize = 1;
356            }
357
358            rsdLaunchThreads(mrsc, wc_xy, &mtls);
359        } else {
360            mtls.mSliceSize = mtls.dimX / (dc->mWorkers.mCount * 4);
361            if(mtls.mSliceSize < 1) {
362                mtls.mSliceSize = 1;
363            }
364
365            rsdLaunchThreads(mrsc, wc_x, &mtls);
366        }
367        dc->mInForEach = false;
368
369        //ALOGE("launch 1");
370    } else {
371        RsForEachStubParamStruct p;
372        memset(&p, 0, sizeof(p));
373        p.usr = mtls.usr;
374        p.usr_len = mtls.usrLen;
375        uint32_t sig = mtls.sig;
376
377        //ALOGE("launch 3");
378        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
379        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
380            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
381                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
382                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
383                                      mtls.dimX * mtls.dimY * p.z +
384                                      mtls.dimX * p.y;
385                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
386                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
387                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
388                       mtls.eStrideOut);
389                }
390            }
391        }
392    }
393
394    setTLS(oldTLS);
395}
396
397
398int rsdScriptInvokeRoot(const Context *dc, Script *script) {
399    DrvScript *drv = (DrvScript *)script->mHal.drv;
400
401    Script * oldTLS = setTLS(script);
402    int ret = drv->mRoot();
403    setTLS(oldTLS);
404
405    return ret;
406}
407
408void rsdScriptInvokeInit(const Context *dc, Script *script) {
409    DrvScript *drv = (DrvScript *)script->mHal.drv;
410
411    if (drv->mInit) {
412        drv->mInit();
413    }
414}
415
416void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
417    DrvScript *drv = (DrvScript *)script->mHal.drv;
418
419    if (drv->mFreeChildren) {
420        drv->mFreeChildren();
421    }
422}
423
424void rsdScriptInvokeFunction(const Context *dc, Script *script,
425                            uint32_t slot,
426                            const void *params,
427                            size_t paramLength) {
428    DrvScript *drv = (DrvScript *)script->mHal.drv;
429    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
430
431    Script * oldTLS = setTLS(script);
432    reinterpret_cast<void (*)(const void *, uint32_t)>(
433        drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
434    setTLS(oldTLS);
435}
436
437void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
438                           uint32_t slot, void *data, size_t dataLength) {
439    DrvScript *drv = (DrvScript *)script->mHal.drv;
440    //rsAssert(!script->mFieldIsObject[slot]);
441    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
442
443    int32_t *destPtr = reinterpret_cast<int32_t *>(
444                          drv->mExecutable->getExportVarAddrs()[slot]);
445    if (!destPtr) {
446        //ALOGV("Calling setVar on slot = %i which is null", slot);
447        return;
448    }
449
450    memcpy(destPtr, data, dataLength);
451}
452
453void rsdScriptSetGlobalVarWithElemDims(
454        const android::renderscript::Context *dc,
455        const android::renderscript::Script *script,
456        uint32_t slot, void *data, size_t dataLength,
457        const android::renderscript::Element *elem,
458        const size_t *dims, size_t dimLength) {
459    DrvScript *drv = (DrvScript *)script->mHal.drv;
460
461    int32_t *destPtr = reinterpret_cast<int32_t *>(
462        drv->mExecutable->getExportVarAddrs()[slot]);
463    if (!destPtr) {
464        //ALOGV("Calling setVar on slot = %i which is null", slot);
465        return;
466    }
467
468    // We want to look at dimension in terms of integer components,
469    // but dimLength is given in terms of bytes.
470    dimLength /= sizeof(int);
471
472    // Only a single dimension is currently supported.
473    rsAssert(dimLength == 1);
474    if (dimLength == 1) {
475        // First do the increment loop.
476        size_t stride = elem->getSizeBytes();
477        char *cVal = reinterpret_cast<char *>(data);
478        for (size_t i = 0; i < dims[0]; i++) {
479            elem->incRefs(cVal);
480            cVal += stride;
481        }
482
483        // Decrement loop comes after (to prevent race conditions).
484        char *oldVal = reinterpret_cast<char *>(destPtr);
485        for (size_t i = 0; i < dims[0]; i++) {
486            elem->decRefs(oldVal);
487            oldVal += stride;
488        }
489    }
490
491    memcpy(destPtr, data, dataLength);
492}
493
494void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
495    DrvScript *drv = (DrvScript *)script->mHal.drv;
496    //rsAssert(!script->mFieldIsObject[slot]);
497    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
498
499    int32_t *destPtr = reinterpret_cast<int32_t *>(
500                          drv->mExecutable->getExportVarAddrs()[slot]);
501    if (!destPtr) {
502        //ALOGV("Calling setVar on slot = %i which is null", slot);
503        return;
504    }
505
506    memcpy(destPtr, &data, sizeof(void *));
507}
508
509void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
510    DrvScript *drv = (DrvScript *)script->mHal.drv;
511    //rsAssert(script->mFieldIsObject[slot]);
512    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
513
514    int32_t *destPtr = reinterpret_cast<int32_t *>(
515                          drv->mExecutable->getExportVarAddrs()[slot]);
516    if (!destPtr) {
517        //ALOGV("Calling setVar on slot = %i which is null", slot);
518        return;
519    }
520
521    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
522}
523
524void rsdScriptDestroy(const Context *dc, Script *script) {
525    DrvScript *drv = (DrvScript *)script->mHal.drv;
526
527    if (drv == NULL) {
528        return;
529    }
530
531    if (drv->mExecutable) {
532        Vector<void *>::const_iterator var_addr_iter =
533            drv->mExecutable->getExportVarAddrs().begin();
534        Vector<void *>::const_iterator var_addr_end =
535            drv->mExecutable->getExportVarAddrs().end();
536
537        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
538            drv->mExecutable->getInfo().getObjectSlots().begin();
539        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
540            drv->mExecutable->getInfo().getObjectSlots().end();
541
542        while ((var_addr_iter != var_addr_end) &&
543               (is_object_iter != is_object_end)) {
544            // The field address can be NULL if the script-side has optimized
545            // the corresponding global variable away.
546            ObjectBase **obj_addr =
547                reinterpret_cast<ObjectBase **>(*var_addr_iter);
548            if (*is_object_iter) {
549                if (*var_addr_iter != NULL) {
550                    rsrClearObject(dc, script, obj_addr);
551                }
552            }
553            var_addr_iter++;
554            is_object_iter++;
555        }
556    }
557
558    delete drv->mCompilerContext;
559    delete drv->mCompilerDriver;
560    delete drv->mExecutable;
561
562    free(drv);
563    script->mHal.drv = NULL;
564}
565