rsdBcc.cpp revision 807fdc4b6f3fb893015ee136565d6151bb2332d3
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20#include "rsdAllocation.h"
21
22#include <bcc/BCCContext.h>
23#include <bcc/RenderScript/RSCompilerDriver.h>
24#include <bcc/RenderScript/RSExecutable.h>
25#include <bcc/RenderScript/RSInfo.h>
26
27#include "rsContext.h"
28#include "rsElement.h"
29#include "rsScriptC.h"
30
31#include "utils/Vector.h"
32#include "utils/Timers.h"
33#include "utils/StopWatch.h"
34
35using namespace android;
36using namespace android::renderscript;
37
38struct DrvScript {
39    int (*mRoot)();
40    int (*mRootExpand)();
41    void (*mInit)();
42    void (*mFreeChildren)();
43
44    bcc::BCCContext *mCompilerContext;
45    bcc::RSCompilerDriver *mCompilerDriver;
46    bcc::RSExecutable *mExecutable;
47
48    Allocation **mBoundAllocs;
49};
50
51typedef void (*outer_foreach_t)(
52    const android::renderscript::RsForEachStubParamStruct *,
53    uint32_t x1, uint32_t x2,
54    uint32_t instep, uint32_t outstep);
55
56static Script * setTLS(Script *sc) {
57    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
58    rsAssert(tls);
59    Script *old = tls->mScript;
60    tls->mScript = sc;
61    return old;
62}
63
64
65bool rsdScriptInit(const Context *rsc,
66                     ScriptC *script,
67                     char const *resName,
68                     char const *cacheDir,
69                     uint8_t const *bitcode,
70                     size_t bitcodeSize,
71                     uint32_t flags) {
72    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
73    //ALOGE("rsdScriptInit %p %p", rsc, script);
74
75    pthread_mutex_lock(&rsdgInitMutex);
76
77    const char* coreLib = "/system/lib/libclcore.bc";
78    bcc::RSInfo::FloatPrecision prec;
79    bcc::RSExecutable *exec;
80    const bcc::RSInfo *info;
81    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
82    if (drv == NULL) {
83        goto error;
84    }
85    script->mHal.drv = drv;
86
87    drv->mCompilerContext = NULL;
88    drv->mCompilerDriver = NULL;
89    drv->mExecutable = NULL;
90
91    drv->mCompilerContext = new bcc::BCCContext();
92    if (drv->mCompilerContext == NULL) {
93        ALOGE("bcc: FAILS to create compiler context (out of memory)");
94        goto error;
95    }
96
97    drv->mCompilerDriver = new bcc::RSCompilerDriver();
98    if (drv->mCompilerDriver == NULL) {
99        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
100        goto error;
101    }
102
103    script->mHal.info.isThreadable = true;
104
105    drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub);
106    drv->mCompilerDriver->setRSRuntimeLookupContext(script);
107
108    exec = drv->mCompilerDriver->build(*drv->mCompilerContext,
109                                       cacheDir, resName,
110                                       (const char *)bitcode, bitcodeSize);
111
112    if (exec == NULL) {
113        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
114        goto error;
115    }
116
117    drv->mExecutable = exec;
118
119    exec->setThreadable(script->mHal.info.isThreadable);
120    if (!exec->syncInfo()) {
121        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
122    }
123
124    drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
125    drv->mRootExpand =
126        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
127    drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
128    drv->mFreeChildren =
129        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
130
131    info = &drv->mExecutable->getInfo();
132    // Copy info over to runtime
133    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
134    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
135    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
136    script->mHal.info.exportedPragmaKeyList =
137        const_cast<const char**>(exec->getPragmaKeys().array());
138    script->mHal.info.exportedPragmaValueList =
139        const_cast<const char**>(exec->getPragmaValues().array());
140
141    if (drv->mRootExpand) {
142        script->mHal.info.root = drv->mRootExpand;
143    } else {
144        script->mHal.info.root = drv->mRoot;
145    }
146
147    if (script->mHal.info.exportedVariableCount) {
148        drv->mBoundAllocs = new Allocation *[script->mHal.info.exportedVariableCount];
149        memset(drv->mBoundAllocs, 0, sizeof(void *) * script->mHal.info.exportedVariableCount);
150    }
151
152    pthread_mutex_unlock(&rsdgInitMutex);
153    return true;
154
155error:
156
157    pthread_mutex_unlock(&rsdgInitMutex);
158    if (drv) {
159        delete drv->mCompilerContext;
160        delete drv->mCompilerDriver;
161        delete drv->mExecutable;
162        delete[] drv->mBoundAllocs;
163        free(drv);
164    }
165    script->mHal.drv = NULL;
166    return false;
167
168}
169
170typedef struct {
171    Context *rsc;
172    Script *script;
173    ForEachFunc_t kernel;
174    uint32_t sig;
175    const Allocation * ain;
176    Allocation * aout;
177    const void * usr;
178    size_t usrLen;
179
180    uint32_t mSliceSize;
181    volatile int mSliceNum;
182
183    const uint8_t *ptrIn;
184    uint32_t eStrideIn;
185    uint8_t *ptrOut;
186    uint32_t eStrideOut;
187
188    uint32_t yStrideIn;
189    uint32_t yStrideOut;
190
191    uint32_t xStart;
192    uint32_t xEnd;
193    uint32_t yStart;
194    uint32_t yEnd;
195    uint32_t zStart;
196    uint32_t zEnd;
197    uint32_t arrayStart;
198    uint32_t arrayEnd;
199
200    uint32_t dimX;
201    uint32_t dimY;
202    uint32_t dimZ;
203    uint32_t dimArray;
204} MTLaunchStruct;
// Function-pointer type taking an input pointer, an output pointer, a user
// data pointer and four 32-bit values. Not referenced by the code visible in
// this file.
typedef void (*rs_t)(const void *, void *, const void *,
                     uint32_t, uint32_t, uint32_t, uint32_t);
206
207static void wc_xy(void *usr, uint32_t idx) {
208    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
209    RsForEachStubParamStruct p;
210    memset(&p, 0, sizeof(p));
211    p.usr = mtls->usr;
212    p.usr_len = mtls->usrLen;
213    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
214    uint32_t sig = mtls->sig;
215
216    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
217    while (1) {
218        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
219        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
220        uint32_t yEnd = yStart + mtls->mSliceSize;
221        yEnd = rsMin(yEnd, mtls->yEnd);
222        if (yEnd <= yStart) {
223            return;
224        }
225
226        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
227        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
228        for (p.y = yStart; p.y < yEnd; p.y++) {
229            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
230            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
231            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
232        }
233    }
234}
235
236static void wc_x(void *usr, uint32_t idx) {
237    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
238    RsForEachStubParamStruct p;
239    memset(&p, 0, sizeof(p));
240    p.usr = mtls->usr;
241    p.usr_len = mtls->usrLen;
242    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
243    uint32_t sig = mtls->sig;
244
245    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
246    while (1) {
247        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
248        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
249        uint32_t xEnd = xStart + mtls->mSliceSize;
250        xEnd = rsMin(xEnd, mtls->xEnd);
251        if (xEnd <= xStart) {
252            return;
253        }
254
255        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
256        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
257
258        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
259        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
260        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
261    }
262}
263
264void rsdScriptInvokeForEach(const Context *rsc,
265                            Script *s,
266                            uint32_t slot,
267                            const Allocation * ain,
268                            Allocation * aout,
269                            const void * usr,
270                            uint32_t usrLen,
271                            const RsScriptCall *sc) {
272
273    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
274
275    MTLaunchStruct mtls;
276    memset(&mtls, 0, sizeof(mtls));
277
278    //ALOGE("for each script %p  in %p   out %p", s, ain, aout);
279
280    DrvScript *drv = (DrvScript *)s->mHal.drv;
281    rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
282    mtls.kernel = reinterpret_cast<ForEachFunc_t>(
283                      drv->mExecutable->getExportForeachFuncAddrs()[slot]);
284    rsAssert(mtls.kernel != NULL);
285    mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
286
287    if (ain) {
288        mtls.dimX = ain->getType()->getDimX();
289        mtls.dimY = ain->getType()->getDimY();
290        mtls.dimZ = ain->getType()->getDimZ();
291        //mtls.dimArray = ain->getType()->getDimArray();
292    } else if (aout) {
293        mtls.dimX = aout->getType()->getDimX();
294        mtls.dimY = aout->getType()->getDimY();
295        mtls.dimZ = aout->getType()->getDimZ();
296        //mtls.dimArray = aout->getType()->getDimArray();
297    } else {
298        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
299        return;
300    }
301
302    if (!sc || (sc->xEnd == 0)) {
303        mtls.xEnd = mtls.dimX;
304    } else {
305        rsAssert(sc->xStart < mtls.dimX);
306        rsAssert(sc->xEnd <= mtls.dimX);
307        rsAssert(sc->xStart < sc->xEnd);
308        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
309        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
310        if (mtls.xStart >= mtls.xEnd) return;
311    }
312
313    if (!sc || (sc->yEnd == 0)) {
314        mtls.yEnd = mtls.dimY;
315    } else {
316        rsAssert(sc->yStart < mtls.dimY);
317        rsAssert(sc->yEnd <= mtls.dimY);
318        rsAssert(sc->yStart < sc->yEnd);
319        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
320        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
321        if (mtls.yStart >= mtls.yEnd) return;
322    }
323
324    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
325    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
326    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
327    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
328
329    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
330
331    Context *mrsc = (Context *)rsc;
332    Script * oldTLS = setTLS(s);
333
334    mtls.rsc = mrsc;
335    mtls.ain = ain;
336    mtls.aout = aout;
337    mtls.script = s;
338    mtls.usr = usr;
339    mtls.usrLen = usrLen;
340    mtls.mSliceSize = 10;
341    mtls.mSliceNum = 0;
342
343    mtls.ptrIn = NULL;
344    mtls.eStrideIn = 0;
345    if (ain) {
346        DrvAllocation *aindrv = (DrvAllocation *)ain->mHal.drv;
347        mtls.ptrIn = (const uint8_t *)ain->getPtr();
348        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
349        mtls.yStrideIn = aindrv->lod[0].stride;
350    }
351
352    mtls.ptrOut = NULL;
353    mtls.eStrideOut = 0;
354    if (aout) {
355        DrvAllocation *aoutdrv = (DrvAllocation *)aout->mHal.drv;
356        mtls.ptrOut = (uint8_t *)aout->getPtr();
357        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
358        mtls.yStrideOut = aoutdrv->lod[0].stride;
359    }
360
361    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) {
362        dc->mInForEach = true;
363        if (mtls.dimY > 1) {
364            mtls.mSliceSize = mtls.dimY / (dc->mWorkers.mCount * 4);
365            if(mtls.mSliceSize < 1) {
366                mtls.mSliceSize = 1;
367            }
368
369            rsdLaunchThreads(mrsc, wc_xy, &mtls);
370        } else {
371            mtls.mSliceSize = mtls.dimX / (dc->mWorkers.mCount * 4);
372            if(mtls.mSliceSize < 1) {
373                mtls.mSliceSize = 1;
374            }
375
376            rsdLaunchThreads(mrsc, wc_x, &mtls);
377        }
378        dc->mInForEach = false;
379
380        //ALOGE("launch 1");
381    } else {
382        RsForEachStubParamStruct p;
383        memset(&p, 0, sizeof(p));
384        p.usr = mtls.usr;
385        p.usr_len = mtls.usrLen;
386        uint32_t sig = mtls.sig;
387
388        //ALOGE("launch 3");
389        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
390        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
391            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
392                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
393                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
394                                      mtls.dimX * mtls.dimY * p.z +
395                                      mtls.dimX * p.y;
396                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
397                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
398                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
399                       mtls.eStrideOut);
400                }
401            }
402        }
403    }
404
405    setTLS(oldTLS);
406}
407
408
409int rsdScriptInvokeRoot(const Context *dc, Script *script) {
410    DrvScript *drv = (DrvScript *)script->mHal.drv;
411
412    Script * oldTLS = setTLS(script);
413    int ret = drv->mRoot();
414    setTLS(oldTLS);
415
416    return ret;
417}
418
419void rsdScriptInvokeInit(const Context *dc, Script *script) {
420    DrvScript *drv = (DrvScript *)script->mHal.drv;
421
422    if (drv->mInit) {
423        drv->mInit();
424    }
425}
426
427void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
428    DrvScript *drv = (DrvScript *)script->mHal.drv;
429
430    if (drv->mFreeChildren) {
431        drv->mFreeChildren();
432    }
433}
434
435void rsdScriptInvokeFunction(const Context *dc, Script *script,
436                            uint32_t slot,
437                            const void *params,
438                            size_t paramLength) {
439    DrvScript *drv = (DrvScript *)script->mHal.drv;
440    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
441
442    Script * oldTLS = setTLS(script);
443    reinterpret_cast<void (*)(const void *, uint32_t)>(
444        drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
445    setTLS(oldTLS);
446}
447
448void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
449                           uint32_t slot, void *data, size_t dataLength) {
450    DrvScript *drv = (DrvScript *)script->mHal.drv;
451    //rsAssert(!script->mFieldIsObject[slot]);
452    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
453
454    int32_t *destPtr = reinterpret_cast<int32_t *>(
455                          drv->mExecutable->getExportVarAddrs()[slot]);
456    if (!destPtr) {
457        //ALOGV("Calling setVar on slot = %i which is null", slot);
458        return;
459    }
460
461    memcpy(destPtr, data, dataLength);
462}
463
464void rsdScriptSetGlobalVarWithElemDims(
465        const android::renderscript::Context *dc,
466        const android::renderscript::Script *script,
467        uint32_t slot, void *data, size_t dataLength,
468        const android::renderscript::Element *elem,
469        const size_t *dims, size_t dimLength) {
470    DrvScript *drv = (DrvScript *)script->mHal.drv;
471
472    int32_t *destPtr = reinterpret_cast<int32_t *>(
473        drv->mExecutable->getExportVarAddrs()[slot]);
474    if (!destPtr) {
475        //ALOGV("Calling setVar on slot = %i which is null", slot);
476        return;
477    }
478
479    // We want to look at dimension in terms of integer components,
480    // but dimLength is given in terms of bytes.
481    dimLength /= sizeof(int);
482
483    // Only a single dimension is currently supported.
484    rsAssert(dimLength == 1);
485    if (dimLength == 1) {
486        // First do the increment loop.
487        size_t stride = elem->getSizeBytes();
488        char *cVal = reinterpret_cast<char *>(data);
489        for (size_t i = 0; i < dims[0]; i++) {
490            elem->incRefs(cVal);
491            cVal += stride;
492        }
493
494        // Decrement loop comes after (to prevent race conditions).
495        char *oldVal = reinterpret_cast<char *>(destPtr);
496        for (size_t i = 0; i < dims[0]; i++) {
497            elem->decRefs(oldVal);
498            oldVal += stride;
499        }
500    }
501
502    memcpy(destPtr, data, dataLength);
503}
504
505void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, Allocation *data) {
506    DrvScript *drv = (DrvScript *)script->mHal.drv;
507
508    //rsAssert(!script->mFieldIsObject[slot]);
509    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
510
511    int32_t *destPtr = reinterpret_cast<int32_t *>(
512                          drv->mExecutable->getExportVarAddrs()[slot]);
513    if (!destPtr) {
514        //ALOGV("Calling setVar on slot = %i which is null", slot);
515        return;
516    }
517
518    void *ptr = NULL;
519    drv->mBoundAllocs[slot] = data;
520    if(data) {
521        DrvAllocation *allocDrv = (DrvAllocation *)data->mHal.drv;
522        ptr = allocDrv->lod[0].mallocPtr;
523    }
524    memcpy(destPtr, &ptr, sizeof(void *));
525}
526
527void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
528    DrvScript *drv = (DrvScript *)script->mHal.drv;
529    //rsAssert(script->mFieldIsObject[slot]);
530    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
531
532    int32_t *destPtr = reinterpret_cast<int32_t *>(
533                          drv->mExecutable->getExportVarAddrs()[slot]);
534    if (!destPtr) {
535        //ALOGV("Calling setVar on slot = %i which is null", slot);
536        return;
537    }
538
539    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
540}
541
542void rsdScriptDestroy(const Context *dc, Script *script) {
543    DrvScript *drv = (DrvScript *)script->mHal.drv;
544
545    if (drv == NULL) {
546        return;
547    }
548
549    if (drv->mExecutable) {
550        Vector<void *>::const_iterator var_addr_iter =
551            drv->mExecutable->getExportVarAddrs().begin();
552        Vector<void *>::const_iterator var_addr_end =
553            drv->mExecutable->getExportVarAddrs().end();
554
555        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
556            drv->mExecutable->getInfo().getObjectSlots().begin();
557        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
558            drv->mExecutable->getInfo().getObjectSlots().end();
559
560        while ((var_addr_iter != var_addr_end) &&
561               (is_object_iter != is_object_end)) {
562            // The field address can be NULL if the script-side has optimized
563            // the corresponding global variable away.
564            ObjectBase **obj_addr =
565                reinterpret_cast<ObjectBase **>(*var_addr_iter);
566            if (*is_object_iter) {
567                if (*var_addr_iter != NULL) {
568                    rsrClearObject(dc, script, obj_addr);
569                }
570            }
571            var_addr_iter++;
572            is_object_iter++;
573        }
574    }
575
576    delete drv->mCompilerContext;
577    delete drv->mCompilerDriver;
578    delete drv->mExecutable;
579    delete[] drv->mBoundAllocs;
580    free(drv);
581    script->mHal.drv = NULL;
582}
583
584Allocation * rsdScriptGetAllocationForPointer(const android::renderscript::Context *dc,
585                                              const android::renderscript::Script *sc,
586                                              const void *ptr) {
587    DrvScript *drv = (DrvScript *)sc->mHal.drv;
588    if (!ptr) {
589        return NULL;
590    }
591
592    for (uint32_t ct=0; ct < sc->mHal.info.exportedVariableCount; ct++) {
593        Allocation *a = drv->mBoundAllocs[ct];
594        if (!a) continue;
595        DrvAllocation *adrv = (DrvAllocation *)a->mHal.drv;
596        if (adrv->lod[0].mallocPtr == ptr) {
597            return a;
598        }
599    }
600    ALOGE("rsGetAllocation, failed to find %p", ptr);
601    return NULL;
602}
603
604