rsdBcc.cpp revision cca7ee69f5ae9aa5832082b81eb4fa0db84c5fa2
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20
21#include <bcc/BCCContext.h>
22#include <bcc/RenderScript/RSCompilerDriver.h>
23#include <bcc/RenderScript/RSExecutable.h>
24#include <bcc/RenderScript/RSInfo.h>
25
26#include "rsContext.h"
27#include "rsElement.h"
28#include "rsScriptC.h"
29
30#include "utils/Vector.h"
31#include "utils/Timers.h"
32#include "utils/StopWatch.h"
33
34using namespace android;
35using namespace android::renderscript;
36
37struct DrvScript {
38    int (*mRoot)();
39    int (*mRootExpand)();
40    void (*mInit)();
41    void (*mFreeChildren)();
42
43    bcc::BCCContext *mCompilerContext;
44    bcc::RSCompilerDriver *mCompilerDriver;
45    bcc::RSExecutable *mExecutable;
46};
47
48typedef void (*outer_foreach_t)(
49    const android::renderscript::RsForEachStubParamStruct *,
50    uint32_t x1, uint32_t x2,
51    uint32_t instep, uint32_t outstep);
52
53static Script * setTLS(Script *sc) {
54    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
55    rsAssert(tls);
56    Script *old = tls->mScript;
57    tls->mScript = sc;
58    return old;
59}
60
61
62bool rsdScriptInit(const Context *rsc,
63                     ScriptC *script,
64                     char const *resName,
65                     char const *cacheDir,
66                     uint8_t const *bitcode,
67                     size_t bitcodeSize,
68                     uint32_t flags) {
69    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
70    //ALOGE("rsdScriptInit %p %p", rsc, script);
71
72    pthread_mutex_lock(&rsdgInitMutex);
73
74    const char* coreLib = "/system/lib/libclcore.bc";
75    bcc::RSInfo::FloatPrecision prec;
76    bcc::RSExecutable *exec;
77    const bcc::RSInfo *info;
78    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
79    if (drv == NULL) {
80        goto error;
81    }
82    script->mHal.drv = drv;
83
84    drv->mCompilerContext = NULL;
85    drv->mCompilerDriver = NULL;
86    drv->mExecutable = NULL;
87
88    drv->mCompilerContext = new bcc::BCCContext();
89    if (drv->mCompilerContext == NULL) {
90        ALOGE("bcc: FAILS to create compiler context (out of memory)");
91        goto error;
92    }
93
94    drv->mCompilerDriver = new bcc::RSCompilerDriver();
95    if (drv->mCompilerDriver == NULL) {
96        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
97        goto error;
98    }
99
100    // NEON-capable devices can use an accelerated math library for all
101    // reduced precision scripts.
102#if defined(ARCH_ARM_HAVE_NEON)
103    prec = drv->mExecutable->getInfo().getFloatPrecisionRequirement();
104    if (prec != bcc::RSInfo::FP_Full) {
105        coreLib = "/system/lib/libclcore_neon.bc";
106    }
107#endif
108
109    script->mHal.info.isThreadable = true;
110
111    drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub);
112    drv->mCompilerDriver->setRSRuntimeLookupContext(script);
113
114    exec = drv->mCompilerDriver->build(*drv->mCompilerContext,
115                                       cacheDir, resName,
116                                       (const char *)bitcode, bitcodeSize);
117
118    if (exec == NULL) {
119        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
120        goto error;
121    }
122
123    drv->mExecutable = exec;
124
125    exec->setThreadable(script->mHal.info.isThreadable);
126    if (!exec->syncInfo()) {
127        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
128    }
129
130    drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
131    drv->mRootExpand =
132        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
133    drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
134    drv->mFreeChildren =
135        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
136
137    info = &drv->mExecutable->getInfo();
138    // Copy info over to runtime
139    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
140    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
141    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
142    script->mHal.info.exportedPragmaKeyList =
143        const_cast<const char**>(exec->getPragmaKeys().array());
144    script->mHal.info.exportedPragmaValueList =
145        const_cast<const char**>(exec->getPragmaValues().array());
146
147    if (drv->mRootExpand) {
148        script->mHal.info.root = drv->mRootExpand;
149    } else {
150        script->mHal.info.root = drv->mRoot;
151    }
152
153    pthread_mutex_unlock(&rsdgInitMutex);
154    return true;
155
156error:
157
158    pthread_mutex_unlock(&rsdgInitMutex);
159    if (drv) {
160        delete drv->mCompilerContext;
161        delete drv->mCompilerDriver;
162        delete drv->mExecutable;
163        free(drv);
164    }
165    script->mHal.drv = NULL;
166    return false;
167
168}
169
170typedef struct {
171    Context *rsc;
172    Script *script;
173    ForEachFunc_t kernel;
174    uint32_t sig;
175    const Allocation * ain;
176    Allocation * aout;
177    const void * usr;
178    size_t usrLen;
179
180    uint32_t mSliceSize;
181    volatile int mSliceNum;
182
183    const uint8_t *ptrIn;
184    uint32_t eStrideIn;
185    uint8_t *ptrOut;
186    uint32_t eStrideOut;
187
188    uint32_t yStrideIn;
189    uint32_t yStrideOut;
190
191    uint32_t xStart;
192    uint32_t xEnd;
193    uint32_t yStart;
194    uint32_t yEnd;
195    uint32_t zStart;
196    uint32_t zEnd;
197    uint32_t arrayStart;
198    uint32_t arrayEnd;
199
200    uint32_t dimX;
201    uint32_t dimY;
202    uint32_t dimZ;
203    uint32_t dimArray;
204} MTLaunchStruct;
// Function-pointer type taking three untyped pointers followed by four
// 32-bit unsigned values.
// NOTE(review): not referenced in the visible part of this file - confirm
// whether it is still needed.
typedef void (*rs_t)(const void *,
                     void *,
                     const void *,
                     uint32_t,
                     uint32_t,
                     uint32_t,
                     uint32_t);
206
207static void wc_xy(void *usr, uint32_t idx) {
208    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
209    RsForEachStubParamStruct p;
210    memset(&p, 0, sizeof(p));
211    p.usr = mtls->usr;
212    p.usr_len = mtls->usrLen;
213    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
214    uint32_t sig = mtls->sig;
215
216    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
217    while (1) {
218        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
219        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
220        uint32_t yEnd = yStart + mtls->mSliceSize;
221        yEnd = rsMin(yEnd, mtls->yEnd);
222        if (yEnd <= yStart) {
223            return;
224        }
225
226        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
227        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
228        for (p.y = yStart; p.y < yEnd; p.y++) {
229            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
230            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
231            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
232        }
233    }
234}
235
236static void wc_x(void *usr, uint32_t idx) {
237    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
238    RsForEachStubParamStruct p;
239    memset(&p, 0, sizeof(p));
240    p.usr = mtls->usr;
241    p.usr_len = mtls->usrLen;
242    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
243    uint32_t sig = mtls->sig;
244
245    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
246    while (1) {
247        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
248        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
249        uint32_t xEnd = xStart + mtls->mSliceSize;
250        xEnd = rsMin(xEnd, mtls->xEnd);
251        if (xEnd <= xStart) {
252            return;
253        }
254
255        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
256        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
257
258        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
259        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
260        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
261    }
262}
263
264void rsdScriptInvokeForEach(const Context *rsc,
265                            Script *s,
266                            uint32_t slot,
267                            const Allocation * ain,
268                            Allocation * aout,
269                            const void * usr,
270                            uint32_t usrLen,
271                            const RsScriptCall *sc) {
272
273    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
274
275    MTLaunchStruct mtls;
276    memset(&mtls, 0, sizeof(mtls));
277
278    //ALOGE("for each script %p  in %p   out %p", s, ain, aout);
279
280    DrvScript *drv = (DrvScript *)s->mHal.drv;
281    rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
282    mtls.kernel = reinterpret_cast<ForEachFunc_t>(
283                      drv->mExecutable->getExportForeachFuncAddrs()[slot]);
284    rsAssert(mtls.kernel != NULL);
285    mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
286
287    if (ain) {
288        mtls.dimX = ain->getType()->getDimX();
289        mtls.dimY = ain->getType()->getDimY();
290        mtls.dimZ = ain->getType()->getDimZ();
291        //mtls.dimArray = ain->getType()->getDimArray();
292    } else if (aout) {
293        mtls.dimX = aout->getType()->getDimX();
294        mtls.dimY = aout->getType()->getDimY();
295        mtls.dimZ = aout->getType()->getDimZ();
296        //mtls.dimArray = aout->getType()->getDimArray();
297    } else {
298        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
299        return;
300    }
301
302    if (!sc || (sc->xEnd == 0)) {
303        mtls.xEnd = mtls.dimX;
304    } else {
305        rsAssert(sc->xStart < mtls.dimX);
306        rsAssert(sc->xEnd <= mtls.dimX);
307        rsAssert(sc->xStart < sc->xEnd);
308        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
309        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
310        if (mtls.xStart >= mtls.xEnd) return;
311    }
312
313    if (!sc || (sc->yEnd == 0)) {
314        mtls.yEnd = mtls.dimY;
315    } else {
316        rsAssert(sc->yStart < mtls.dimY);
317        rsAssert(sc->yEnd <= mtls.dimY);
318        rsAssert(sc->yStart < sc->yEnd);
319        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
320        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
321        if (mtls.yStart >= mtls.yEnd) return;
322    }
323
324    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
325    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
326    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
327    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
328
329    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
330
331    Context *mrsc = (Context *)rsc;
332    Script * oldTLS = setTLS(s);
333
334    mtls.rsc = mrsc;
335    mtls.ain = ain;
336    mtls.aout = aout;
337    mtls.script = s;
338    mtls.usr = usr;
339    mtls.usrLen = usrLen;
340    mtls.mSliceSize = 10;
341    mtls.mSliceNum = 0;
342
343    mtls.ptrIn = NULL;
344    mtls.eStrideIn = 0;
345    if (ain) {
346        mtls.ptrIn = (const uint8_t *)ain->getPtr();
347        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
348        mtls.yStrideIn = ain->mHal.drvState.stride;
349    }
350
351    mtls.ptrOut = NULL;
352    mtls.eStrideOut = 0;
353    if (aout) {
354        mtls.ptrOut = (uint8_t *)aout->getPtr();
355        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
356        mtls.yStrideOut = aout->mHal.drvState.stride;
357    }
358
359    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) {
360        dc->mInForEach = true;
361        if (mtls.dimY > 1) {
362            mtls.mSliceSize = mtls.dimY / (dc->mWorkers.mCount * 4);
363            if(mtls.mSliceSize < 1) {
364                mtls.mSliceSize = 1;
365            }
366
367            rsdLaunchThreads(mrsc, wc_xy, &mtls);
368        } else {
369            mtls.mSliceSize = mtls.dimX / (dc->mWorkers.mCount * 4);
370            if(mtls.mSliceSize < 1) {
371                mtls.mSliceSize = 1;
372            }
373
374            rsdLaunchThreads(mrsc, wc_x, &mtls);
375        }
376        dc->mInForEach = false;
377
378        //ALOGE("launch 1");
379    } else {
380        RsForEachStubParamStruct p;
381        memset(&p, 0, sizeof(p));
382        p.usr = mtls.usr;
383        p.usr_len = mtls.usrLen;
384        uint32_t sig = mtls.sig;
385
386        //ALOGE("launch 3");
387        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
388        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
389            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
390                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
391                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
392                                      mtls.dimX * mtls.dimY * p.z +
393                                      mtls.dimX * p.y;
394                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
395                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
396                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
397                       mtls.eStrideOut);
398                }
399            }
400        }
401    }
402
403    setTLS(oldTLS);
404}
405
406
407int rsdScriptInvokeRoot(const Context *dc, Script *script) {
408    DrvScript *drv = (DrvScript *)script->mHal.drv;
409
410    Script * oldTLS = setTLS(script);
411    int ret = drv->mRoot();
412    setTLS(oldTLS);
413
414    return ret;
415}
416
417void rsdScriptInvokeInit(const Context *dc, Script *script) {
418    DrvScript *drv = (DrvScript *)script->mHal.drv;
419
420    if (drv->mInit) {
421        drv->mInit();
422    }
423}
424
425void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
426    DrvScript *drv = (DrvScript *)script->mHal.drv;
427
428    if (drv->mFreeChildren) {
429        drv->mFreeChildren();
430    }
431}
432
433void rsdScriptInvokeFunction(const Context *dc, Script *script,
434                            uint32_t slot,
435                            const void *params,
436                            size_t paramLength) {
437    DrvScript *drv = (DrvScript *)script->mHal.drv;
438    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
439
440    Script * oldTLS = setTLS(script);
441    reinterpret_cast<void (*)(const void *, uint32_t)>(
442        drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
443    setTLS(oldTLS);
444}
445
446void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
447                           uint32_t slot, void *data, size_t dataLength) {
448    DrvScript *drv = (DrvScript *)script->mHal.drv;
449    //rsAssert(!script->mFieldIsObject[slot]);
450    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
451
452    int32_t *destPtr = reinterpret_cast<int32_t *>(
453                          drv->mExecutable->getExportVarAddrs()[slot]);
454    if (!destPtr) {
455        //ALOGV("Calling setVar on slot = %i which is null", slot);
456        return;
457    }
458
459    memcpy(destPtr, data, dataLength);
460}
461
462void rsdScriptSetGlobalVarWithElemDims(
463        const android::renderscript::Context *dc,
464        const android::renderscript::Script *script,
465        uint32_t slot, void *data, size_t dataLength,
466        const android::renderscript::Element *elem,
467        const size_t *dims, size_t dimLength) {
468    DrvScript *drv = (DrvScript *)script->mHal.drv;
469
470    int32_t *destPtr = reinterpret_cast<int32_t *>(
471        drv->mExecutable->getExportVarAddrs()[slot]);
472    if (!destPtr) {
473        //ALOGV("Calling setVar on slot = %i which is null", slot);
474        return;
475    }
476
477    // We want to look at dimension in terms of integer components,
478    // but dimLength is given in terms of bytes.
479    dimLength /= sizeof(int);
480
481    // Only a single dimension is currently supported.
482    rsAssert(dimLength == 1);
483    if (dimLength == 1) {
484        // First do the increment loop.
485        size_t stride = elem->getSizeBytes();
486        char *cVal = reinterpret_cast<char *>(data);
487        for (size_t i = 0; i < dims[0]; i++) {
488            elem->incRefs(cVal);
489            cVal += stride;
490        }
491
492        // Decrement loop comes after (to prevent race conditions).
493        char *oldVal = reinterpret_cast<char *>(destPtr);
494        for (size_t i = 0; i < dims[0]; i++) {
495            elem->decRefs(oldVal);
496            oldVal += stride;
497        }
498    }
499
500    memcpy(destPtr, data, dataLength);
501}
502
503void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
504    DrvScript *drv = (DrvScript *)script->mHal.drv;
505    //rsAssert(!script->mFieldIsObject[slot]);
506    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
507
508    int32_t *destPtr = reinterpret_cast<int32_t *>(
509                          drv->mExecutable->getExportVarAddrs()[slot]);
510    if (!destPtr) {
511        //ALOGV("Calling setVar on slot = %i which is null", slot);
512        return;
513    }
514
515    memcpy(destPtr, &data, sizeof(void *));
516}
517
518void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
519    DrvScript *drv = (DrvScript *)script->mHal.drv;
520    //rsAssert(script->mFieldIsObject[slot]);
521    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
522
523    int32_t *destPtr = reinterpret_cast<int32_t *>(
524                          drv->mExecutable->getExportVarAddrs()[slot]);
525    if (!destPtr) {
526        //ALOGV("Calling setVar on slot = %i which is null", slot);
527        return;
528    }
529
530    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
531}
532
533void rsdScriptDestroy(const Context *dc, Script *script) {
534    DrvScript *drv = (DrvScript *)script->mHal.drv;
535
536    if (drv == NULL) {
537        return;
538    }
539
540    if (drv->mExecutable) {
541        Vector<void *>::const_iterator var_addr_iter =
542            drv->mExecutable->getExportVarAddrs().begin();
543        Vector<void *>::const_iterator var_addr_end =
544            drv->mExecutable->getExportVarAddrs().end();
545
546        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
547            drv->mExecutable->getInfo().getObjectSlots().begin();
548        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
549            drv->mExecutable->getInfo().getObjectSlots().end();
550
551        while ((var_addr_iter != var_addr_end) &&
552               (is_object_iter != is_object_end)) {
553            // The field address can be NULL if the script-side has optimized
554            // the corresponding global variable away.
555            ObjectBase **obj_addr =
556                reinterpret_cast<ObjectBase **>(*var_addr_iter);
557            if (*is_object_iter) {
558                if (*var_addr_iter != NULL) {
559                    rsrClearObject(dc, script, obj_addr);
560                }
561            }
562            var_addr_iter++;
563            is_object_iter++;
564        }
565    }
566
567    delete drv->mCompilerContext;
568    delete drv->mCompilerDriver;
569    delete drv->mExecutable;
570
571    free(drv);
572    script->mHal.drv = NULL;
573}
574