rsdBcc.cpp revision 2980f07d3dbbca727e8efe24ace7e7928a935648
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20
21#include <bcc/BCCContext.h>
22#include <bcc/RenderScript/RSCompilerDriver.h>
23#include <bcc/RenderScript/RSExecutable.h>
24#include <bcc/RenderScript/RSInfo.h>
25
26#include "rsContext.h"
27#include "rsElement.h"
28#include "rsScriptC.h"
29
30#include "utils/Vector.h"
31#include "utils/Timers.h"
32#include "utils/StopWatch.h"
33
34using namespace android;
35using namespace android::renderscript;
36
37struct DrvScript {
38    int (*mRoot)();
39    int (*mRootExpand)();
40    void (*mInit)();
41    void (*mFreeChildren)();
42
43    bcc::BCCContext *mCompilerContext;
44    bcc::RSCompilerDriver *mCompilerDriver;
45    bcc::RSExecutable *mExecutable;
46};
47
48typedef void (*outer_foreach_t)(
49    const android::renderscript::RsForEachStubParamStruct *,
50    uint32_t x1, uint32_t x2,
51    uint32_t instep, uint32_t outstep);
52
53static Script * setTLS(Script *sc) {
54    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
55    rsAssert(tls);
56    Script *old = tls->mScript;
57    tls->mScript = sc;
58    return old;
59}
60
61
62bool rsdScriptInit(const Context *rsc,
63                     ScriptC *script,
64                     char const *resName,
65                     char const *cacheDir,
66                     uint8_t const *bitcode,
67                     size_t bitcodeSize,
68                     uint32_t flags) {
69    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
70
71    pthread_mutex_lock(&rsdgInitMutex);
72
73    bcc::RSExecutable *exec;
74    const bcc::RSInfo *info;
75    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
76    if (drv == NULL) {
77        goto error;
78    }
79    script->mHal.drv = drv;
80
81    drv->mCompilerContext = NULL;
82    drv->mCompilerDriver = NULL;
83    drv->mExecutable = NULL;
84
85    drv->mCompilerContext = new bcc::BCCContext();
86    if (drv->mCompilerContext == NULL) {
87        ALOGE("bcc: FAILS to create compiler context (out of memory)");
88        goto error;
89    }
90
91    drv->mCompilerDriver = new bcc::RSCompilerDriver();
92    if (drv->mCompilerDriver == NULL) {
93        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
94        goto error;
95    }
96
97    script->mHal.info.isThreadable = true;
98
99    drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub);
100    drv->mCompilerDriver->setRSRuntimeLookupContext(script);
101
102    exec = drv->mCompilerDriver->build(*drv->mCompilerContext,
103                                       cacheDir, resName,
104                                       (const char *)bitcode, bitcodeSize);
105
106    if (exec == NULL) {
107        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
108        goto error;
109    }
110
111    drv->mExecutable = exec;
112
113    exec->setThreadable(script->mHal.info.isThreadable);
114    if (!exec->syncInfo()) {
115        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
116    }
117
118    drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
119    drv->mRootExpand =
120        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
121    drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
122    drv->mFreeChildren =
123        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
124
125    info = &drv->mExecutable->getInfo();
126    // Copy info over to runtime
127    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
128    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
129    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
130    script->mHal.info.exportedPragmaKeyList =
131        const_cast<const char**>(exec->getPragmaKeys().array());
132    script->mHal.info.exportedPragmaValueList =
133        const_cast<const char**>(exec->getPragmaValues().array());
134
135    if (drv->mRootExpand) {
136        script->mHal.info.root = drv->mRootExpand;
137    } else {
138        script->mHal.info.root = drv->mRoot;
139    }
140
141    pthread_mutex_unlock(&rsdgInitMutex);
142    return true;
143
144error:
145
146    pthread_mutex_unlock(&rsdgInitMutex);
147    if (drv) {
148        delete drv->mCompilerContext;
149        delete drv->mCompilerDriver;
150        delete drv->mExecutable;
151        free(drv);
152    }
153    script->mHal.drv = NULL;
154    return false;
155
156}
157
158typedef struct {
159    Context *rsc;
160    Script *script;
161    ForEachFunc_t kernel;
162    uint32_t sig;
163    const Allocation * ain;
164    Allocation * aout;
165    const void * usr;
166    size_t usrLen;
167
168    uint32_t mSliceSize;
169    volatile int mSliceNum;
170
171    const uint8_t *ptrIn;
172    uint32_t eStrideIn;
173    uint8_t *ptrOut;
174    uint32_t eStrideOut;
175
176    uint32_t yStrideIn;
177    uint32_t yStrideOut;
178
179    uint32_t xStart;
180    uint32_t xEnd;
181    uint32_t yStart;
182    uint32_t yEnd;
183    uint32_t zStart;
184    uint32_t zEnd;
185    uint32_t arrayStart;
186    uint32_t arrayEnd;
187
188    uint32_t dimX;
189    uint32_t dimY;
190    uint32_t dimZ;
191    uint32_t dimArray;
192} MTLaunchStruct;
193typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
194
195static void wc_xy(void *usr, uint32_t idx) {
196    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
197    RsForEachStubParamStruct p;
198    memset(&p, 0, sizeof(p));
199    p.usr = mtls->usr;
200    p.usr_len = mtls->usrLen;
201    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
202    uint32_t sig = mtls->sig;
203
204    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
205    while (1) {
206        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
207        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
208        uint32_t yEnd = yStart + mtls->mSliceSize;
209        yEnd = rsMin(yEnd, mtls->yEnd);
210        if (yEnd <= yStart) {
211            return;
212        }
213
214        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
215        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
216        for (p.y = yStart; p.y < yEnd; p.y++) {
217            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
218            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
219            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
220        }
221    }
222}
223
224static void wc_x(void *usr, uint32_t idx) {
225    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
226    RsForEachStubParamStruct p;
227    memset(&p, 0, sizeof(p));
228    p.usr = mtls->usr;
229    p.usr_len = mtls->usrLen;
230    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
231    uint32_t sig = mtls->sig;
232
233    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
234    while (1) {
235        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
236        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
237        uint32_t xEnd = xStart + mtls->mSliceSize;
238        xEnd = rsMin(xEnd, mtls->xEnd);
239        if (xEnd <= xStart) {
240            return;
241        }
242
243        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
244        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
245
246        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
247        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
248        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
249    }
250}
251
252void rsdScriptInvokeForEach(const Context *rsc,
253                            Script *s,
254                            uint32_t slot,
255                            const Allocation * ain,
256                            Allocation * aout,
257                            const void * usr,
258                            uint32_t usrLen,
259                            const RsScriptCall *sc) {
260
261    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
262
263    MTLaunchStruct mtls;
264    memset(&mtls, 0, sizeof(mtls));
265
266    DrvScript *drv = (DrvScript *)s->mHal.drv;
267    rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
268    mtls.kernel = reinterpret_cast<ForEachFunc_t>(
269                      drv->mExecutable->getExportForeachFuncAddrs()[slot]);
270    rsAssert(mtls.kernel != NULL);
271    mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
272
273    if (ain) {
274        mtls.dimX = ain->getType()->getDimX();
275        mtls.dimY = ain->getType()->getDimY();
276        mtls.dimZ = ain->getType()->getDimZ();
277        //mtls.dimArray = ain->getType()->getDimArray();
278    } else if (aout) {
279        mtls.dimX = aout->getType()->getDimX();
280        mtls.dimY = aout->getType()->getDimY();
281        mtls.dimZ = aout->getType()->getDimZ();
282        //mtls.dimArray = aout->getType()->getDimArray();
283    } else {
284        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
285        return;
286    }
287
288    if (!sc || (sc->xEnd == 0)) {
289        mtls.xEnd = mtls.dimX;
290    } else {
291        rsAssert(sc->xStart < mtls.dimX);
292        rsAssert(sc->xEnd <= mtls.dimX);
293        rsAssert(sc->xStart < sc->xEnd);
294        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
295        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
296        if (mtls.xStart >= mtls.xEnd) return;
297    }
298
299    if (!sc || (sc->yEnd == 0)) {
300        mtls.yEnd = mtls.dimY;
301    } else {
302        rsAssert(sc->yStart < mtls.dimY);
303        rsAssert(sc->yEnd <= mtls.dimY);
304        rsAssert(sc->yStart < sc->yEnd);
305        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
306        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
307        if (mtls.yStart >= mtls.yEnd) return;
308    }
309
310    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
311    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
312    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
313    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
314
315    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
316
317    Context *mrsc = (Context *)rsc;
318    Script * oldTLS = setTLS(s);
319
320    mtls.rsc = mrsc;
321    mtls.ain = ain;
322    mtls.aout = aout;
323    mtls.script = s;
324    mtls.usr = usr;
325    mtls.usrLen = usrLen;
326    mtls.mSliceSize = 10;
327    mtls.mSliceNum = 0;
328
329    mtls.ptrIn = NULL;
330    mtls.eStrideIn = 0;
331    if (ain) {
332        mtls.ptrIn = (const uint8_t *)ain->getPtr();
333        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
334        mtls.yStrideIn = ain->mHal.drvState.stride;
335    }
336
337    mtls.ptrOut = NULL;
338    mtls.eStrideOut = 0;
339    if (aout) {
340        mtls.ptrOut = (uint8_t *)aout->getPtr();
341        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
342        mtls.yStrideOut = aout->mHal.drvState.stride;
343    }
344
345    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
346        if (mtls.dimY > 1) {
347            rsdLaunchThreads(mrsc, wc_xy, &mtls);
348        } else {
349            rsdLaunchThreads(mrsc, wc_x, &mtls);
350        }
351
352        //ALOGE("launch 1");
353    } else {
354        RsForEachStubParamStruct p;
355        memset(&p, 0, sizeof(p));
356        p.usr = mtls.usr;
357        p.usr_len = mtls.usrLen;
358        uint32_t sig = mtls.sig;
359
360        //ALOGE("launch 3");
361        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
362        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
363            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
364                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
365                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
366                                      mtls.dimX * mtls.dimY * p.z +
367                                      mtls.dimX * p.y;
368                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
369                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
370                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
371                       mtls.eStrideOut);
372                }
373            }
374        }
375    }
376
377    setTLS(oldTLS);
378}
379
380
381int rsdScriptInvokeRoot(const Context *dc, Script *script) {
382    DrvScript *drv = (DrvScript *)script->mHal.drv;
383
384    Script * oldTLS = setTLS(script);
385    int ret = drv->mRoot();
386    setTLS(oldTLS);
387
388    return ret;
389}
390
391void rsdScriptInvokeInit(const Context *dc, Script *script) {
392    DrvScript *drv = (DrvScript *)script->mHal.drv;
393
394    if (drv->mInit) {
395        drv->mInit();
396    }
397}
398
399void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
400    DrvScript *drv = (DrvScript *)script->mHal.drv;
401
402    if (drv->mFreeChildren) {
403        drv->mFreeChildren();
404    }
405}
406
407void rsdScriptInvokeFunction(const Context *dc, Script *script,
408                            uint32_t slot,
409                            const void *params,
410                            size_t paramLength) {
411    DrvScript *drv = (DrvScript *)script->mHal.drv;
412    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
413
414    Script * oldTLS = setTLS(script);
415    reinterpret_cast<void (*)(const void *, uint32_t)>(
416        drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
417    setTLS(oldTLS);
418}
419
420void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
421                           uint32_t slot, void *data, size_t dataLength) {
422    DrvScript *drv = (DrvScript *)script->mHal.drv;
423    //rsAssert(!script->mFieldIsObject[slot]);
424    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
425
426    int32_t *destPtr = reinterpret_cast<int32_t *>(
427                          drv->mExecutable->getExportVarAddrs()[slot]);
428    if (!destPtr) {
429        //ALOGV("Calling setVar on slot = %i which is null", slot);
430        return;
431    }
432
433    memcpy(destPtr, data, dataLength);
434}
435
436void rsdScriptSetGlobalVarWithElemDims(
437        const android::renderscript::Context *dc,
438        const android::renderscript::Script *script,
439        uint32_t slot, void *data, size_t dataLength,
440        const android::renderscript::Element *elem,
441        const size_t *dims, size_t dimLength) {
442    DrvScript *drv = (DrvScript *)script->mHal.drv;
443
444    int32_t *destPtr = reinterpret_cast<int32_t *>(
445                          drv->mExecutable->getExportVarAddrs()[slot]);
446    if (!destPtr) {
447        //ALOGV("Calling setVar on slot = %i which is null", slot);
448        return;
449    }
450
451    // We want to look at dimension in terms of integer components,
452    // but dimLength is given in terms of bytes.
453    dimLength /= sizeof(int);
454
455    // Only a single dimension is currently supported.
456    rsAssert(dimLength == 1);
457    if (dimLength == 1) {
458        // First do the increment loop.
459        size_t stride = elem->getSizeBytes();
460        char *cVal = reinterpret_cast<char *>(data);
461        for (size_t i = 0; i < dims[0]; i++) {
462            elem->incRefs(cVal);
463            cVal += stride;
464        }
465
466        // Decrement loop comes after (to prevent race conditions).
467        char *oldVal = reinterpret_cast<char *>(destPtr);
468        for (size_t i = 0; i < dims[0]; i++) {
469            elem->decRefs(oldVal);
470            oldVal += stride;
471        }
472    }
473
474    memcpy(destPtr, data, dataLength);
475}
476
477void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
478    DrvScript *drv = (DrvScript *)script->mHal.drv;
479    //rsAssert(!script->mFieldIsObject[slot]);
480    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
481
482    int32_t *destPtr = reinterpret_cast<int32_t *>(
483                          drv->mExecutable->getExportVarAddrs()[slot]);
484    if (!destPtr) {
485        //ALOGV("Calling setVar on slot = %i which is null", slot);
486        return;
487    }
488
489    memcpy(destPtr, &data, sizeof(void *));
490}
491
492void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
493    DrvScript *drv = (DrvScript *)script->mHal.drv;
494    //rsAssert(script->mFieldIsObject[slot]);
495    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
496
497    int32_t *destPtr = reinterpret_cast<int32_t *>(
498                          drv->mExecutable->getExportVarAddrs()[slot]);
499    if (!destPtr) {
500        //ALOGV("Calling setVar on slot = %i which is null", slot);
501        return;
502    }
503
504    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
505}
506
507void rsdScriptDestroy(const Context *dc, Script *script) {
508    DrvScript *drv = (DrvScript *)script->mHal.drv;
509
510    if (drv == NULL) {
511        return;
512    }
513
514    if (drv->mExecutable) {
515        Vector<void *>::const_iterator var_addr_iter =
516            drv->mExecutable->getExportVarAddrs().begin();
517        Vector<void *>::const_iterator var_addr_end =
518            drv->mExecutable->getExportVarAddrs().end();
519
520        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
521            drv->mExecutable->getInfo().getObjectSlots().begin();
522        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
523            drv->mExecutable->getInfo().getObjectSlots().end();
524
525        while ((var_addr_iter != var_addr_end) &&
526               (is_object_iter != is_object_end)) {
527            // The field address can be NULL if the script-side has optimized
528            // the corresponding global variable away.
529            ObjectBase **obj_addr =
530                reinterpret_cast<ObjectBase **>(*var_addr_iter);
531            if (*is_object_iter) {
532                if (*var_addr_iter != NULL) {
533                    rsrClearObject(dc, script, obj_addr);
534                }
535            }
536            var_addr_iter++;
537            is_object_iter++;
538        }
539    }
540
541    delete drv->mCompilerContext;
542    delete drv->mCompilerDriver;
543    delete drv->mExecutable;
544
545    free(drv);
546    script->mHal.drv = NULL;
547}
548
549
550