rsdBcc.cpp revision ec3fc1163c01e18b1454057723c451f0d96868ad
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsdCore.h"
18#include "rsdBcc.h"
19#include "rsdRuntime.h"
20
21#include <bcc/BCCContext.h>
22#include <bcc/RenderScript/RSCompilerDriver.h>
23#include <bcc/RenderScript/RSExecutable.h>
24#include <bcc/RenderScript/RSInfo.h>
25
26#include "rsContext.h"
27#include "rsScriptC.h"
28
29#include "utils/Vector.h"
30#include "utils/Timers.h"
31#include "utils/StopWatch.h"
32
33using namespace android;
34using namespace android::renderscript;
35
36struct DrvScript {
37    int (*mRoot)();
38    int (*mRootExpand)();
39    void (*mInit)();
40    void (*mFreeChildren)();
41
42    bcc::BCCContext *mCompilerContext;
43    bcc::RSCompilerDriver *mCompilerDriver;
44    bcc::RSExecutable *mExecutable;
45};
46
47typedef void (*outer_foreach_t)(
48    const android::renderscript::RsForEachStubParamStruct *,
49    uint32_t x1, uint32_t x2,
50    uint32_t instep, uint32_t outstep);
51
52static Script * setTLS(Script *sc) {
53    ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
54    rsAssert(tls);
55    Script *old = tls->mScript;
56    tls->mScript = sc;
57    return old;
58}
59
60
61bool rsdScriptInit(const Context *rsc,
62                     ScriptC *script,
63                     char const *resName,
64                     char const *cacheDir,
65                     uint8_t const *bitcode,
66                     size_t bitcodeSize,
67                     uint32_t flags) {
68    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
69
70    pthread_mutex_lock(&rsdgInitMutex);
71
72    bcc::RSExecutable *exec;
73    const bcc::RSInfo *info;
74    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
75    if (drv == NULL) {
76        goto error;
77    }
78    script->mHal.drv = drv;
79
80    drv->mCompilerContext = NULL;
81    drv->mCompilerDriver = NULL;
82    drv->mExecutable = NULL;
83
84    drv->mCompilerContext = new bcc::BCCContext();
85    if (drv->mCompilerContext == NULL) {
86        ALOGE("bcc: FAILS to create compiler context (out of memory)");
87        goto error;
88    }
89
90    drv->mCompilerDriver = new bcc::RSCompilerDriver();
91    if (drv->mCompilerDriver == NULL) {
92        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
93        goto error;
94    }
95
96    script->mHal.info.isThreadable = true;
97
98    drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub);
99    drv->mCompilerDriver->setRSRuntimeLookupContext(script);
100
101    exec = drv->mCompilerDriver->build(*drv->mCompilerContext,
102                                       cacheDir, resName,
103                                       (const char *)bitcode, bitcodeSize);
104
105    if (exec == NULL) {
106        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
107        goto error;
108    }
109
110    drv->mExecutable = exec;
111
112    exec->setThreadable(script->mHal.info.isThreadable);
113    if (!exec->syncInfo()) {
114        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
115    }
116
117    drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
118    drv->mRootExpand =
119        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
120    drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
121    drv->mFreeChildren =
122        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
123
124    info = &drv->mExecutable->getInfo();
125    // Copy info over to runtime
126    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
127    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
128    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
129    script->mHal.info.exportedPragmaKeyList =
130        const_cast<const char**>(exec->getPragmaKeys().array());
131    script->mHal.info.exportedPragmaValueList =
132        const_cast<const char**>(exec->getPragmaValues().array());
133
134    if (drv->mRootExpand) {
135        script->mHal.info.root = drv->mRootExpand;
136    } else {
137        script->mHal.info.root = drv->mRoot;
138    }
139
140    pthread_mutex_unlock(&rsdgInitMutex);
141    return true;
142
143error:
144
145    pthread_mutex_unlock(&rsdgInitMutex);
146    if (drv) {
147        delete drv->mCompilerContext;
148        delete drv->mCompilerDriver;
149        delete drv->mExecutable;
150        free(drv);
151    }
152    script->mHal.drv = NULL;
153    return false;
154
155}
156
157typedef struct {
158    Context *rsc;
159    Script *script;
160    ForEachFunc_t kernel;
161    uint32_t sig;
162    const Allocation * ain;
163    Allocation * aout;
164    const void * usr;
165    size_t usrLen;
166
167    uint32_t mSliceSize;
168    volatile int mSliceNum;
169
170    const uint8_t *ptrIn;
171    uint32_t eStrideIn;
172    uint8_t *ptrOut;
173    uint32_t eStrideOut;
174
175    uint32_t yStrideIn;
176    uint32_t yStrideOut;
177
178    uint32_t xStart;
179    uint32_t xEnd;
180    uint32_t yStart;
181    uint32_t yEnd;
182    uint32_t zStart;
183    uint32_t zEnd;
184    uint32_t arrayStart;
185    uint32_t arrayEnd;
186
187    uint32_t dimX;
188    uint32_t dimY;
189    uint32_t dimZ;
190    uint32_t dimArray;
191} MTLaunchStruct;
// Function-pointer type taking (in, out, usr, four uint32_t values).
// NOTE(review): not referenced in the visible part of this file; the meaning
// of the four integers is not established here.
typedef void (*rs_t)(const void *, void *, const void *,
                     uint32_t, uint32_t, uint32_t, uint32_t);
193
194static void wc_xy(void *usr, uint32_t idx) {
195    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
196    RsForEachStubParamStruct p;
197    memset(&p, 0, sizeof(p));
198    p.usr = mtls->usr;
199    p.usr_len = mtls->usrLen;
200    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
201    uint32_t sig = mtls->sig;
202
203    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
204    while (1) {
205        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
206        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
207        uint32_t yEnd = yStart + mtls->mSliceSize;
208        yEnd = rsMin(yEnd, mtls->yEnd);
209        if (yEnd <= yStart) {
210            return;
211        }
212
213        //ALOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
214        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
215        for (p.y = yStart; p.y < yEnd; p.y++) {
216            p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
217            p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
218            fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
219        }
220    }
221}
222
223static void wc_x(void *usr, uint32_t idx) {
224    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
225    RsForEachStubParamStruct p;
226    memset(&p, 0, sizeof(p));
227    p.usr = mtls->usr;
228    p.usr_len = mtls->usrLen;
229    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
230    uint32_t sig = mtls->sig;
231
232    outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
233    while (1) {
234        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
235        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
236        uint32_t xEnd = xStart + mtls->mSliceSize;
237        xEnd = rsMin(xEnd, mtls->xEnd);
238        if (xEnd <= xStart) {
239            return;
240        }
241
242        //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
243        //ALOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
244
245        p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
246        p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
247        fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
248    }
249}
250
251void rsdScriptInvokeForEach(const Context *rsc,
252                            Script *s,
253                            uint32_t slot,
254                            const Allocation * ain,
255                            Allocation * aout,
256                            const void * usr,
257                            uint32_t usrLen,
258                            const RsScriptCall *sc) {
259
260    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
261
262    MTLaunchStruct mtls;
263    memset(&mtls, 0, sizeof(mtls));
264
265    DrvScript *drv = (DrvScript *)s->mHal.drv;
266    rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
267    mtls.kernel = reinterpret_cast<ForEachFunc_t>(
268                      drv->mExecutable->getExportForeachFuncAddrs()[slot]);
269    rsAssert(mtls.kernel != NULL);
270    mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
271
272    if (ain) {
273        mtls.dimX = ain->getType()->getDimX();
274        mtls.dimY = ain->getType()->getDimY();
275        mtls.dimZ = ain->getType()->getDimZ();
276        //mtls.dimArray = ain->getType()->getDimArray();
277    } else if (aout) {
278        mtls.dimX = aout->getType()->getDimX();
279        mtls.dimY = aout->getType()->getDimY();
280        mtls.dimZ = aout->getType()->getDimZ();
281        //mtls.dimArray = aout->getType()->getDimArray();
282    } else {
283        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
284        return;
285    }
286
287    if (!sc || (sc->xEnd == 0)) {
288        mtls.xEnd = mtls.dimX;
289    } else {
290        rsAssert(sc->xStart < mtls.dimX);
291        rsAssert(sc->xEnd <= mtls.dimX);
292        rsAssert(sc->xStart < sc->xEnd);
293        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
294        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
295        if (mtls.xStart >= mtls.xEnd) return;
296    }
297
298    if (!sc || (sc->yEnd == 0)) {
299        mtls.yEnd = mtls.dimY;
300    } else {
301        rsAssert(sc->yStart < mtls.dimY);
302        rsAssert(sc->yEnd <= mtls.dimY);
303        rsAssert(sc->yStart < sc->yEnd);
304        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
305        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
306        if (mtls.yStart >= mtls.yEnd) return;
307    }
308
309    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
310    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
311    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
312    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
313
314    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
315
316    Context *mrsc = (Context *)rsc;
317    Script * oldTLS = setTLS(s);
318
319    mtls.rsc = mrsc;
320    mtls.ain = ain;
321    mtls.aout = aout;
322    mtls.script = s;
323    mtls.usr = usr;
324    mtls.usrLen = usrLen;
325    mtls.mSliceSize = 10;
326    mtls.mSliceNum = 0;
327
328    mtls.ptrIn = NULL;
329    mtls.eStrideIn = 0;
330    if (ain) {
331        mtls.ptrIn = (const uint8_t *)ain->getPtr();
332        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
333        mtls.yStrideIn = ain->mHal.drvState.stride;
334    }
335
336    mtls.ptrOut = NULL;
337    mtls.eStrideOut = 0;
338    if (aout) {
339        mtls.ptrOut = (uint8_t *)aout->getPtr();
340        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
341        mtls.yStrideOut = aout->mHal.drvState.stride;
342    }
343
344    if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
345        if (mtls.dimY > 1) {
346            rsdLaunchThreads(mrsc, wc_xy, &mtls);
347        } else {
348            rsdLaunchThreads(mrsc, wc_x, &mtls);
349        }
350
351        //ALOGE("launch 1");
352    } else {
353        RsForEachStubParamStruct p;
354        memset(&p, 0, sizeof(p));
355        p.usr = mtls.usr;
356        p.usr_len = mtls.usrLen;
357        uint32_t sig = mtls.sig;
358
359        //ALOGE("launch 3");
360        outer_foreach_t fn = (outer_foreach_t) mtls.kernel;
361        for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
362            for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
363                for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
364                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
365                                      mtls.dimX * mtls.dimY * p.z +
366                                      mtls.dimX * p.y;
367                    p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
368                    p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
369                    fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
370                       mtls.eStrideOut);
371                }
372            }
373        }
374    }
375
376    setTLS(oldTLS);
377}
378
379
380int rsdScriptInvokeRoot(const Context *dc, Script *script) {
381    DrvScript *drv = (DrvScript *)script->mHal.drv;
382
383    Script * oldTLS = setTLS(script);
384    int ret = drv->mRoot();
385    setTLS(oldTLS);
386
387    return ret;
388}
389
390void rsdScriptInvokeInit(const Context *dc, Script *script) {
391    DrvScript *drv = (DrvScript *)script->mHal.drv;
392
393    if (drv->mInit) {
394        drv->mInit();
395    }
396}
397
398void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
399    DrvScript *drv = (DrvScript *)script->mHal.drv;
400
401    if (drv->mFreeChildren) {
402        drv->mFreeChildren();
403    }
404}
405
406void rsdScriptInvokeFunction(const Context *dc, Script *script,
407                            uint32_t slot,
408                            const void *params,
409                            size_t paramLength) {
410    DrvScript *drv = (DrvScript *)script->mHal.drv;
411    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
412
413    Script * oldTLS = setTLS(script);
414    reinterpret_cast<void (*)(const void *, uint32_t)>(
415        drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
416    setTLS(oldTLS);
417}
418
419void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
420                           uint32_t slot, void *data, size_t dataLength) {
421    DrvScript *drv = (DrvScript *)script->mHal.drv;
422    //rsAssert(!script->mFieldIsObject[slot]);
423    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
424
425    int32_t *destPtr = reinterpret_cast<int32_t *>(
426                          drv->mExecutable->getExportVarAddrs()[slot]);
427    if (!destPtr) {
428        //ALOGV("Calling setVar on slot = %i which is null", slot);
429        return;
430    }
431
432    memcpy(destPtr, data, dataLength);
433}
434
435void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
436    DrvScript *drv = (DrvScript *)script->mHal.drv;
437    //rsAssert(!script->mFieldIsObject[slot]);
438    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
439
440    int32_t *destPtr = reinterpret_cast<int32_t *>(
441                          drv->mExecutable->getExportVarAddrs()[slot]);
442    if (!destPtr) {
443        //ALOGV("Calling setVar on slot = %i which is null", slot);
444        return;
445    }
446
447    memcpy(destPtr, &data, sizeof(void *));
448}
449
450void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
451    DrvScript *drv = (DrvScript *)script->mHal.drv;
452    //rsAssert(script->mFieldIsObject[slot]);
453    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
454
455    int32_t *destPtr = reinterpret_cast<int32_t *>(
456                          drv->mExecutable->getExportVarAddrs()[slot]);
457    if (!destPtr) {
458        //ALOGV("Calling setVar on slot = %i which is null", slot);
459        return;
460    }
461
462    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
463}
464
465void rsdScriptDestroy(const Context *dc, Script *script) {
466    DrvScript *drv = (DrvScript *)script->mHal.drv;
467
468    if (drv == NULL) {
469        return;
470    }
471
472    if (drv->mExecutable) {
473        Vector<void *>::const_iterator var_addr_iter =
474            drv->mExecutable->getExportVarAddrs().begin();
475        Vector<void *>::const_iterator var_addr_end =
476            drv->mExecutable->getExportVarAddrs().end();
477
478        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
479            drv->mExecutable->getInfo().getObjectSlots().begin();
480        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
481            drv->mExecutable->getInfo().getObjectSlots().end();
482
483        while ((var_addr_iter != var_addr_end) &&
484               (is_object_iter != is_object_end)) {
485            // The field address can be NULL if the script-side has optimized
486            // the corresponding global variable away.
487            ObjectBase **obj_addr =
488                reinterpret_cast<ObjectBase **>(*var_addr_iter);
489            if (*is_object_iter) {
490                if (*var_addr_iter != NULL) {
491                    rsrClearObject(dc, script, obj_addr);
492                }
493            }
494            var_addr_iter++;
495            is_object_iter++;
496        }
497    }
498
499    delete drv->mCompilerContext;
500    delete drv->mCompilerDriver;
501    delete drv->mExecutable;
502
503    free(drv);
504    script->mHal.drv = NULL;
505}
506
507
508