rsCpuScript.cpp revision 709a0978ae141198018ca9769f8d96292a8928e6
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18
19#include "rsCpuCore.h"
20
21#include "rsCpuScript.h"
22//#include "rsdRuntime.h"
23//#include "rsdAllocation.h"
24//#include "rsCpuIntrinsics.h"
25
26
27#include "utils/Vector.h"
28#include "utils/Timers.h"
29#include "utils/StopWatch.h"
30
31
32#include <bcc/BCCContext.h>
33#include <bcc/Renderscript/RSCompilerDriver.h>
34#include <bcc/Renderscript/RSExecutable.h>
35#include <bcc/Renderscript/RSInfo.h>
36
37namespace android {
38namespace renderscript {
39
40
41
42RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
43    mCtx = ctx;
44    mScript = s;
45
46    mRoot = NULL;
47    mRootExpand = NULL;
48    mInit = NULL;
49    mFreeChildren = NULL;
50
51    mCompilerContext = NULL;
52    mCompilerDriver = NULL;
53    mExecutable = NULL;
54
55    mBoundAllocs = NULL;
56    mIntrinsicData = NULL;
57    mIsThreadable = true;
58}
59
60
61bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
62                            uint8_t const *bitcode, size_t bitcodeSize,
63                            uint32_t flags) {
64    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
65    //ALOGE("rsdScriptInit %p %p", rsc, script);
66
67    mCtx->lockMutex();
68
69    bcc::RSExecutable *exec;
70    const bcc::RSInfo *info;
71
72    mCompilerContext = NULL;
73    mCompilerDriver = NULL;
74    mExecutable = NULL;
75
76    mCompilerContext = new bcc::BCCContext();
77    if (mCompilerContext == NULL) {
78        ALOGE("bcc: FAILS to create compiler context (out of memory)");
79        mCtx->unlockMutex();
80        return false;
81    }
82
83    mCompilerDriver = new bcc::RSCompilerDriver();
84    if (mCompilerDriver == NULL) {
85        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
86        mCtx->unlockMutex();
87        return false;
88    }
89
90    mCompilerDriver->setRSRuntimeLookupFunction(lookupRuntimeStub);
91    mCompilerDriver->setRSRuntimeLookupContext(this);
92
93    exec = mCompilerDriver->build(*mCompilerContext, cacheDir, resName,
94                                  (const char *)bitcode, bitcodeSize, NULL);
95
96    if (exec == NULL) {
97        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
98        mCtx->unlockMutex();
99        return false;
100    }
101
102    mExecutable = exec;
103
104    exec->setThreadable(mIsThreadable);
105    if (!exec->syncInfo()) {
106        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
107    }
108
109    mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
110    mRootExpand =
111        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
112    mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
113    mFreeChildren =
114        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
115
116
117    info = &mExecutable->getInfo();
118    if (info->getExportVarNames().size()) {
119        mBoundAllocs = new Allocation *[info->getExportVarNames().size()];
120        memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size());
121    }
122
123    mCtx->unlockMutex();
124    return true;
125}
126
127void RsdCpuScriptImpl::populateScript(Script *script) {
128    const bcc::RSInfo *info = &mExecutable->getInfo();
129
130    // Copy info over to runtime
131    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
132    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
133    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
134    script->mHal.info.exportedPragmaKeyList =
135        const_cast<const char**>(mExecutable->getPragmaKeys().array());
136    script->mHal.info.exportedPragmaValueList =
137        const_cast<const char**>(mExecutable->getPragmaValues().array());
138
139    if (mRootExpand) {
140        script->mHal.info.root = mRootExpand;
141    } else {
142        script->mHal.info.root = mRoot;
143    }
144}
145
146/*
147bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) {
148    pthread_mutex_lock(&rsdgInitMutex);
149
150    DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
151    if (drv == NULL) {
152        goto error;
153    }
154    s->mHal.drv = drv;
155    drv->mIntrinsicID = iid;
156    drv->mIntrinsicData = rsdIntrinsic_Init(rsc, s, iid, &drv->mIntrinsicFuncs);
157    s->mHal.info.isThreadable = true;
158
159    pthread_mutex_unlock(&rsdgInitMutex);
160    return true;
161
162error:
163    pthread_mutex_unlock(&rsdgInitMutex);
164    return false;
165}
166*/
167
// Pointer to a function taking three untyped pointers (const, mutable, const)
// followed by four 32-bit unsigned values and returning nothing.
// NOTE(review): not referenced elsewhere in the visible part of this file;
// presumably a kernel/root signature - confirm before relying on it.
typedef void (*rs_t)(const void *,
                     void *,
                     const void *,
                     uint32_t,
                     uint32_t,
                     uint32_t,
                     uint32_t);
169
170void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
171                                        const void * usr, uint32_t usrLen,
172                                        const RsScriptCall *sc,
173                                        MTLaunchStruct *mtls) {
174
175    memset(mtls, 0, sizeof(MTLaunchStruct));
176
177    if (ain) {
178        mtls->fep.dimX = ain->getType()->getDimX();
179        mtls->fep.dimY = ain->getType()->getDimY();
180        mtls->fep.dimZ = ain->getType()->getDimZ();
181        //mtls->dimArray = ain->getType()->getDimArray();
182    } else if (aout) {
183        mtls->fep.dimX = aout->getType()->getDimX();
184        mtls->fep.dimY = aout->getType()->getDimY();
185        mtls->fep.dimZ = aout->getType()->getDimZ();
186        //mtls->dimArray = aout->getType()->getDimArray();
187    } else {
188        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
189        return;
190    }
191
192    if (!sc || (sc->xEnd == 0)) {
193        mtls->xEnd = mtls->fep.dimX;
194    } else {
195        rsAssert(sc->xStart < mtls->fep.dimX);
196        rsAssert(sc->xEnd <= mtls->fep.dimX);
197        rsAssert(sc->xStart < sc->xEnd);
198        mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
199        mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
200        if (mtls->xStart >= mtls->xEnd) return;
201    }
202
203    if (!sc || (sc->yEnd == 0)) {
204        mtls->yEnd = mtls->fep.dimY;
205    } else {
206        rsAssert(sc->yStart < mtls->fep.dimY);
207        rsAssert(sc->yEnd <= mtls->fep.dimY);
208        rsAssert(sc->yStart < sc->yEnd);
209        mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
210        mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
211        if (mtls->yStart >= mtls->yEnd) return;
212    }
213
214    mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
215    mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
216    mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
217    mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
218
219    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
220
221    mtls->rsc = mCtx;
222    mtls->ain = ain;
223    mtls->aout = aout;
224    mtls->fep.usr = usr;
225    mtls->fep.usrLen = usrLen;
226    mtls->mSliceSize = 1;
227    mtls->mSliceNum = 0;
228
229    mtls->fep.ptrIn = NULL;
230    mtls->fep.eStrideIn = 0;
231    mtls->isThreadable = mIsThreadable;
232
233    if (ain) {
234        mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
235        mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
236        mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
237    }
238
239    mtls->fep.ptrOut = NULL;
240    mtls->fep.eStrideOut = 0;
241    if (aout) {
242        mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
243        mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
244        mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
245    }
246}
247
248
249void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
250                                     const Allocation * ain,
251                                     Allocation * aout,
252                                     const void * usr,
253                                     uint32_t usrLen,
254                                     const RsScriptCall *sc) {
255
256    MTLaunchStruct mtls;
257    forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
258    forEachKernelSetup(slot, &mtls);
259
260    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
261    mCtx->launchThreads(ain, aout, sc, &mtls);
262    mCtx->setTLS(oldTLS);
263}
264
265void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
266
267    mtls->script = this;
268    mtls->fep.slot = slot;
269
270    rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
271    mtls->kernel = reinterpret_cast<ForEachFunc_t>(
272                      mExecutable->getExportForeachFuncAddrs()[slot]);
273    rsAssert(mtls->kernel != NULL);
274    mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
275}
276
277int RsdCpuScriptImpl::invokeRoot() {
278    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
279    int ret = mRoot();
280    mCtx->setTLS(oldTLS);
281    return ret;
282}
283
284void RsdCpuScriptImpl::invokeInit() {
285    if (mInit) {
286        mInit();
287    }
288}
289
290void RsdCpuScriptImpl::invokeFreeChildren() {
291    if (mFreeChildren) {
292        mFreeChildren();
293    }
294}
295
296void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
297                                      size_t paramLength) {
298    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
299
300    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
301    reinterpret_cast<void (*)(const void *, uint32_t)>(
302        mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
303    mCtx->setTLS(oldTLS);
304}
305
306void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
307    //rsAssert(!script->mFieldIsObject[slot]);
308    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
309
310    //if (mIntrinsicID) {
311        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
312        //return;
313    //}
314
315    int32_t *destPtr = reinterpret_cast<int32_t *>(
316                          mExecutable->getExportVarAddrs()[slot]);
317    if (!destPtr) {
318        //ALOGV("Calling setVar on slot = %i which is null", slot);
319        return;
320    }
321
322    memcpy(destPtr, data, dataLength);
323}
324
325void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
326                                                const Element *elem,
327                                                const size_t *dims, size_t dimLength) {
328
329    int32_t *destPtr = reinterpret_cast<int32_t *>(
330        mExecutable->getExportVarAddrs()[slot]);
331    if (!destPtr) {
332        //ALOGV("Calling setVar on slot = %i which is null", slot);
333        return;
334    }
335
336    // We want to look at dimension in terms of integer components,
337    // but dimLength is given in terms of bytes.
338    dimLength /= sizeof(int);
339
340    // Only a single dimension is currently supported.
341    rsAssert(dimLength == 1);
342    if (dimLength == 1) {
343        // First do the increment loop.
344        size_t stride = elem->getSizeBytes();
345        const char *cVal = reinterpret_cast<const char *>(data);
346        for (size_t i = 0; i < dims[0]; i++) {
347            elem->incRefs(cVal);
348            cVal += stride;
349        }
350
351        // Decrement loop comes after (to prevent race conditions).
352        char *oldVal = reinterpret_cast<char *>(destPtr);
353        for (size_t i = 0; i < dims[0]; i++) {
354            elem->decRefs(oldVal);
355            oldVal += stride;
356        }
357    }
358
359    memcpy(destPtr, data, dataLength);
360}
361
362void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
363
364    //rsAssert(!script->mFieldIsObject[slot]);
365    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
366
367    int32_t *destPtr = reinterpret_cast<int32_t *>(
368                          mExecutable->getExportVarAddrs()[slot]);
369    if (!destPtr) {
370        //ALOGV("Calling setVar on slot = %i which is null", slot);
371        return;
372    }
373
374    void *ptr = NULL;
375    mBoundAllocs[slot] = data;
376    if(data) {
377        ptr = data->mHal.drvState.lod[0].mallocPtr;
378    }
379    memcpy(destPtr, &ptr, sizeof(void *));
380}
381
382void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
383
384    //rsAssert(script->mFieldIsObject[slot]);
385    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
386
387    //if (mIntrinsicID) {
388        //mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, alloc);
389        //return;
390    //}
391
392    int32_t *destPtr = reinterpret_cast<int32_t *>(
393                          mExecutable->getExportVarAddrs()[slot]);
394    if (!destPtr) {
395        //ALOGV("Calling setVar on slot = %i which is null", slot);
396        return;
397    }
398
399    rsrSetObject(mCtx->getContext(), (ObjectBase **)destPtr, data);
400}
401
402RsdCpuScriptImpl::~RsdCpuScriptImpl() {
403
404    if (mExecutable) {
405        Vector<void *>::const_iterator var_addr_iter =
406            mExecutable->getExportVarAddrs().begin();
407        Vector<void *>::const_iterator var_addr_end =
408            mExecutable->getExportVarAddrs().end();
409
410        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
411            mExecutable->getInfo().getObjectSlots().begin();
412        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
413            mExecutable->getInfo().getObjectSlots().end();
414
415        while ((var_addr_iter != var_addr_end) &&
416               (is_object_iter != is_object_end)) {
417            // The field address can be NULL if the script-side has optimized
418            // the corresponding global variable away.
419            ObjectBase **obj_addr =
420                reinterpret_cast<ObjectBase **>(*var_addr_iter);
421            if (*is_object_iter) {
422                if (*var_addr_iter != NULL) {
423                    rsrClearObject(mCtx->getContext(), obj_addr);
424                }
425            }
426            var_addr_iter++;
427            is_object_iter++;
428        }
429    }
430
431    if (mCompilerContext) {
432        delete mCompilerContext;
433    }
434    if (mCompilerDriver) {
435        delete mCompilerDriver;
436    }
437    if (mExecutable) {
438        delete mExecutable;
439    }
440    if (mBoundAllocs) {
441        delete[] mBoundAllocs;
442    }
443}
444
445Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
446    if (!ptr) {
447        return NULL;
448    }
449
450    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
451        Allocation *a = mBoundAllocs[ct];
452        if (!a) continue;
453        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
454            return a;
455        }
456    }
457    ALOGE("rsGetAllocation, failed to find %p", ptr);
458    return NULL;
459}
460
461
462}
463}
464