rsCpuScript.cpp revision cadfac411e6690e39de36c4f9e94deb9b7d2d08e
1/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18
19#include "rsCpuCore.h"
20
21#include "rsCpuScript.h"
22//#include "rsdRuntime.h"
23//#include "rsdAllocation.h"
24//#include "rsCpuIntrinsics.h"
25
26
27#include "utils/Vector.h"
28#include "utils/Timers.h"
29#include "utils/StopWatch.h"
30
31
32#include <bcc/BCCContext.h>
33#include <bcc/Renderscript/RSCompilerDriver.h>
34#include <bcc/Renderscript/RSExecutable.h>
35#include <bcc/Renderscript/RSInfo.h>
36
37namespace android {
38namespace renderscript {
39
40
41
42RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
43    mCtx = ctx;
44    mScript = s;
45
46    mRoot = NULL;
47    mRootExpand = NULL;
48    mInit = NULL;
49    mFreeChildren = NULL;
50
51    mCompilerContext = NULL;
52    mCompilerDriver = NULL;
53    mExecutable = NULL;
54
55    mBoundAllocs = NULL;
56    mIntrinsicData = NULL;
57    mIsThreadable = true;
58}
59
60
61bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
62                            uint8_t const *bitcode, size_t bitcodeSize,
63                            uint32_t flags) {
64    //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
65    //ALOGE("rsdScriptInit %p %p", rsc, script);
66
67    mCtx->lockMutex();
68
69    bcc::RSExecutable *exec;
70    const bcc::RSInfo *info;
71
72    mCompilerContext = NULL;
73    mCompilerDriver = NULL;
74    mExecutable = NULL;
75
76    mCompilerContext = new bcc::BCCContext();
77    if (mCompilerContext == NULL) {
78        ALOGE("bcc: FAILS to create compiler context (out of memory)");
79        mCtx->unlockMutex();
80        return false;
81    }
82
83    mCompilerDriver = new bcc::RSCompilerDriver();
84    if (mCompilerDriver == NULL) {
85        ALOGE("bcc: FAILS to create compiler driver (out of memory)");
86        mCtx->unlockMutex();
87        return false;
88    }
89
90    mCompilerDriver->setRSRuntimeLookupFunction(lookupRuntimeStub);
91    mCompilerDriver->setRSRuntimeLookupContext(this);
92
93    exec = mCompilerDriver->build(*mCompilerContext, cacheDir, resName,
94                                  (const char *)bitcode, bitcodeSize, NULL,
95                                  mCtx->getLinkRuntimeCallback());
96
97    if (exec == NULL) {
98        ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
99        mCtx->unlockMutex();
100        return false;
101    }
102
103    mExecutable = exec;
104
105    exec->setThreadable(mIsThreadable);
106    if (!exec->syncInfo()) {
107        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
108    }
109
110    mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
111    mRootExpand =
112        reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
113    mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
114    mFreeChildren =
115        reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
116
117
118    info = &mExecutable->getInfo();
119    if (info->getExportVarNames().size()) {
120        mBoundAllocs = new Allocation *[info->getExportVarNames().size()];
121        memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size());
122    }
123
124    mCtx->unlockMutex();
125    return true;
126}
127
128void RsdCpuScriptImpl::populateScript(Script *script) {
129    const bcc::RSInfo *info = &mExecutable->getInfo();
130
131    // Copy info over to runtime
132    script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
133    script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
134    script->mHal.info.exportedPragmaCount = info->getPragmas().size();
135    script->mHal.info.exportedPragmaKeyList =
136        const_cast<const char**>(mExecutable->getPragmaKeys().array());
137    script->mHal.info.exportedPragmaValueList =
138        const_cast<const char**>(mExecutable->getPragmaValues().array());
139
140    if (mRootExpand) {
141        script->mHal.info.root = mRootExpand;
142    } else {
143        script->mHal.info.root = mRoot;
144    }
145}
146
147
// Function-pointer type: returns void and takes (const void *, void *,
// const void *) followed by four uint32_t values. Nothing in the visible part
// of this file refers to it.
// NOTE(review): presumably the legacy per-element kernel signature
// (in, out, usr, x, y, z, array index) -- confirm before relying on it.
148typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
149
150void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
151                                        const void * usr, uint32_t usrLen,
152                                        const RsScriptCall *sc,
153                                        MTLaunchStruct *mtls) {
154
155    memset(mtls, 0, sizeof(MTLaunchStruct));
156
157    // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
158    if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
159        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
160        return;
161    }
162    if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
163        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
164        return;
165    }
166
167    if (ain) {
168        mtls->fep.dimX = ain->getType()->getDimX();
169        mtls->fep.dimY = ain->getType()->getDimY();
170        mtls->fep.dimZ = ain->getType()->getDimZ();
171        //mtls->dimArray = ain->getType()->getDimArray();
172    } else if (aout) {
173        mtls->fep.dimX = aout->getType()->getDimX();
174        mtls->fep.dimY = aout->getType()->getDimY();
175        mtls->fep.dimZ = aout->getType()->getDimZ();
176        //mtls->dimArray = aout->getType()->getDimArray();
177    } else {
178        mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
179        return;
180    }
181
182    if (!sc || (sc->xEnd == 0)) {
183        mtls->xEnd = mtls->fep.dimX;
184    } else {
185        rsAssert(sc->xStart < mtls->fep.dimX);
186        rsAssert(sc->xEnd <= mtls->fep.dimX);
187        rsAssert(sc->xStart < sc->xEnd);
188        mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
189        mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
190        if (mtls->xStart >= mtls->xEnd) return;
191    }
192
193    if (!sc || (sc->yEnd == 0)) {
194        mtls->yEnd = mtls->fep.dimY;
195    } else {
196        rsAssert(sc->yStart < mtls->fep.dimY);
197        rsAssert(sc->yEnd <= mtls->fep.dimY);
198        rsAssert(sc->yStart < sc->yEnd);
199        mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
200        mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
201        if (mtls->yStart >= mtls->yEnd) return;
202    }
203
204    if (!sc || (sc->zEnd == 0)) {
205        mtls->zEnd = mtls->fep.dimZ;
206    } else {
207        rsAssert(sc->zStart < mtls->fep.dimZ);
208        rsAssert(sc->zEnd <= mtls->fep.dimZ);
209        rsAssert(sc->zStart < sc->zEnd);
210        mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
211        mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
212        if (mtls->zStart >= mtls->zEnd) return;
213    }
214
215    mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
216    mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
217    mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
218    mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
219
220    rsAssert(!ain || (ain->getType()->getDimZ() == 0));
221
222    mtls->rsc = mCtx;
223    mtls->ain = ain;
224    mtls->aout = aout;
225    mtls->fep.usr = usr;
226    mtls->fep.usrLen = usrLen;
227    mtls->mSliceSize = 1;
228    mtls->mSliceNum = 0;
229
230    mtls->fep.ptrIn = NULL;
231    mtls->fep.eStrideIn = 0;
232    mtls->isThreadable = mIsThreadable;
233
234    if (ain) {
235        mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
236        mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
237        mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
238    }
239
240    mtls->fep.ptrOut = NULL;
241    mtls->fep.eStrideOut = 0;
242    if (aout) {
243        mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
244        mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
245        mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
246    }
247}
248
249
250void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
251                                     const Allocation * ain,
252                                     Allocation * aout,
253                                     const void * usr,
254                                     uint32_t usrLen,
255                                     const RsScriptCall *sc) {
256
257    MTLaunchStruct mtls;
258    forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
259    forEachKernelSetup(slot, &mtls);
260
261    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
262    mCtx->launchThreads(ain, aout, sc, &mtls);
263    mCtx->setTLS(oldTLS);
264}
265
266void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
267
268    mtls->script = this;
269    mtls->fep.slot = slot;
270
271    rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
272    mtls->kernel = reinterpret_cast<ForEachFunc_t>(
273                      mExecutable->getExportForeachFuncAddrs()[slot]);
274    rsAssert(mtls->kernel != NULL);
275    mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
276}
277
278int RsdCpuScriptImpl::invokeRoot() {
279    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
280    int ret = mRoot();
281    mCtx->setTLS(oldTLS);
282    return ret;
283}
284
285void RsdCpuScriptImpl::invokeInit() {
286    if (mInit) {
287        mInit();
288    }
289}
290
291void RsdCpuScriptImpl::invokeFreeChildren() {
292    if (mFreeChildren) {
293        mFreeChildren();
294    }
295}
296
297void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
298                                      size_t paramLength) {
299    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
300
301    RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
302    reinterpret_cast<void (*)(const void *, uint32_t)>(
303        mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
304    mCtx->setTLS(oldTLS);
305}
306
307void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
308    //rsAssert(!script->mFieldIsObject[slot]);
309    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
310
311    //if (mIntrinsicID) {
312        //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
313        //return;
314    //}
315
316    int32_t *destPtr = reinterpret_cast<int32_t *>(
317                          mExecutable->getExportVarAddrs()[slot]);
318    if (!destPtr) {
319        //ALOGV("Calling setVar on slot = %i which is null", slot);
320        return;
321    }
322
323    memcpy(destPtr, data, dataLength);
324}
325
326void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
327                                                const Element *elem,
328                                                const size_t *dims, size_t dimLength) {
329
330    int32_t *destPtr = reinterpret_cast<int32_t *>(
331        mExecutable->getExportVarAddrs()[slot]);
332    if (!destPtr) {
333        //ALOGV("Calling setVar on slot = %i which is null", slot);
334        return;
335    }
336
337    // We want to look at dimension in terms of integer components,
338    // but dimLength is given in terms of bytes.
339    dimLength /= sizeof(int);
340
341    // Only a single dimension is currently supported.
342    rsAssert(dimLength == 1);
343    if (dimLength == 1) {
344        // First do the increment loop.
345        size_t stride = elem->getSizeBytes();
346        const char *cVal = reinterpret_cast<const char *>(data);
347        for (size_t i = 0; i < dims[0]; i++) {
348            elem->incRefs(cVal);
349            cVal += stride;
350        }
351
352        // Decrement loop comes after (to prevent race conditions).
353        char *oldVal = reinterpret_cast<char *>(destPtr);
354        for (size_t i = 0; i < dims[0]; i++) {
355            elem->decRefs(oldVal);
356            oldVal += stride;
357        }
358    }
359
360    memcpy(destPtr, data, dataLength);
361}
362
363void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
364
365    //rsAssert(!script->mFieldIsObject[slot]);
366    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
367
368    int32_t *destPtr = reinterpret_cast<int32_t *>(
369                          mExecutable->getExportVarAddrs()[slot]);
370    if (!destPtr) {
371        //ALOGV("Calling setVar on slot = %i which is null", slot);
372        return;
373    }
374
375    void *ptr = NULL;
376    mBoundAllocs[slot] = data;
377    if(data) {
378        ptr = data->mHal.drvState.lod[0].mallocPtr;
379    }
380    memcpy(destPtr, &ptr, sizeof(void *));
381}
382
383void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
384
385    //rsAssert(script->mFieldIsObject[slot]);
386    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
387
388    //if (mIntrinsicID) {
389        //mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, alloc);
390        //return;
391    //}
392
393    int32_t *destPtr = reinterpret_cast<int32_t *>(
394                          mExecutable->getExportVarAddrs()[slot]);
395    if (!destPtr) {
396        //ALOGV("Calling setVar on slot = %i which is null", slot);
397        return;
398    }
399
400    rsrSetObject(mCtx->getContext(), (ObjectBase **)destPtr, data);
401}
402
403RsdCpuScriptImpl::~RsdCpuScriptImpl() {
404
405    if (mExecutable) {
406        Vector<void *>::const_iterator var_addr_iter =
407            mExecutable->getExportVarAddrs().begin();
408        Vector<void *>::const_iterator var_addr_end =
409            mExecutable->getExportVarAddrs().end();
410
411        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
412            mExecutable->getInfo().getObjectSlots().begin();
413        bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
414            mExecutable->getInfo().getObjectSlots().end();
415
416        while ((var_addr_iter != var_addr_end) &&
417               (is_object_iter != is_object_end)) {
418            // The field address can be NULL if the script-side has optimized
419            // the corresponding global variable away.
420            ObjectBase **obj_addr =
421                reinterpret_cast<ObjectBase **>(*var_addr_iter);
422            if (*is_object_iter) {
423                if (*var_addr_iter != NULL) {
424                    rsrClearObject(mCtx->getContext(), obj_addr);
425                }
426            }
427            var_addr_iter++;
428            is_object_iter++;
429        }
430    }
431
432    if (mCompilerContext) {
433        delete mCompilerContext;
434    }
435    if (mCompilerDriver) {
436        delete mCompilerDriver;
437    }
438    if (mExecutable) {
439        delete mExecutable;
440    }
441    if (mBoundAllocs) {
442        delete[] mBoundAllocs;
443    }
444}
445
446Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
447    if (!ptr) {
448        return NULL;
449    }
450
451    for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
452        Allocation *a = mBoundAllocs[ct];
453        if (!a) continue;
454        if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
455            return a;
456        }
457    }
458    ALOGE("rsGetAllocation, failed to find %p", ptr);
459    return NULL;
460}
461
462
463}
464}
465