rsScriptC.cpp revision afb743aca56c18beb7ab924e75cb6e070ef3e55a
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsContext.h"
18#include "rsScriptC.h"
19#include "rsMatrix.h"
20#include "../../compile/libbcc/include/bcc/bcc.h"
21#include "utils/Timers.h"
22#include "utils/StopWatch.h"
23
24#include <GLES/gl.h>
25#include <GLES/glext.h>
26
27using namespace android;
28using namespace android::renderscript;
29
// Reads the calling thread's script state and declares three locals in the
// enclosing scope: `tls` (the thread's ScriptTLSStruct), `rsc` (its Context)
// and `sc` (the thread's current script viewed as a ScriptC).  The expansion
// ends without a semicolon, so use it as `GET_TLS();`.
#define GET_TLS() \
    Context::ScriptTLSStruct * tls = static_cast<Context::ScriptTLSStruct *>( \
            pthread_getspecific(Context::gThreadTLSKey)); \
    Context * rsc = tls->mContext; \
    ScriptC * sc = static_cast<ScriptC *>(tls->mScript)
34
35
36ScriptC::ScriptC(Context *rsc) : Script(rsc) {
37    mBccScript = NULL;
38    memset(&mProgram, 0, sizeof(mProgram));
39}
40
41ScriptC::~ScriptC() {
42    if (mBccScript) {
43        bccDeleteScript(mBccScript);
44    }
45    free(mEnviroment.mScriptText);
46    mEnviroment.mScriptText = NULL;
47}
48
49void ScriptC::setupScript(Context *rsc) {
50    setupGLState(rsc);
51    mEnviroment.mStartTimeMillis
52                = nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC));
53
54    for (uint32_t ct=0; ct < mEnviroment.mFieldCount; ct++) {
55        if (mSlots[ct].get() && !mTypes[ct].get()) {
56            mTypes[ct].set(mSlots[ct]->getType());
57        }
58
59        if (!mTypes[ct].get())
60            continue;
61        void *ptr = NULL;
62        if (mSlots[ct].get()) {
63            ptr = mSlots[ct]->getPtr();
64        }
65        void **dest = ((void ***)mEnviroment.mFieldAddress)[ct];
66
67        if (rsc->props.mLogScripts) {
68            LOGV("%p ScriptC::setupScript slot=%i  dst=%p  src=%p  type=%p", rsc, ct, dest, ptr, mSlots[ct]->getType());
69
70            //const uint32_t *p32 = (const uint32_t *)ptr;
71            //for (uint32_t ct2=0; ct2 < mSlots[ct]->getType()->getDimX(); ct2++) {
72                //LOGE("  %i = 0x%08x ", ct2, p32[ct2]);
73            //}
74        }
75
76        if (dest) {
77            *dest = ptr;
78        }
79    }
80}
81
82const Allocation *ScriptC::ptrToAllocation(const void *ptr) const {
83    if (!ptr) {
84        return NULL;
85    }
86    for (uint32_t ct=0; ct < mEnviroment.mFieldCount; ct++) {
87        if (!mSlots[ct].get())
88            continue;
89        if (mSlots[ct]->getPtr() == ptr) {
90            return mSlots[ct].get();
91        }
92    }
93    LOGE("ScriptC::ptrToAllocation, failed to find %p", ptr);
94    return NULL;
95}
96
97Script * ScriptC::setTLS(Script *sc) {
98    Context::ScriptTLSStruct * tls = (Context::ScriptTLSStruct *)
99                                  pthread_getspecific(Context::gThreadTLSKey);
100    rsAssert(tls);
101    Script *old = tls->mScript;
102    tls->mScript = sc;
103    return old;
104}
105
106void ScriptC::setupGLState(Context *rsc) {
107    if (mEnviroment.mFragmentStore.get()) {
108        rsc->setFragmentStore(mEnviroment.mFragmentStore.get());
109    }
110    if (mEnviroment.mFragment.get()) {
111        rsc->setFragment(mEnviroment.mFragment.get());
112    }
113    if (mEnviroment.mVertex.get()) {
114        rsc->setVertex(mEnviroment.mVertex.get());
115    }
116    if (mEnviroment.mRaster.get()) {
117        rsc->setRaster(mEnviroment.mRaster.get());
118    }
119}
120
121uint32_t ScriptC::run(Context *rsc) {
122    if (mProgram.mRoot == NULL) {
123        rsc->setError(RS_ERROR_BAD_SCRIPT, "Attempted to run bad script");
124        return 0;
125    }
126
127    setupScript(rsc);
128
129    uint32_t ret = 0;
130    Script * oldTLS = setTLS(this);
131
132    if (rsc->props.mLogScripts) {
133        LOGV("%p ScriptC::run invoking root,  ptr %p", rsc, mProgram.mRoot);
134    }
135
136    ret = mProgram.mRoot();
137
138    if (rsc->props.mLogScripts) {
139        LOGV("%p ScriptC::run invoking complete, ret=%i", rsc, ret);
140    }
141
142    setTLS(oldTLS);
143    return ret;
144}
145
146typedef struct {
147    Context *rsc;
148    ScriptC *script;
149    const Allocation * ain;
150    Allocation * aout;
151    const void * usr;
152
153    uint32_t mSliceSize;
154    volatile int mSliceNum;
155
156    const uint8_t *ptrIn;
157    uint32_t eStrideIn;
158    uint8_t *ptrOut;
159    uint32_t eStrideOut;
160
161    uint32_t xStart;
162    uint32_t xEnd;
163    uint32_t yStart;
164    uint32_t yEnd;
165    uint32_t zStart;
166    uint32_t zEnd;
167    uint32_t arrayStart;
168    uint32_t arrayEnd;
169
170    uint32_t dimX;
171    uint32_t dimY;
172    uint32_t dimZ;
173    uint32_t dimArray;
174} MTLaunchStruct;
175typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
176
177static void wc_xy(void *usr, uint32_t idx) {
178    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
179
180    while (1) {
181        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
182        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
183        uint32_t yEnd = yStart + mtls->mSliceSize;
184        yEnd = rsMin(yEnd, mtls->yEnd);
185        if (yEnd <= yStart) {
186            return;
187        }
188
189        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
190        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
191        for (uint32_t y = yStart; y < yEnd; y++) {
192            uint32_t offset = mtls->dimX * y;
193            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
194            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
195
196            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
197                ((rs_t)mtls->script->mProgram.mRoot) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
198                xPtrIn += mtls->eStrideIn;
199                xPtrOut += mtls->eStrideOut;
200            }
201        }
202    }
203}
204
205static void wc_x(void *usr, uint32_t idx) {
206    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
207
208    while (1) {
209        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
210        uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
211        uint32_t xEnd = xStart + mtls->mSliceSize;
212        xEnd = rsMin(xEnd, mtls->xEnd);
213        if (xEnd <= xStart) {
214            return;
215        }
216
217        //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
218        //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
219        uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
220        const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
221        for (uint32_t x = xStart; x < xEnd; x++) {
222            ((rs_t)mtls->script->mProgram.mRoot) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0);
223            xPtrIn += mtls->eStrideIn;
224            xPtrOut += mtls->eStrideOut;
225        }
226    }
227}
228
229void ScriptC::runForEach(Context *rsc,
230                         const Allocation * ain,
231                         Allocation * aout,
232                         const void * usr,
233                         const RsScriptCall *sc) {
234    MTLaunchStruct mtls;
235    memset(&mtls, 0, sizeof(mtls));
236
237    if (ain) {
238        mtls.dimX = ain->getType()->getDimX();
239        mtls.dimY = ain->getType()->getDimY();
240        mtls.dimZ = ain->getType()->getDimZ();
241        //mtls.dimArray = ain->getType()->getDimArray();
242    } else if (aout) {
243        mtls.dimX = aout->getType()->getDimX();
244        mtls.dimY = aout->getType()->getDimY();
245        mtls.dimZ = aout->getType()->getDimZ();
246        //mtls.dimArray = aout->getType()->getDimArray();
247    } else {
248        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
249        return;
250    }
251
252    if (!sc || (sc->xEnd == 0)) {
253        mtls.xEnd = mtls.dimX;
254    } else {
255        rsAssert(sc->xStart < mtls.dimX);
256        rsAssert(sc->xEnd <= mtls.dimX);
257        rsAssert(sc->xStart < sc->xEnd);
258        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
259        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
260        if (mtls.xStart >= mtls.xEnd) return;
261    }
262
263    if (!sc || (sc->yEnd == 0)) {
264        mtls.yEnd = mtls.dimY;
265    } else {
266        rsAssert(sc->yStart < mtls.dimY);
267        rsAssert(sc->yEnd <= mtls.dimY);
268        rsAssert(sc->yStart < sc->yEnd);
269        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
270        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
271        if (mtls.yStart >= mtls.yEnd) return;
272    }
273
274    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
275    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
276    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
277    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
278
279    rsAssert(ain->getType()->getDimZ() == 0);
280
281    setupScript(rsc);
282    Script * oldTLS = setTLS(this);
283
284    mtls.rsc = rsc;
285    mtls.ain = ain;
286    mtls.aout = aout;
287    mtls.script = this;
288    mtls.usr = usr;
289    mtls.mSliceSize = 10;
290    mtls.mSliceNum = 0;
291
292    mtls.ptrIn = NULL;
293    mtls.eStrideIn = 0;
294    if (ain) {
295        mtls.ptrIn = (const uint8_t *)ain->getPtr();
296        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
297    }
298
299    mtls.ptrOut = NULL;
300    mtls.eStrideOut = 0;
301    if (aout) {
302        mtls.ptrOut = (uint8_t *)aout->getPtr();
303        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
304    }
305
306    if ((rsc->getWorkerPoolSize() > 1) && mEnviroment.mIsThreadable) {
307        if (mtls.dimY > 1) {
308            rsc->launchThreads(wc_xy, &mtls);
309        } else {
310            rsc->launchThreads(wc_x, &mtls);
311        }
312
313        //LOGE("launch 1");
314    } else {
315        //LOGE("launch 3");
316        for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
317            for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
318                for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
319                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
320                                      mtls.dimX * mtls.dimY * z +
321                                      mtls.dimX * y;
322                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
323                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
324
325                    for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
326                        ((rs_t)mProgram.mRoot) (xPtrIn, xPtrOut, usr, x, y, z, ar);
327                        xPtrIn += mtls.eStrideIn;
328                        xPtrOut += mtls.eStrideOut;
329                    }
330                }
331            }
332        }
333    }
334
335    setTLS(oldTLS);
336}
337
338void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len) {
339    //LOGE("rsi_ScriptInvoke %i", slot);
340    if ((slot >= mEnviroment.mInvokeFunctionCount) ||
341        (mEnviroment.mInvokeFunctions[slot] == NULL)) {
342        rsc->setError(RS_ERROR_BAD_SCRIPT, "Calling invoke on bad script");
343        return;
344    }
345    setupScript(rsc);
346    Script * oldTLS = setTLS(this);
347
348    if (rsc->props.mLogScripts) {
349        LOGV("%p ScriptC::Invoke invoking slot %i,  ptr %p", rsc, slot, mEnviroment.mInvokeFunctions[slot]);
350    }
351    ((void (*)(const void *, uint32_t))
352        mEnviroment.mInvokeFunctions[slot])(data, len);
353    if (rsc->props.mLogScripts) {
354        LOGV("%p ScriptC::Invoke complete", rsc);
355    }
356
357    setTLS(oldTLS);
358}
359
360ScriptCState::ScriptCState() {
361    mScript.clear();
362}
363
364ScriptCState::~ScriptCState() {
365    mScript.clear();
366}
367
368void ScriptCState::init(Context *rsc) {
369    clear(rsc);
370}
371
372void ScriptCState::clear(Context *rsc) {
373    rsAssert(rsc);
374    mScript.clear();
375    mScript.set(new ScriptC(rsc));
376}
377
378static BCCvoid* symbolLookup(BCCvoid* pContext, const BCCchar* name) {
379    const ScriptCState::SymbolTable_t *sym;
380    ScriptC *s = (ScriptC *)pContext;
381    sym = ScriptCState::lookupSymbol(name);
382    if (!sym) {
383        sym = ScriptCState::lookupSymbolCL(name);
384    }
385    if (!sym) {
386        sym = ScriptCState::lookupSymbolGL(name);
387    }
388    if (sym) {
389        s->mEnviroment.mIsThreadable &= sym->threadable;
390        return sym->mPtr;
391    }
392    LOGE("ScriptC sym lookup failed for %s", name);
393    return NULL;
394}
395
396extern const char rs_runtime_lib_bc[];
397extern unsigned rs_runtime_lib_bc_size;
398
399void ScriptCState::runCompiler(Context *rsc, ScriptC *s) {
400    {
401        StopWatch compileTimer("RenderScript compile time");
402        s->mBccScript = bccCreateScript();
403        s->mEnviroment.mIsThreadable = true;
404        bccScriptBitcode(s->mBccScript, s->mEnviroment.mScriptText, s->mEnviroment.mScriptTextLength);
405        //bccLinkBitcode(s->mBccScript, rs_runtime_lib_bc, rs_runtime_lib_bc_size);
406        bccRegisterSymbolCallback(s->mBccScript, symbolLookup, s);
407        bccCompileScript(s->mBccScript);
408        bccGetScriptLabel(s->mBccScript, "root", (BCCvoid**) &s->mProgram.mRoot);
409        bccGetScriptLabel(s->mBccScript, "init", (BCCvoid**) &s->mProgram.mInit);
410    }
411    LOGV("%p ScriptCState::runCompiler root %p,  init %p", rsc, s->mProgram.mRoot, s->mProgram.mInit);
412
413    if (s->mProgram.mInit) {
414        s->mProgram.mInit();
415    }
416
417    bccGetExportFuncs(s->mBccScript, (BCCsizei*) &s->mEnviroment.mInvokeFunctionCount, 0, NULL);
418    if (s->mEnviroment.mInvokeFunctionCount <= 0)
419        s->mEnviroment.mInvokeFunctions = NULL;
420    else {
421        s->mEnviroment.mInvokeFunctions = (Script::InvokeFunc_t*) calloc(s->mEnviroment.mInvokeFunctionCount, sizeof(Script::InvokeFunc_t));
422        bccGetExportFuncs(s->mBccScript, NULL, s->mEnviroment.mInvokeFunctionCount, (BCCvoid **) s->mEnviroment.mInvokeFunctions);
423    }
424
425    bccGetExportVars(s->mBccScript, (BCCsizei*) &s->mEnviroment.mFieldCount, 0, NULL);
426    if (s->mEnviroment.mFieldCount <= 0)
427        s->mEnviroment.mFieldAddress = NULL;
428    else {
429        s->mEnviroment.mFieldAddress = (void **) calloc(s->mEnviroment.mFieldCount, sizeof(void *));
430        bccGetExportVars(s->mBccScript, NULL, s->mEnviroment.mFieldCount, (BCCvoid **) s->mEnviroment.mFieldAddress);
431        s->initSlots();
432    }
433
434    s->mEnviroment.mFragment.set(rsc->getDefaultProgramFragment());
435    s->mEnviroment.mVertex.set(rsc->getDefaultProgramVertex());
436    s->mEnviroment.mFragmentStore.set(rsc->getDefaultProgramStore());
437    s->mEnviroment.mRaster.set(rsc->getDefaultProgramRaster());
438
439    if (s->mProgram.mRoot) {
440        const static int pragmaMax = 16;
441        BCCsizei pragmaCount;
442        BCCchar * str[pragmaMax];
443        bccGetPragmas(s->mBccScript, &pragmaCount, pragmaMax, &str[0]);
444
445        for (int ct=0; ct < pragmaCount; ct+=2) {
446            //LOGE("pragme %s %s", str[ct], str[ct+1]);
447            if (!strcmp(str[ct], "version")) {
448                continue;
449            }
450
451            if (!strcmp(str[ct], "stateVertex")) {
452                if (!strcmp(str[ct+1], "default")) {
453                    continue;
454                }
455                if (!strcmp(str[ct+1], "parent")) {
456                    s->mEnviroment.mVertex.clear();
457                    continue;
458                }
459                LOGE("Unreconized value %s passed to stateVertex", str[ct+1]);
460            }
461
462            if (!strcmp(str[ct], "stateRaster")) {
463                if (!strcmp(str[ct+1], "default")) {
464                    continue;
465                }
466                if (!strcmp(str[ct+1], "parent")) {
467                    s->mEnviroment.mRaster.clear();
468                    continue;
469                }
470                LOGE("Unreconized value %s passed to stateRaster", str[ct+1]);
471            }
472
473            if (!strcmp(str[ct], "stateFragment")) {
474                if (!strcmp(str[ct+1], "default")) {
475                    continue;
476                }
477                if (!strcmp(str[ct+1], "parent")) {
478                    s->mEnviroment.mFragment.clear();
479                    continue;
480                }
481                LOGE("Unreconized value %s passed to stateFragment", str[ct+1]);
482            }
483
484            if (!strcmp(str[ct], "stateStore")) {
485                if (!strcmp(str[ct+1], "default")) {
486                    continue;
487                }
488                if (!strcmp(str[ct+1], "parent")) {
489                    s->mEnviroment.mFragmentStore.clear();
490                    continue;
491                }
492                LOGE("Unreconized value %s passed to stateStore", str[ct+1]);
493            }
494
495        }
496
497
498    } else {
499        // Deal with an error.
500    }
501}
502
503namespace android {
504namespace renderscript {
505
506void rsi_ScriptCBegin(Context * rsc) {
507    ScriptCState *ss = &rsc->mScriptC;
508    ss->clear(rsc);
509}
510
511void rsi_ScriptCSetText(Context *rsc, const char *text, uint32_t len) {
512    ScriptCState *ss = &rsc->mScriptC;
513
514    char *t = (char *)malloc(len + 1);
515    memcpy(t, text, len);
516    t[len] = 0;
517    ss->mScript->mEnviroment.mScriptText = t;
518    ss->mScript->mEnviroment.mScriptTextLength = len;
519}
520
521RsScript rsi_ScriptCCreate(Context * rsc) {
522    ScriptCState *ss = &rsc->mScriptC;
523
524    ObjectBaseRef<ScriptC> s(ss->mScript);
525    ss->mScript.clear();
526    s->incUserRef();
527
528    ss->runCompiler(rsc, s.get());
529    ss->clear(rsc);
530    return s.get();
531}
532
533}
534}
535