rsdScriptGroup.cpp revision bee28c6b56d0c399402014cd58cea426da2b98c1
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "rsdCore.h"
18
19#include <bcc/BCCContext.h>
20#include <bcc/Renderscript/RSCompilerDriver.h>
21#include <bcc/Renderscript/RSExecutable.h>
22#include <bcc/Renderscript/RSInfo.h>
23
24#include "rsScript.h"
25#include "rsScriptGroup.h"
26#include "rsdScriptGroup.h"
27#include "rsdBcc.h"
28#include "rsdAllocation.h"
29
30using namespace android;
31using namespace android::renderscript;
32
33
34bool rsdScriptGroupInit(const android::renderscript::Context *rsc,
35                        const android::renderscript::ScriptGroup *sg) {
36    return true;
37}
38
39void rsdScriptGroupSetInput(const android::renderscript::Context *rsc,
40                            const android::renderscript::ScriptGroup *sg,
41                            const android::renderscript::ScriptKernelID *kid,
42                            android::renderscript::Allocation *) {
43}
44
45void rsdScriptGroupSetOutput(const android::renderscript::Context *rsc,
46                             const android::renderscript::ScriptGroup *sg,
47                             const android::renderscript::ScriptKernelID *kid,
48                             android::renderscript::Allocation *) {
49}
50
51struct ScriptList {
52    size_t count;
53    Allocation *const* ins;
54    bool const* inExts;
55    Allocation *const* outs;
56    bool const* outExts;
57    const void *const* usrPtrs;
58    size_t const *usrSizes;
59    uint32_t const *sigs;
60    const void *const* fnPtrs;
61
62    const ScriptKernelID *const* kernels;
63};
64
65typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
66                                      uint32_t xstart, uint32_t xend,
67                                      uint32_t instep, uint32_t outstep);
68
69static void ScriptGroupRoot(const RsForEachStubParamStruct *p,
70                            uint32_t xstart, uint32_t xend,
71                            uint32_t instep, uint32_t outstep) {
72
73    const ScriptList *sl = (const ScriptList *)p->usr;
74    RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
75    const void *oldUsr = p->usr;
76
77    for(size_t ct=0; ct < sl->count; ct++) {
78        ScriptGroupRootFunc_t func;
79        func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
80        mp->usr = sl->usrPtrs[ct];
81
82        mp->ptrIn = NULL;
83        mp->in = NULL;
84        mp->ptrOut = NULL;
85        mp->out = NULL;
86
87        if (sl->ins[ct]) {
88            DrvAllocation *drv = (DrvAllocation *)sl->ins[ct]->mHal.drv;
89            mp->ptrIn = (const uint8_t *)drv->lod[0].mallocPtr;
90            mp->in = mp->ptrIn;
91            if (sl->inExts[ct]) {
92                mp->in = mp->ptrIn + drv->lod[0].stride * p->y;
93            } else {
94                if (drv->lod[0].dimY > p->lid) {
95                    mp->in = mp->ptrIn + drv->lod[0].stride * p->lid;
96                }
97            }
98        }
99
100        if (sl->outs[ct]) {
101            DrvAllocation *drv = (DrvAllocation *)sl->outs[ct]->mHal.drv;
102            mp->ptrOut = (uint8_t *)drv->lod[0].mallocPtr;
103            mp->out = mp->ptrOut;
104            if (sl->outExts[ct]) {
105                mp->out = mp->ptrOut + drv->lod[0].stride * p->y;
106            } else {
107                if (drv->lod[0].dimY > p->lid) {
108                    mp->out = mp->ptrOut + drv->lod[0].stride * p->lid;
109                }
110            }
111        }
112
113        //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
114        func(p, xstart, xend, instep, outstep);
115    }
116    //ALOGE("script group root");
117
118    //ConvolveParams *cp = (ConvolveParams *)p->usr;
119
120    mp->usr = oldUsr;
121}
122
123
124void rsdScriptGroupExecute(const android::renderscript::Context *rsc,
125                           const android::renderscript::ScriptGroup *sg) {
126
127    Vector<Allocation *> ins;
128    Vector<bool> inExts;
129    Vector<Allocation *> outs;
130    Vector<bool> outExts;
131    Vector<const ScriptKernelID *> kernels;
132    bool fieldDep = false;
133
134    for (size_t ct=0; ct < sg->mNodes.size(); ct++) {
135        ScriptGroup::Node *n = sg->mNodes[ct];
136        Script *s = n->mKernels[0]->mScript;
137
138        //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
139
140        for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
141            if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
142                //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
143                s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
144            }
145        }
146
147        for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
148            const ScriptKernelID *k = n->mKernels[ct2];
149            Allocation *ain = NULL;
150            Allocation *aout = NULL;
151            bool inExt = false;
152            bool outExt = false;
153
154            for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
155                if (n->mInputs[ct3]->mDstKernel.get() == k) {
156                    ain = n->mInputs[ct3]->mAlloc.get();
157                    //ALOGE(" link in %p", ain);
158                }
159            }
160            for (size_t ct3=0; ct3 < sg->mInputs.size(); ct3++) {
161                if (sg->mInputs[ct3]->mKernel == k) {
162                    ain = sg->mInputs[ct3]->mAlloc.get();
163                    inExt = true;
164                    //ALOGE(" io in %p", ain);
165                }
166            }
167
168            for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
169                if (n->mOutputs[ct3]->mSource.get() == k) {
170                    aout = n->mOutputs[ct3]->mAlloc.get();
171                    if(n->mOutputs[ct3]->mDstField.get() != NULL) {
172                        fieldDep = true;
173                    }
174                    //ALOGE(" link out %p", aout);
175                }
176            }
177            for (size_t ct3=0; ct3 < sg->mOutputs.size(); ct3++) {
178                if (sg->mOutputs[ct3]->mKernel == k) {
179                    aout = sg->mOutputs[ct3]->mAlloc.get();
180                    outExt = true;
181                    //ALOGE(" io out %p", aout);
182                }
183            }
184
185            if ((k->mHasKernelOutput == (aout != NULL)) &&
186                (k->mHasKernelInput == (ain != NULL))) {
187                ins.add(ain);
188                inExts.add(inExt);
189                outs.add(aout);
190                outExts.add(outExt);
191                kernels.add(k);
192            }
193        }
194
195    }
196
197    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
198    MTLaunchStruct mtls;
199
200    if(fieldDep) {
201        for (size_t ct=0; ct < ins.size(); ct++) {
202            Script *s = kernels[ct]->mScript;
203            DrvScript *drv = (DrvScript *)s->mHal.drv;
204            uint32_t slot = kernels[ct]->mSlot;
205
206            rsdScriptInvokeForEachMtlsSetup(rsc, ins[ct], outs[ct], NULL, 0, NULL, &mtls);
207            mtls.script = s;
208
209            if (drv->mIntrinsicID) {
210                mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
211                mtls.fep.usr = drv->mIntrinsicData;
212            } else {
213                mtls.kernel = reinterpret_cast<ForEachFunc_t>(
214                                  drv->mExecutable->getExportForeachFuncAddrs()[slot]);
215                rsAssert(mtls.kernel != NULL);
216                mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
217            }
218
219            rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, ins[ct], outs[ct],
220                                   NULL, 0, NULL, &mtls);
221        }
222    } else {
223        ScriptList sl;
224        sl.ins = ins.array();
225        sl.outs = outs.array();
226        sl.kernels = kernels.array();
227        sl.count = kernels.size();
228
229        Vector<const void *> usrPtrs;
230        Vector<const void *> fnPtrs;
231        Vector<uint32_t> sigs;
232        for (size_t ct=0; ct < kernels.size(); ct++) {
233            Script *s = kernels[ct]->mScript;
234            DrvScript *drv = (DrvScript *)s->mHal.drv;
235
236            if (drv->mIntrinsicID) {
237                fnPtrs.add((void *)drv->mIntrinsicFuncs.root);
238                usrPtrs.add(drv->mIntrinsicData);
239                sigs.add(0);
240            } else {
241                int slot = kernels[ct]->mSlot;
242                fnPtrs.add((void *)drv->mExecutable->getExportForeachFuncAddrs()[slot]);
243                usrPtrs.add(NULL);
244                sigs.add(drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second);
245            }
246        }
247        sl.sigs = sigs.array();
248        sl.usrPtrs = usrPtrs.array();
249        sl.fnPtrs = fnPtrs.array();
250        sl.inExts = inExts.array();
251        sl.outExts = outExts.array();
252
253        rsdScriptInvokeForEachMtlsSetup(rsc, ins[0], outs[0], NULL, 0, NULL, &mtls);
254        mtls.script = NULL;
255        mtls.kernel = (void (*)())&ScriptGroupRoot;
256        mtls.fep.usr = &sl;
257        rsdScriptLaunchThreads(rsc, true, ins[0], outs[0], NULL, 0, NULL, &mtls);
258    }
259
260}
261
262void rsdScriptGroupDestroy(const android::renderscript::Context *rsc,
263                           const android::renderscript::ScriptGroup *sg) {
264}
265
266
267