1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19#include "rsScriptGroup.h"
20#include "rsCpuScriptGroup.h"
21//#include "rsdBcc.h"
22//#include "rsdAllocation.h"
23
24using namespace android;
25using namespace android::renderscript;
26
27CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
28    mCtx = ctx;
29    mSG = sg;
30}
31
32CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33
34}
35
36bool CpuScriptGroupImpl::init() {
37    return true;
38}
39
40void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41}
42
43void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44}
45
46
47typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
48                                      uint32_t xstart, uint32_t xend,
49                                      uint32_t instep, uint32_t outstep);
50
51void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
52                                         uint32_t xstart, uint32_t xend,
53                                         uint32_t instep, uint32_t outstep) {
54
55
56    const ScriptList *sl = (const ScriptList *)p->usr;
57    RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
58    const void *oldUsr = p->usr;
59
60    for(size_t ct=0; ct < sl->count; ct++) {
61        ScriptGroupRootFunc_t func;
62        func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
63        mp->usr = sl->usrPtrs[ct];
64
65        mp->ptrIn = NULL;
66        mp->in = NULL;
67        mp->ptrOut = NULL;
68        mp->out = NULL;
69
70        uint32_t istep = 0;
71        uint32_t ostep = 0;
72
73        if (sl->ins[ct]) {
74            mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
75            istep = sl->ins[ct]->mHal.state.elementSizeBytes;
76            mp->in = mp->ptrIn;
77            if (sl->inExts[ct]) {
78                mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
79            } else {
80                if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
81                    mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
82                }
83            }
84        }
85
86        if (sl->outs[ct]) {
87            mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
88            mp->out = mp->ptrOut;
89            ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
90            if (sl->outExts[ct]) {
91                mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
92            } else {
93                if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
94                    mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
95                }
96            }
97        }
98
99        //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
100        func(p, xstart, xend, istep, ostep);
101    }
102    //ALOGE("script group root");
103
104    //ConvolveParams *cp = (ConvolveParams *)p->usr;
105
106    mp->usr = oldUsr;
107}
108
109
110
111void CpuScriptGroupImpl::execute() {
112    Vector<Allocation *> ins;
113    Vector<bool> inExts;
114    Vector<Allocation *> outs;
115    Vector<bool> outExts;
116    Vector<const ScriptKernelID *> kernels;
117    bool fieldDep = false;
118
119    for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
120        ScriptGroup::Node *n = mSG->mNodes[ct];
121        Script *s = n->mKernels[0]->mScript;
122        if (s->hasObjectSlots()) {
123            // Disable the ScriptGroup optimization if we have global RS
124            // objects that might interfere between kernels.
125            fieldDep = true;
126        }
127
128        //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
129
130        for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
131            if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
132                //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
133                s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
134            }
135        }
136
137        for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
138            const ScriptKernelID *k = n->mKernels[ct2];
139            Allocation *ain = NULL;
140            Allocation *aout = NULL;
141            bool inExt = false;
142            bool outExt = false;
143
144            if (k->mScript->hasObjectSlots()) {
145                // Disable the ScriptGroup optimization if we have global RS
146                // objects that might interfere between kernels.
147                fieldDep = true;
148            }
149
150            for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
151                if (n->mInputs[ct3]->mDstKernel.get() == k) {
152                    ain = n->mInputs[ct3]->mAlloc.get();
153                    //ALOGE(" link in %p", ain);
154                }
155            }
156            for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
157                if (mSG->mInputs[ct3]->mKernel == k) {
158                    ain = mSG->mInputs[ct3]->mAlloc.get();
159                    inExt = true;
160                    //ALOGE(" io in %p", ain);
161                }
162            }
163
164            for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
165                if (n->mOutputs[ct3]->mSource.get() == k) {
166                    aout = n->mOutputs[ct3]->mAlloc.get();
167                    if(n->mOutputs[ct3]->mDstField.get() != NULL) {
168                        fieldDep = true;
169                    }
170                    //ALOGE(" link out %p", aout);
171                }
172            }
173            for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
174                if (mSG->mOutputs[ct3]->mKernel == k) {
175                    aout = mSG->mOutputs[ct3]->mAlloc.get();
176                    outExt = true;
177                    //ALOGE(" io out %p", aout);
178                }
179            }
180
181            if ((k->mHasKernelOutput == (aout != NULL)) &&
182                (k->mHasKernelInput == (ain != NULL))) {
183                ins.add(ain);
184                inExts.add(inExt);
185                outs.add(aout);
186                outExts.add(outExt);
187                kernels.add(k);
188            }
189        }
190
191    }
192
193    MTLaunchStruct mtls;
194
195    if(fieldDep) {
196        for (size_t ct=0; ct < ins.size(); ct++) {
197            Script *s = kernels[ct]->mScript;
198            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
199            uint32_t slot = kernels[ct]->mSlot;
200
201            si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
202            si->forEachKernelSetup(slot, &mtls);
203            si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
204            mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
205            si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
206        }
207    } else {
208        ScriptList sl;
209        sl.ins = ins.array();
210        sl.outs = outs.array();
211        sl.kernels = kernels.array();
212        sl.count = kernels.size();
213
214        Vector<const void *> usrPtrs;
215        Vector<const void *> fnPtrs;
216        Vector<uint32_t> sigs;
217        for (size_t ct=0; ct < kernels.size(); ct++) {
218            Script *s = kernels[ct]->mScript;
219            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
220
221            si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
222            fnPtrs.add((void *)mtls.kernel);
223            usrPtrs.add(mtls.fep.usr);
224            sigs.add(mtls.fep.usrLen);
225            si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
226        }
227        sl.sigs = sigs.array();
228        sl.usrPtrs = usrPtrs.array();
229        sl.fnPtrs = fnPtrs.array();
230        sl.inExts = inExts.array();
231        sl.outExts = outExts.array();
232
233        Script *s = kernels[0]->mScript;
234        RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
235        si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
236        mtls.script = NULL;
237        mtls.kernel = (void (*)())&scriptGroupRoot;
238        mtls.fep.usr = &sl;
239        mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
240
241        for (size_t ct=0; ct < kernels.size(); ct++) {
242            Script *s = kernels[ct]->mScript;
243            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
244            si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
245        }
246    }
247}
248
249
250