rsCpuScriptGroup.cpp revision b0abb140ac51b93d1a85aadaa63fe057f2d29850
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
19#include "rsScriptGroup.h"
20#include "rsCpuScriptGroup.h"
21
22using namespace android;
23using namespace android::renderscript;
24
25CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
26    mCtx = ctx;
27    mSG = (ScriptGroup*)sg;
28}
29
30CpuScriptGroupImpl::~CpuScriptGroupImpl() {
31
32}
33
34bool CpuScriptGroupImpl::init() {
35    return true;
36}
37
38void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
39}
40
41void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
42}
43
44
45typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
46                                      uint32_t xstart, uint32_t xend,
47                                      uint32_t outstep);
48
49void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
50                                         uint32_t xstart, uint32_t xend,
51                                         uint32_t outstep) {
52
53
54    const ScriptList *sl             = (const ScriptList *)kinfo->usr;
55    RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
56
57    const uint32_t oldInStride = mkinfo->inStride[0];
58
59    for (size_t ct = 0; ct < sl->count; ct++) {
60        ScriptGroupRootFunc_t func;
61        func          = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
62        mkinfo->usr   = sl->usrPtrs[ct];
63
64        if (sl->ins[ct]) {
65            rsAssert(kinfo->inLen == 1);
66
67            mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
68
69            mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
70
71            if (sl->inExts[ct]) {
72                mkinfo->inPtr[0] =
73                  (mkinfo->inPtr[0] +
74                   sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
75
76            } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
77                mkinfo->inPtr[0] =
78                  (mkinfo->inPtr[0] +
79                   sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
80            }
81
82        } else {
83            rsAssert(kinfo->inLen == 0);
84
85            mkinfo->inPtr[0]     = nullptr;
86            mkinfo->inStride[0]  = 0;
87        }
88
89        uint32_t ostep;
90        if (sl->outs[ct]) {
91            rsAssert(kinfo->outLen == 1);
92
93            mkinfo->outPtr[0] =
94              (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
95
96            ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
97
98            if (sl->outExts[ct]) {
99                mkinfo->outPtr[0] =
100                  mkinfo->outPtr[0] +
101                  sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
102
103            } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
104                mkinfo->outPtr[0] =
105                  mkinfo->outPtr[0] +
106                  sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
107            }
108        } else {
109            rsAssert(kinfo->outLen == 0);
110
111            mkinfo->outPtr[0] = nullptr;
112            ostep             = 0;
113        }
114
115        //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
116        func(kinfo, xstart, xend, ostep);
117    }
118    //ALOGE("script group root");
119
120    mkinfo->inStride[0] = oldInStride;
121    mkinfo->usr         = sl;
122}
123
124
125
126void CpuScriptGroupImpl::execute() {
127    Vector<Allocation *> ins;
128    Vector<bool> inExts;
129    Vector<Allocation *> outs;
130    Vector<bool> outExts;
131    Vector<const ScriptKernelID *> kernels;
132    bool fieldDep = false;
133
134    for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
135        ScriptGroup::Node *n = mSG->mNodes[ct];
136        Script *s = n->mKernels[0]->mScript;
137        if (s->hasObjectSlots()) {
138            // Disable the ScriptGroup optimization if we have global RS
139            // objects that might interfere between kernels.
140            fieldDep = true;
141        }
142
143        //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
144
145        for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
146            if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
147                //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
148                s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
149            }
150        }
151
152        for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
153            const ScriptKernelID *k = n->mKernels[ct2];
154            Allocation *ain = nullptr;
155            Allocation *aout = nullptr;
156            bool inExt = false;
157            bool outExt = false;
158
159            for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
160                if (n->mInputs[ct3]->mDstKernel.get() == k) {
161                    ain = n->mInputs[ct3]->mAlloc.get();
162                    break;
163                }
164            }
165            if (ain == nullptr) {
166                for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
167                    if (mSG->mInputs[ct3]->mKernel == k) {
168                        ain = mSG->mInputs[ct3]->mAlloc.get();
169                        inExt = true;
170                        break;
171                    }
172                }
173            }
174
175            for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
176                if (n->mOutputs[ct3]->mSource.get() == k) {
177                    aout = n->mOutputs[ct3]->mAlloc.get();
178                    if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
179                        fieldDep = true;
180                    }
181                    break;
182                }
183            }
184            if (aout == nullptr) {
185                for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
186                    if (mSG->mOutputs[ct3]->mKernel == k) {
187                        aout = mSG->mOutputs[ct3]->mAlloc.get();
188                        outExt = true;
189                        break;
190                    }
191                }
192            }
193
194            rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
195                     (k->mHasKernelInput == (ain != nullptr)));
196
197            ins.add(ain);
198            inExts.add(inExt);
199            outs.add(aout);
200            outExts.add(outExt);
201            kernels.add(k);
202        }
203
204    }
205
206    MTLaunchStruct mtls;
207
208    if (fieldDep) {
209        for (size_t ct=0; ct < ins.size(); ct++) {
210            Script *s = kernels[ct]->mScript;
211            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
212            uint32_t slot = kernels[ct]->mSlot;
213
214            uint32_t inLen;
215            const Allocation **ains;
216
217            if (ins[ct] == nullptr) {
218                inLen = 0;
219                ains  = nullptr;
220
221            } else {
222                inLen = 1;
223                ains  = const_cast<const Allocation**>(&ins[ct]);
224            }
225
226            bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
227
228            si->forEachKernelSetup(slot, &mtls);
229            si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
230                          mtls.fep.usrLen, nullptr);
231
232            if (launchOK) {
233                mCtx->launchThreads(ains, inLen, outs[ct], nullptr, &mtls);
234            }
235
236            si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
237        }
238    } else {
239        ScriptList sl;
240        sl.ins = ins.array();
241        sl.outs = outs.array();
242        sl.kernels = kernels.array();
243        sl.count = kernels.size();
244
245        uint32_t inLen;
246        const Allocation **ains;
247
248        if (ins[0] == nullptr) {
249            inLen = 0;
250            ains  = nullptr;
251
252        } else {
253            inLen = 1;
254            ains  = const_cast<const Allocation**>(&ins[0]);
255        }
256
257        Vector<const void *> usrPtrs;
258        Vector<const void *> fnPtrs;
259        Vector<uint32_t> sigs;
260        for (size_t ct=0; ct < kernels.size(); ct++) {
261            Script *s = kernels[ct]->mScript;
262            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
263
264            si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
265            fnPtrs.add((void *)mtls.kernel);
266            usrPtrs.add(mtls.fep.usr);
267            sigs.add(mtls.fep.usrLen);
268            si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
269                          mtls.fep.usr, mtls.fep.usrLen, nullptr);
270        }
271        sl.sigs = sigs.array();
272        sl.usrPtrs = usrPtrs.array();
273        sl.fnPtrs = fnPtrs.array();
274        sl.inExts = inExts.array();
275        sl.outExts = outExts.array();
276
277        Script *s = kernels[0]->mScript;
278        RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
279
280        if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
281
282            mtls.script = nullptr;
283            mtls.kernel = (void (*)())&scriptGroupRoot;
284            mtls.fep.usr = &sl;
285
286            mCtx->launchThreads(ains, inLen, outs[0], nullptr, &mtls);
287        }
288
289        for (size_t ct=0; ct < kernels.size(); ct++) {
290            Script *s = kernels[ct]->mScript;
291            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
292            si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
293                           nullptr);
294        }
295    }
296}
297