/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "rsCpuCore.h"
#include "rsCpuScript.h"
#include "rsScriptGroup.h"
#include "rsCpuScriptGroup.h"

using namespace android;
using namespace android::renderscript;

251ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
26709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    mCtx = ctx;
271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni    mSG = (ScriptGroup*)sg;
28709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
29709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
30709a0978ae141198018ca9769f8d96292a8928e6Jason SamsCpuScriptGroupImpl::~CpuScriptGroupImpl() {
31709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
32709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
33709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
34709a0978ae141198018ca9769f8d96292a8928e6Jason Samsbool CpuScriptGroupImpl::init() {
35709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    return true;
36709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
37709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
38709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
39709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
40709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
41709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
42709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
43709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
44709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grosstypedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
46709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                                      uint32_t xstart, uint32_t xend,
479ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                      uint32_t outstep);
48709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
49b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
50709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                                         uint32_t xstart, uint32_t xend,
519ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                         uint32_t outstep) {
52709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
53709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
54b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const ScriptList *sl             = (const ScriptList *)kinfo->usr;
55b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
56709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
57b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    const uint32_t oldInStride = mkinfo->inStride[0];
58f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
5980ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes    for (size_t ct = 0; ct < sl->count; ct++) {
60709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        ScriptGroupRootFunc_t func;
6180ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes        func          = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        mkinfo->usr   = sl->usrPtrs[ct];
63709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
64709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        if (sl->ins[ct]) {
65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            rsAssert(kinfo->inLen == 1);
66b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
67b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
6880ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes
69b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
7080ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes
71709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            if (sl->inExts[ct]) {
72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                mkinfo->inPtr[0] =
73b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                  (mkinfo->inPtr[0] +
74b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                   sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
77b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                mkinfo->inPtr[0] =
78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                  (mkinfo->inPtr[0] +
79b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                   sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
80709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
81f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
82f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes        } else {
83b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            rsAssert(kinfo->inLen == 0);
84b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
85b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mkinfo->inPtr[0]     = nullptr;
86b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mkinfo->inStride[0]  = 0;
87709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
88709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
89f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes        uint32_t ostep;
90709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        if (sl->outs[ct]) {
91b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            rsAssert(kinfo->outLen == 1);
92b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
93b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mkinfo->outPtr[0] =
9480ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes              (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
9580ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes
9617e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
9780ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes
98709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            if (sl->outExts[ct]) {
99b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                mkinfo->outPtr[0] =
100b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                  mkinfo->outPtr[0] +
101b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                  sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
102b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
103b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
104b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                mkinfo->outPtr[0] =
105b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                  mkinfo->outPtr[0] +
106b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross                  sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
107709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
108f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes        } else {
109b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            rsAssert(kinfo->outLen == 0);
110b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross
111b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            mkinfo->outPtr[0] = nullptr;
112b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross            ostep             = 0;
113709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
114709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
115709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
116b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        func(kinfo, xstart, xend, ostep);
117709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    }
118709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    //ALOGE("script group root");
119709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
120b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    mkinfo->inStride[0] = oldInStride;
121b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    mkinfo->usr         = sl;
122709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
123709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
124709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
125709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
126709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::execute() {
127b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni    Vector<Allocation *> ins;
128b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni    Vector<bool> inExts;
129b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni    Vector<Allocation *> outs;
130b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni    Vector<bool> outExts;
131b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni    Vector<const ScriptKernelID *> kernels;
132709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    bool fieldDep = false;
133709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
134709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
135709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        ScriptGroup::Node *n = mSG->mNodes[ct];
136709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        Script *s = n->mKernels[0]->mScript;
137c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines        if (s->hasObjectSlots()) {
138c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            // Disable the ScriptGroup optimization if we have global RS
139c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            // objects that might interfere between kernels.
140c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            fieldDep = true;
141c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines        }
142709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
143709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
144709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
145709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
146709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
147709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
148709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
149709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
150709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
151709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
152709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
153709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            const ScriptKernelID *k = n->mKernels[ct2];
15444bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            Allocation *ain = nullptr;
15544bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            Allocation *aout = nullptr;
156709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            bool inExt = false;
157709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            bool outExt = false;
158709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
159709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
160709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                if (n->mInputs[ct3]->mDstKernel.get() == k) {
161709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    ain = n->mInputs[ct3]->mAlloc.get();
1625f6f16fe612e1c30732becc66205500994889ac9Yang Ni                    break;
163709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
164709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
16544bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            if (ain == nullptr) {
1665f6f16fe612e1c30732becc66205500994889ac9Yang Ni                for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
1675f6f16fe612e1c30732becc66205500994889ac9Yang Ni                    if (mSG->mInputs[ct3]->mKernel == k) {
1685f6f16fe612e1c30732becc66205500994889ac9Yang Ni                        ain = mSG->mInputs[ct3]->mAlloc.get();
1695f6f16fe612e1c30732becc66205500994889ac9Yang Ni                        inExt = true;
1705f6f16fe612e1c30732becc66205500994889ac9Yang Ni                        break;
1715f6f16fe612e1c30732becc66205500994889ac9Yang Ni                    }
172709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
173709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
174709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
175709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
176709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                if (n->mOutputs[ct3]->mSource.get() == k) {
177709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    aout = n->mOutputs[ct3]->mAlloc.get();
17844bef6fba6244292b751387f3d6c31cca96c28adChris Wailes                    if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
179709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                        fieldDep = true;
180709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    }
1815f6f16fe612e1c30732becc66205500994889ac9Yang Ni                    break;
182709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
183709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
18444bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            if (aout == nullptr) {
1855f6f16fe612e1c30732becc66205500994889ac9Yang Ni                for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
1865f6f16fe612e1c30732becc66205500994889ac9Yang Ni                    if (mSG->mOutputs[ct3]->mKernel == k) {
1875f6f16fe612e1c30732becc66205500994889ac9Yang Ni                        aout = mSG->mOutputs[ct3]->mAlloc.get();
1885f6f16fe612e1c30732becc66205500994889ac9Yang Ni                        outExt = true;
1895f6f16fe612e1c30732becc66205500994889ac9Yang Ni                        break;
1905f6f16fe612e1c30732becc66205500994889ac9Yang Ni                    }
191709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
192709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
193709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
19444bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
19544bef6fba6244292b751387f3d6c31cca96c28adChris Wailes                     (k->mHasKernelInput == (ain != nullptr)));
1965f6f16fe612e1c30732becc66205500994889ac9Yang Ni
197b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni            ins.add(ain);
198b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni            inExts.add(inExt);
199b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni            outs.add(aout);
200b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni            outExts.add(outExt);
201b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni            kernels.add(k);
202709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
203709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
204709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    }
205709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
20614ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala    MTLaunchStructForEach mtls;
207709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
208f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes    if (fieldDep) {
209709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        for (size_t ct=0; ct < ins.size(); ct++) {
210709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            Script *s = kernels[ct]->mScript;
211709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
212709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            uint32_t slot = kernels[ct]->mSlot;
213709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
214f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            uint32_t inLen;
215f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            const Allocation **ains;
216f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
21744bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            if (ins[ct] == nullptr) {
218f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                inLen = 0;
21944bef6fba6244292b751387f3d6c31cca96c28adChris Wailes                ains  = nullptr;
220f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
221f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            } else {
222f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                inLen = 1;
223f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                ains  = const_cast<const Allocation**>(&ins[ct]);
224f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            }
225f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
226bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams            bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
227f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
228709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            si->forEachKernelSetup(slot, &mtls);
229f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
23044bef6fba6244292b751387f3d6c31cca96c28adChris Wailes                          mtls.fep.usrLen, nullptr);
231f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
232bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams            if (launchOK) {
23314ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala                mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls);
234bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams            }
235f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
23644bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
237709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
238709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    } else {
239709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        ScriptList sl;
240b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.ins = ins.array();
241b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.outs = outs.array();
242b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.kernels = kernels.array();
243b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.count = kernels.size();
244709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
245f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes        uint32_t inLen;
246f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes        const Allocation **ains;
247f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
24844bef6fba6244292b751387f3d6c31cca96c28adChris Wailes        if (ins[0] == nullptr) {
249f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            inLen = 0;
25044bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            ains  = nullptr;
251f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
252f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes        } else {
253f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            inLen = 1;
254f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            ains  = const_cast<const Allocation**>(&ins[0]);
255f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes        }
256f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
257b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        Vector<const void *> usrPtrs;
258b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        Vector<const void *> fnPtrs;
259b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        Vector<uint32_t> sigs;
260709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        for (size_t ct=0; ct < kernels.size(); ct++) {
261709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            Script *s = kernels[ct]->mScript;
262709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
263709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
264709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
265b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni            fnPtrs.add((void *)mtls.kernel);
266b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni            usrPtrs.add(mtls.fep.usr);
267b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni            sigs.add(mtls.fep.usrLen);
268f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes            si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
26944bef6fba6244292b751387f3d6c31cca96c28adChris Wailes                          mtls.fep.usr, mtls.fep.usrLen, nullptr);
270709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
271b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.sigs = sigs.array();
272b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.usrPtrs = usrPtrs.array();
273b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.fnPtrs = fnPtrs.array();
274b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.inExts = inExts.array();
275b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni        sl.outExts = outExts.array();
276709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
277709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        Script *s = kernels[0]->mScript;
278709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
279f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
280bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams        if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
281f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
282bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams            mtls.script = nullptr;
28314ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala            mtls.kernel = &scriptGroupRoot;
284bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams            mtls.fep.usr = &sl;
285f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes
28614ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala            mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls);
287bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams        }
28817e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams
28917e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams        for (size_t ct=0; ct < kernels.size(); ct++) {
29017e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            Script *s = kernels[ct]->mScript;
29117e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
29244bef6fba6244292b751387f3d6c31cca96c28adChris Wailes            si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
29344bef6fba6244292b751387f3d6c31cca96c28adChris Wailes                           nullptr);
29417e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams        }
295709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    }
296709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
297