1709a0978ae141198018ca9769f8d96292a8928e6Jason Sams/*
2709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * Copyright (C) 2011 The Android Open Source Project
3709a0978ae141198018ca9769f8d96292a8928e6Jason Sams *
4709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * Licensed under the Apache License, Version 2.0 (the "License");
5709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * you may not use this file except in compliance with the License.
6709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * You may obtain a copy of the License at
7709a0978ae141198018ca9769f8d96292a8928e6Jason Sams *
8709a0978ae141198018ca9769f8d96292a8928e6Jason Sams *      http://www.apache.org/licenses/LICENSE-2.0
9709a0978ae141198018ca9769f8d96292a8928e6Jason Sams *
10709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * Unless required by applicable law or agreed to in writing, software
11709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * distributed under the License is distributed on an "AS IS" BASIS,
12709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * See the License for the specific language governing permissions and
14709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * limitations under the License.
15709a0978ae141198018ca9769f8d96292a8928e6Jason Sams */
16709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
17709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuCore.h"
18709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuScript.h"
19709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsScriptGroup.h"
20709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuScriptGroup.h"
21709a0978ae141198018ca9769f8d96292a8928e6Jason Sams//#include "rsdBcc.h"
22709a0978ae141198018ca9769f8d96292a8928e6Jason Sams//#include "rsdAllocation.h"
23709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
24709a0978ae141198018ca9769f8d96292a8928e6Jason Samsusing namespace android;
25709a0978ae141198018ca9769f8d96292a8928e6Jason Samsusing namespace android::renderscript;
26709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
27709a0978ae141198018ca9769f8d96292a8928e6Jason SamsCpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
28709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    mCtx = ctx;
29709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    mSG = sg;
30709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
31709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
32709a0978ae141198018ca9769f8d96292a8928e6Jason SamsCpuScriptGroupImpl::~CpuScriptGroupImpl() {
33709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
34709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
35709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
36709a0978ae141198018ca9769f8d96292a8928e6Jason Samsbool CpuScriptGroupImpl::init() {
37709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    return true;
38709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
39709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
40709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
42709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
43709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
45709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
46709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
47709a0978ae141198018ca9769f8d96292a8928e6Jason Samstypedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
48709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                                      uint32_t xstart, uint32_t xend,
49709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                                      uint32_t instep, uint32_t outstep);
50709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
51709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
52709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                                         uint32_t xstart, uint32_t xend,
53709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                                         uint32_t instep, uint32_t outstep) {
54709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
55709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
56709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    const ScriptList *sl = (const ScriptList *)p->usr;
57709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
58709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    const void *oldUsr = p->usr;
59709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
60709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    for(size_t ct=0; ct < sl->count; ct++) {
61709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        ScriptGroupRootFunc_t func;
62709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
63709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mp->usr = sl->usrPtrs[ct];
64709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
65709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mp->ptrIn = NULL;
66709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mp->in = NULL;
67709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mp->ptrOut = NULL;
68709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mp->out = NULL;
69709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
7017e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams        uint32_t istep = 0;
7117e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams        uint32_t ostep = 0;
7217e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams
73709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        if (sl->ins[ct]) {
74709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
7517e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            istep = sl->ins[ct]->mHal.state.elementSizeBytes;
76709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            mp->in = mp->ptrIn;
77709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            if (sl->inExts[ct]) {
78709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
79709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            } else {
80709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
81709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
82709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
83709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
84709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
85709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
86709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        if (sl->outs[ct]) {
87709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
88709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            mp->out = mp->ptrOut;
8917e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
90709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            if (sl->outExts[ct]) {
91709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
92709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            } else {
93709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
94709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
95709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
96709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
97709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
98709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
99709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
10017e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams        func(p, xstart, xend, istep, ostep);
101709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    }
102709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    //ALOGE("script group root");
103709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
104709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    //ConvolveParams *cp = (ConvolveParams *)p->usr;
105709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
106709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    mp->usr = oldUsr;
107709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
108709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
109709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
110709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
111709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::execute() {
112709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    Vector<Allocation *> ins;
113709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    Vector<bool> inExts;
114709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    Vector<Allocation *> outs;
115709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    Vector<bool> outExts;
116709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    Vector<const ScriptKernelID *> kernels;
117709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    bool fieldDep = false;
118709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
119709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
120709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        ScriptGroup::Node *n = mSG->mNodes[ct];
121709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        Script *s = n->mKernels[0]->mScript;
122c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines        if (s->hasObjectSlots()) {
123c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            // Disable the ScriptGroup optimization if we have global RS
124c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            // objects that might interfere between kernels.
125c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            fieldDep = true;
126c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines        }
127709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
128709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
129709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
130709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
131709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
132709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
133709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
134709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
135709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
136709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
137709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
138709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            const ScriptKernelID *k = n->mKernels[ct2];
139709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            Allocation *ain = NULL;
140709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            Allocation *aout = NULL;
141709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            bool inExt = false;
142709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            bool outExt = false;
143709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
144c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            if (k->mScript->hasObjectSlots()) {
145c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines                // Disable the ScriptGroup optimization if we have global RS
146c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines                // objects that might interfere between kernels.
147c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines                fieldDep = true;
148c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            }
149c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines
150709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
151709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                if (n->mInputs[ct3]->mDstKernel.get() == k) {
152709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    ain = n->mInputs[ct3]->mAlloc.get();
153709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    //ALOGE(" link in %p", ain);
154709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
155709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
156709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
157709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                if (mSG->mInputs[ct3]->mKernel == k) {
158709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    ain = mSG->mInputs[ct3]->mAlloc.get();
159709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    inExt = true;
160709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    //ALOGE(" io in %p", ain);
161709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
162709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
163709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
164709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
165709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                if (n->mOutputs[ct3]->mSource.get() == k) {
166709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    aout = n->mOutputs[ct3]->mAlloc.get();
167709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    if(n->mOutputs[ct3]->mDstField.get() != NULL) {
168709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                        fieldDep = true;
169709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    }
170709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    //ALOGE(" link out %p", aout);
171709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
172709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
173709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
174709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                if (mSG->mOutputs[ct3]->mKernel == k) {
175709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    aout = mSG->mOutputs[ct3]->mAlloc.get();
176709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    outExt = true;
177709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                    //ALOGE(" io out %p", aout);
178709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                }
179709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
180709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
181709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            if ((k->mHasKernelOutput == (aout != NULL)) &&
182709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                (k->mHasKernelInput == (ain != NULL))) {
183709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                ins.add(ain);
184709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                inExts.add(inExt);
185709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                outs.add(aout);
186709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                outExts.add(outExt);
187709a0978ae141198018ca9769f8d96292a8928e6Jason Sams                kernels.add(k);
188709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            }
189709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
190709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
191709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    }
192709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
193709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    MTLaunchStruct mtls;
194709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
195709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    if(fieldDep) {
196709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        for (size_t ct=0; ct < ins.size(); ct++) {
197709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            Script *s = kernels[ct]->mScript;
198709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
199709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            uint32_t slot = kernels[ct]->mSlot;
200709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
201709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
202709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            si->forEachKernelSetup(slot, &mtls);
203c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
204709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
205c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines            si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
206709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
207709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    } else {
208709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        ScriptList sl;
209709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.ins = ins.array();
210709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.outs = outs.array();
211709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.kernels = kernels.array();
212709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.count = kernels.size();
213709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
214709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        Vector<const void *> usrPtrs;
215709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        Vector<const void *> fnPtrs;
216709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        Vector<uint32_t> sigs;
217709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        for (size_t ct=0; ct < kernels.size(); ct++) {
218709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            Script *s = kernels[ct]->mScript;
219709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
220709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
221709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
222709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            fnPtrs.add((void *)mtls.kernel);
223709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            usrPtrs.add(mtls.fep.usr);
224709a0978ae141198018ca9769f8d96292a8928e6Jason Sams            sigs.add(mtls.fep.usrLen);
22517e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
226709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        }
227709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.sigs = sigs.array();
228709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.usrPtrs = usrPtrs.array();
229709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.fnPtrs = fnPtrs.array();
230709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.inExts = inExts.array();
231709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        sl.outExts = outExts.array();
232709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
233709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        Script *s = kernels[0]->mScript;
234709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
235709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
236709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mtls.script = NULL;
237709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mtls.kernel = (void (*)())&scriptGroupRoot;
238709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mtls.fep.usr = &sl;
239709a0978ae141198018ca9769f8d96292a8928e6Jason Sams        mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
24017e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams
24117e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams        for (size_t ct=0; ct < kernels.size(); ct++) {
24217e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            Script *s = kernels[ct]->mScript;
24317e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
24417e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams            si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
24517e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams        }
246709a0978ae141198018ca9769f8d96292a8928e6Jason Sams    }
247709a0978ae141198018ca9769f8d96292a8928e6Jason Sams}
248709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
249709a0978ae141198018ca9769f8d96292a8928e6Jason Sams
250