1709a0978ae141198018ca9769f8d96292a8928e6Jason Sams/* 2709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * Copyright (C) 2011 The Android Open Source Project 3709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * 4709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 5709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * you may not use this file except in compliance with the License. 6709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * You may obtain a copy of the License at 7709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * 8709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * http://www.apache.org/licenses/LICENSE-2.0 9709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * 10709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * Unless required by applicable law or agreed to in writing, software 11709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * distributed under the License is distributed on an "AS IS" BASIS, 12709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * See the License for the specific language governing permissions and 14709a0978ae141198018ca9769f8d96292a8928e6Jason Sams * limitations under the License. 15709a0978ae141198018ca9769f8d96292a8928e6Jason Sams */ 16709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 17709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuCore.h" 18709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuScript.h" 19709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsScriptGroup.h" 20709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuScriptGroup.h" 21709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 22709a0978ae141198018ca9769f8d96292a8928e6Jason Samsusing namespace android; 23709a0978ae141198018ca9769f8d96292a8928e6Jason Samsusing namespace android::renderscript; 24709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 251ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) { 26709a0978ae141198018ca9769f8d96292a8928e6Jason Sams mCtx = ctx; 271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mSG = (ScriptGroup*)sg; 28709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 29709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 30709a0978ae141198018ca9769f8d96292a8928e6Jason SamsCpuScriptGroupImpl::~CpuScriptGroupImpl() { 31709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 32709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 33709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 34709a0978ae141198018ca9769f8d96292a8928e6Jason Samsbool CpuScriptGroupImpl::init() { 35709a0978ae141198018ca9769f8d96292a8928e6Jason Sams return true; 36709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 37709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 38709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { 39709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 40709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 41709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { 42709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 43709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 44709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grosstypedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo, 46709a0978ae141198018ca9769f8d96292a8928e6Jason Sams uint32_t xstart, uint32_t xend, 479ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 48709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 49b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo, 50709a0978ae141198018ca9769f8d96292a8928e6Jason Sams uint32_t xstart, uint32_t xend, 519ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 52709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 53709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 54b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const ScriptList *sl = (const ScriptList *)kinfo->usr; 55b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 56709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 57b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const uint32_t oldInStride = mkinfo->inStride[0]; 58f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 5980ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes for (size_t ct = 0; ct < sl->count; ct++) { 60709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ScriptGroupRootFunc_t func; 6180ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; 62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->usr = sl->usrPtrs[ct]; 63709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 64709a0978ae141198018ca9769f8d96292a8928e6Jason Sams if (sl->ins[ct]) { 65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->inLen == 1); 66b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 67b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; 6880ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes 69b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes; 7080ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes 71709a0978ae141198018ca9769f8d96292a8928e6Jason Sams if (sl->inExts[ct]) { 72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->inPtr[0] = 73b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross (mkinfo->inPtr[0] + 74b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y); 75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) { 77b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->inPtr[0] = 78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross (mkinfo->inPtr[0] + 79b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid); 80709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 81f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 82f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes } else { 83b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->inLen == 0); 84b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 85b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->inPtr[0] = nullptr; 86b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->inStride[0] = 0; 87709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 88709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 89f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uint32_t ostep; 90709a0978ae141198018ca9769f8d96292a8928e6Jason Sams if (sl->outs[ct]) { 91b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->outLen == 1); 92b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 93b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->outPtr[0] = 9480ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; 9580ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes 9617e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams ostep = sl->outs[ct]->mHal.state.elementSizeBytes; 9780ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes 98709a0978ae141198018ca9769f8d96292a8928e6Jason Sams if (sl->outExts[ct]) { 99b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->outPtr[0] = 100b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->outPtr[0] + 101b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y; 102b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 103b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) { 104b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->outPtr[0] = 105b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->outPtr[0] + 106b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid; 107709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 108f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes } else { 109b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->outLen == 0); 110b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 111b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->outPtr[0] = nullptr; 112b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ostep = 0; 113709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 114709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 115709a0978ae141198018ca9769f8d96292a8928e6Jason Sams //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); 116b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross func(kinfo, xstart, xend, ostep); 117709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 118709a0978ae141198018ca9769f8d96292a8928e6Jason Sams //ALOGE("script group root"); 119709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 120b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->inStride[0] = oldInStride; 121b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mkinfo->usr = sl; 122709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 123709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 124709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 125709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 126709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid CpuScriptGroupImpl::execute() { 127b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni Vector<Allocation *> ins; 128b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni Vector<bool> inExts; 129b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni Vector<Allocation *> outs; 130b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni Vector<bool> outExts; 131b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni Vector<const ScriptKernelID *> kernels; 132709a0978ae141198018ca9769f8d96292a8928e6Jason Sams bool fieldDep = false; 133709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 134709a0978ae141198018ca9769f8d96292a8928e6Jason Sams for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { 135709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ScriptGroup::Node *n = mSG->mNodes[ct]; 136709a0978ae141198018ca9769f8d96292a8928e6Jason Sams Script *s = n->mKernels[0]->mScript; 137c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines if (s->hasObjectSlots()) { 138c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines // Disable the ScriptGroup optimization if we have global RS 139c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines // objects that might interfere between kernels. 140c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines fieldDep = true; 141c78839b5bbcffae7d64a5a1c9aa60c9a4c5d3918Stephen Hines } 142709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 143709a0978ae141198018ca9769f8d96292a8928e6Jason Sams //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); 144709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 145709a0978ae141198018ca9769f8d96292a8928e6Jason Sams for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { 146709a0978ae141198018ca9769f8d96292a8928e6Jason Sams if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { 147709a0978ae141198018ca9769f8d96292a8928e6Jason Sams //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); 148709a0978ae141198018ca9769f8d96292a8928e6Jason Sams s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); 149709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 150709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 151709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 152709a0978ae141198018ca9769f8d96292a8928e6Jason Sams for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { 153709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const ScriptKernelID *k = n->mKernels[ct2]; 15444bef6fba6244292b751387f3d6c31cca96c28adChris Wailes Allocation *ain = nullptr; 15544bef6fba6244292b751387f3d6c31cca96c28adChris Wailes Allocation *aout = nullptr; 156709a0978ae141198018ca9769f8d96292a8928e6Jason Sams bool inExt = false; 157709a0978ae141198018ca9769f8d96292a8928e6Jason Sams bool outExt = false; 158709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 159709a0978ae141198018ca9769f8d96292a8928e6Jason Sams for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { 160709a0978ae141198018ca9769f8d96292a8928e6Jason Sams if (n->mInputs[ct3]->mDstKernel.get() == k) { 161709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ain = n->mInputs[ct3]->mAlloc.get(); 1625f6f16fe612e1c30732becc66205500994889ac9Yang Ni break; 163709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 164709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 16544bef6fba6244292b751387f3d6c31cca96c28adChris Wailes if (ain == nullptr) { 1665f6f16fe612e1c30732becc66205500994889ac9Yang Ni for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { 1675f6f16fe612e1c30732becc66205500994889ac9Yang Ni if (mSG->mInputs[ct3]->mKernel == k) { 1685f6f16fe612e1c30732becc66205500994889ac9Yang Ni ain = mSG->mInputs[ct3]->mAlloc.get(); 1695f6f16fe612e1c30732becc66205500994889ac9Yang Ni inExt = true; 1705f6f16fe612e1c30732becc66205500994889ac9Yang Ni break; 1715f6f16fe612e1c30732becc66205500994889ac9Yang Ni } 172709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 173709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 174709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 175709a0978ae141198018ca9769f8d96292a8928e6Jason Sams for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { 176709a0978ae141198018ca9769f8d96292a8928e6Jason Sams if (n->mOutputs[ct3]->mSource.get() == k) { 177709a0978ae141198018ca9769f8d96292a8928e6Jason Sams aout = n->mOutputs[ct3]->mAlloc.get(); 17844bef6fba6244292b751387f3d6c31cca96c28adChris Wailes if(n->mOutputs[ct3]->mDstField.get() != nullptr) { 179709a0978ae141198018ca9769f8d96292a8928e6Jason Sams fieldDep = true; 180709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 1815f6f16fe612e1c30732becc66205500994889ac9Yang Ni break; 182709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 183709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 18444bef6fba6244292b751387f3d6c31cca96c28adChris Wailes if (aout == nullptr) { 1855f6f16fe612e1c30732becc66205500994889ac9Yang Ni for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { 1865f6f16fe612e1c30732becc66205500994889ac9Yang Ni if (mSG->mOutputs[ct3]->mKernel == k) { 1875f6f16fe612e1c30732becc66205500994889ac9Yang Ni aout = mSG->mOutputs[ct3]->mAlloc.get(); 1885f6f16fe612e1c30732becc66205500994889ac9Yang Ni outExt = true; 1895f6f16fe612e1c30732becc66205500994889ac9Yang Ni break; 1905f6f16fe612e1c30732becc66205500994889ac9Yang Ni } 191709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 192709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 193709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 19444bef6fba6244292b751387f3d6c31cca96c28adChris Wailes rsAssert((k->mHasKernelOutput == (aout != nullptr)) && 19544bef6fba6244292b751387f3d6c31cca96c28adChris Wailes (k->mHasKernelInput == (ain != nullptr))); 1965f6f16fe612e1c30732becc66205500994889ac9Yang Ni 197b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni ins.add(ain); 198b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni inExts.add(inExt); 199b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni outs.add(aout); 200b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni outExts.add(outExt); 201b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni kernels.add(k); 202709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 203709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 204709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 205709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 20614ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala MTLaunchStructForEach mtls; 207709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 208f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes if (fieldDep) { 209709a0978ae141198018ca9769f8d96292a8928e6Jason Sams for (size_t ct=0; ct < ins.size(); ct++) { 210709a0978ae141198018ca9769f8d96292a8928e6Jason Sams Script *s = kernels[ct]->mScript; 211709a0978ae141198018ca9769f8d96292a8928e6Jason Sams RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 212709a0978ae141198018ca9769f8d96292a8928e6Jason Sams uint32_t slot = kernels[ct]->mSlot; 213709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 214f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uint32_t inLen; 215f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const Allocation **ains; 216f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 21744bef6fba6244292b751387f3d6c31cca96c28adChris Wailes if (ins[ct] == nullptr) { 218f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes inLen = 0; 21944bef6fba6244292b751387f3d6c31cca96c28adChris Wailes ains = nullptr; 220f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 221f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes } else { 222f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes inLen = 1; 223f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes ains = const_cast<const Allocation**>(&ins[ct]); 224f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes } 225f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 226bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls); 227f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 228709a0978ae141198018ca9769f8d96292a8928e6Jason Sams si->forEachKernelSetup(slot, &mtls); 229f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr, 23044bef6fba6244292b751387f3d6c31cca96c28adChris Wailes mtls.fep.usrLen, nullptr); 231f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 232bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams if (launchOK) { 23314ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls); 234bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams } 235f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 23644bef6fba6244292b751387f3d6c31cca96c28adChris Wailes si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr); 237709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 238709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } else { 239709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ScriptList sl; 240b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.ins = ins.array(); 241b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.outs = outs.array(); 242b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.kernels = kernels.array(); 243b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.count = kernels.size(); 244709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 245f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uint32_t inLen; 246f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const Allocation **ains; 247f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 24844bef6fba6244292b751387f3d6c31cca96c28adChris Wailes if (ins[0] == nullptr) { 249f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes inLen = 0; 25044bef6fba6244292b751387f3d6c31cca96c28adChris Wailes ains = nullptr; 251f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 252f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes } else { 253f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes inLen = 1; 254f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes ains = const_cast<const Allocation**>(&ins[0]); 255f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes } 256f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 257b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni Vector<const void *> usrPtrs; 258b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni Vector<const void *> fnPtrs; 259b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni Vector<uint32_t> sigs; 260709a0978ae141198018ca9769f8d96292a8928e6Jason Sams for (size_t ct=0; ct < kernels.size(); ct++) { 261709a0978ae141198018ca9769f8d96292a8928e6Jason Sams Script *s = kernels[ct]->mScript; 262709a0978ae141198018ca9769f8d96292a8928e6Jason Sams RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 263709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 264709a0978ae141198018ca9769f8d96292a8928e6Jason Sams si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); 265b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni fnPtrs.add((void *)mtls.kernel); 266b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni usrPtrs.add(mtls.fep.usr); 267b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sigs.add(mtls.fep.usrLen); 268f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], 26944bef6fba6244292b751387f3d6c31cca96c28adChris Wailes mtls.fep.usr, mtls.fep.usrLen, nullptr); 270709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 271b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.sigs = sigs.array(); 272b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.usrPtrs = usrPtrs.array(); 273b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.fnPtrs = fnPtrs.array(); 274b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.inExts = inExts.array(); 275b8353c5943f4038fd7f08db3d958390ce9418798Yang Ni sl.outExts = outExts.array(); 276709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 277709a0978ae141198018ca9769f8d96292a8928e6Jason Sams Script *s = kernels[0]->mScript; 278709a0978ae141198018ca9769f8d96292a8928e6Jason Sams RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 279f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 280bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) { 281f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 282bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams mtls.script = nullptr; 28314ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala mtls.kernel = &scriptGroupRoot; 284bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams mtls.fep.usr = &sl; 285f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes 28614ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls); 287bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams } 28817e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams 28917e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams for (size_t ct=0; ct < kernels.size(); ct++) { 29017e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams Script *s = kernels[ct]->mScript; 29117e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 29244bef6fba6244292b751387f3d6c31cca96c28adChris Wailes si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0, 29344bef6fba6244292b751387f3d6c31cca96c28adChris Wailes nullptr); 29417e3cdc24776d8fdbf1ce16287b9b4dcd516708fJason Sams } 295709a0978ae141198018ca9769f8d96292a8928e6Jason Sams } 296709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 297