rsCpuScriptGroup2.cpp revision bf2111d3b3de310932099514f06924e48fa1d7b2
1#include "rsCpuScriptGroup2.h" 2 3#include "cpu_ref/rsCpuCore.h" 4#include "rsClosure.h" 5#include "rsContext.h" 6#include "rsCpuCore.h" 7#include "rsCpuScript.h" 8#include "rsScript.h" 9#include "rsScriptGroup2.h" 10 11namespace android { 12namespace renderscript { 13 14namespace { 15 16static const size_t DefaultKernelArgCount = 2; 17 18void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, 19 uint32_t xend, uint32_t outstep) { 20 const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr; 21 RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; 22 const void **oldIns = kparams->ins; 23 uint32_t *oldStrides = kparams->inEStrides; 24 25 std::vector<const void*> ins(DefaultKernelArgCount); 26 std::vector<uint32_t> strides(DefaultKernelArgCount); 27 28 for (CPUClosure* cpuClosure : closures) { 29 const Closure* closure = cpuClosure->mClosure; 30 31 auto in_iter = ins.begin(); 32 auto stride_iter = strides.begin(); 33 34 for (const auto& arg : closure->mArgs) { 35 const Allocation* a = (const Allocation*)arg; 36 const uint32_t eStride = a->mHal.state.elementSizeBytes; 37 const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 38 eStride * xstart; 39 if (kparams->dimY > 1) { 40 ptr += a->mHal.drvState.lod[0].stride * kparams->y; 41 } 42 *in_iter++ = ptr; 43 *stride_iter++ = eStride; 44 } 45 46 mutable_kparams->ins = &ins[0]; 47 mutable_kparams->inEStrides = &strides[0]; 48 49 const Allocation* out = closure->mReturnValue; 50 const uint32_t ostep = out->mHal.state.elementSizeBytes; 51 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 52 ostep * xstart; 53 if (kparams->dimY > 1) { 54 ptr += out->mHal.drvState.lod[0].stride * kparams->y; 55 } 56 57 mutable_kparams->out = (void*)ptr; 58 59 mutable_kparams->usr = cpuClosure->mUsrPtr; 60 61 cpuClosure->mFunc(kparams, xstart, xend, ostep); 62 } 63 64 mutable_kparams->ins = oldIns; 65 mutable_kparams->inEStrides = oldStrides; 66 mutable_kparams->usr = &closures; 67} 68 69/* 70 Returns true if closure depends on any closure in batch via a glboal variable 71 TODO: this probably should go into class Closure. 72 */ 73bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) { 74 for (const auto &p : closure->mClosure->mGlobalDeps) { 75 const Closure* dep = p.first; 76 for (CPUClosure* c : batch) { 77 if (c->mClosure == dep) { 78 return true; 79 } 80 } 81 } 82 for (const auto &p : closure->mClosure->mArgDeps) { 83 const Closure* dep = p.first; 84 for (CPUClosure* c : batch) { 85 if (c->mClosure == dep) { 86 for (const auto &p1 : *p.second) { 87 if (p1.second != nullptr) { 88 return true; 89 } 90 } 91 } 92 } 93 } 94 return false; 95} 96 97} // namespace 98 99CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 100 const ScriptGroupBase *sg) : 101 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { 102 list<CPUClosure*>* batch = new list<CPUClosure*>(); 103 for (Closure* closure: mGroup->mClosures) { 104 const ScriptKernelID* kernelID = closure->mKernelID.get(); 105 RsdCpuScriptImpl* si = 106 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); 107 108 MTLaunchStruct mtls; 109 si->forEachKernelSetup(kernelID->mSlot, &mtls); 110 // TODO: Is mtls.fep.usrLen ever used? 111 CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, 112 mtls.fep.usr, mtls.fep.usrLen); 113 if (conflict(*batch, cc)) { 114 mBatches.push_back(batch); 115 batch = new list<CPUClosure*>(); 116 } 117 batch->push_back(cc); 118 } 119 mBatches.push_back(batch); 120} 121 122CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 123 for (list<CPUClosure*>* batch : mBatches) { 124 for (CPUClosure* c : *batch) { 125 delete c; 126 } 127 } 128} 129 130void CpuScriptGroup2Impl::execute() { 131 for (list<CPUClosure*>* batch : mBatches) { 132 setGlobalsForBatch(*batch); 133 runBatch(*batch); 134 } 135} 136 137void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) { 138 for (CPUClosure* cpuClosure : batch) { 139 const Closure* closure = cpuClosure->mClosure; 140 const ScriptKernelID* kernelID = closure->mKernelID.get(); 141 Script* s = kernelID->mScript; 142 for (const auto& p : closure->mGlobals) { 143 const void* value = p.second.first; 144 int size = p.second.second; 145 // We use -1 size to indicate an ObjectBase rather than a primitive type 146 if (size < 0) { 147 s->setVarObj(p.first->mSlot, (ObjectBase*)value); 148 } else { 149 s->setVar(p.first->mSlot, (const void*)&value, size); 150 } 151 } 152 } 153} 154 155void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) { 156 for (CPUClosure* cpuClosure : batch) { 157 const Closure* closure = cpuClosure->mClosure; 158 const ScriptKernelID* kernelID = closure->mKernelID.get(); 159 cpuClosure->mSi->preLaunch(kernelID->mSlot, 160 (const Allocation**)&closure->mArgs[0], 161 closure->mArgs.size(), closure->mReturnValue, 162 cpuClosure->mUsrPtr, cpuClosure->mUsrSize, 163 nullptr); 164 } 165 166 const CPUClosure* cpuClosure = batch.front(); 167 const Closure* closure = cpuClosure->mClosure; 168 MTLaunchStruct mtls; 169 170 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0], 171 closure->mArgs.size(), 172 closure->mReturnValue, 173 nullptr, 0, nullptr, &mtls)) { 174 175 mtls.script = nullptr; 176 mtls.kernel = (void (*)())&groupRoot; 177 mtls.fep.usr = &batch; 178 179 mCpuRefImpl->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 180 } 181 182 for (CPUClosure* cpuClosure : batch) { 183 const Closure* closure = cpuClosure->mClosure; 184 const ScriptKernelID* kernelID = closure->mKernelID.get(); 185 cpuClosure->mSi->postLaunch(kernelID->mSlot, 186 (const Allocation**)&closure->mArgs[0], 187 closure->mArgs.size(), closure->mReturnValue, 188 nullptr, 0, nullptr); 189 } 190} 191 192} // namespace renderscript 193} // namespace android 194