rsCpuScriptGroup2.cpp revision bf2111d3b3de310932099514f06924e48fa1d7b2
1#include "rsCpuScriptGroup2.h"
2
3#include "cpu_ref/rsCpuCore.h"
4#include "rsClosure.h"
5#include "rsContext.h"
6#include "rsCpuCore.h"
7#include "rsCpuScript.h"
8#include "rsScript.h"
9#include "rsScriptGroup2.h"
10
11namespace android {
12namespace renderscript {
13
14namespace {
15
// Number of input slots groupRoot() sets aside up front for a closure's
// kernel arguments.
constexpr size_t DefaultKernelArgCount = 2;
17
18void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
19               uint32_t xend, uint32_t outstep) {
20  const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
21  RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
22  const void **oldIns  = kparams->ins;
23  uint32_t *oldStrides = kparams->inEStrides;
24
25  std::vector<const void*> ins(DefaultKernelArgCount);
26  std::vector<uint32_t> strides(DefaultKernelArgCount);
27
28  for (CPUClosure* cpuClosure : closures) {
29    const Closure* closure = cpuClosure->mClosure;
30
31    auto in_iter = ins.begin();
32    auto stride_iter = strides.begin();
33
34    for (const auto& arg : closure->mArgs) {
35      const Allocation* a = (const Allocation*)arg;
36      const uint32_t eStride = a->mHal.state.elementSizeBytes;
37      const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
38          eStride * xstart;
39      if (kparams->dimY > 1) {
40        ptr += a->mHal.drvState.lod[0].stride * kparams->y;
41      }
42      *in_iter++ = ptr;
43      *stride_iter++ = eStride;
44    }
45
46    mutable_kparams->ins = &ins[0];
47    mutable_kparams->inEStrides = &strides[0];
48
49    const Allocation* out = closure->mReturnValue;
50    const uint32_t ostep = out->mHal.state.elementSizeBytes;
51    const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
52           ostep * xstart;
53    if (kparams->dimY > 1) {
54      ptr += out->mHal.drvState.lod[0].stride * kparams->y;
55    }
56
57    mutable_kparams->out = (void*)ptr;
58
59    mutable_kparams->usr = cpuClosure->mUsrPtr;
60
61    cpuClosure->mFunc(kparams, xstart, xend, ostep);
62  }
63
64  mutable_kparams->ins        = oldIns;
65  mutable_kparams->inEStrides = oldStrides;
66  mutable_kparams->usr        = &closures;
67}
68
69/*
70  Returns true if closure depends on any closure in batch via a glboal variable
71  TODO: this probably should go into class Closure.
72 */
73bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) {
74  for (const auto &p : closure->mClosure->mGlobalDeps) {
75    const Closure* dep = p.first;
76    for (CPUClosure* c : batch) {
77      if (c->mClosure == dep) {
78        return true;
79      }
80    }
81  }
82  for (const auto &p : closure->mClosure->mArgDeps) {
83    const Closure* dep = p.first;
84    for (CPUClosure* c : batch) {
85      if (c->mClosure == dep) {
86        for (const auto &p1 : *p.second) {
87          if (p1.second != nullptr) {
88            return true;
89          }
90        }
91      }
92    }
93  }
94  return false;
95}
96
97}  // namespace
98
99CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
100                                         const ScriptGroupBase *sg) :
101    mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
102  list<CPUClosure*>* batch = new list<CPUClosure*>();
103  for (Closure* closure: mGroup->mClosures) {
104    const ScriptKernelID* kernelID = closure->mKernelID.get();
105    RsdCpuScriptImpl* si =
106        (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
107
108    MTLaunchStruct mtls;
109    si->forEachKernelSetup(kernelID->mSlot, &mtls);
110    // TODO: Is mtls.fep.usrLen ever used?
111    CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
112                                    mtls.fep.usr, mtls.fep.usrLen);
113    if (conflict(*batch, cc)) {
114      mBatches.push_back(batch);
115      batch = new list<CPUClosure*>();
116    }
117    batch->push_back(cc);
118  }
119  mBatches.push_back(batch);
120}
121
122CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
123  for (list<CPUClosure*>* batch : mBatches) {
124    for (CPUClosure* c : *batch) {
125      delete c;
126    }
127  }
128}
129
130void CpuScriptGroup2Impl::execute() {
131  for (list<CPUClosure*>* batch : mBatches) {
132    setGlobalsForBatch(*batch);
133    runBatch(*batch);
134  }
135}
136
137void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) {
138  for (CPUClosure* cpuClosure : batch) {
139    const Closure* closure = cpuClosure->mClosure;
140    const ScriptKernelID* kernelID = closure->mKernelID.get();
141    Script* s = kernelID->mScript;
142    for (const auto& p : closure->mGlobals) {
143      const void* value = p.second.first;
144      int size = p.second.second;
145      // We use -1 size to indicate an ObjectBase rather than a primitive type
146      if (size < 0) {
147        s->setVarObj(p.first->mSlot, (ObjectBase*)value);
148      } else {
149        s->setVar(p.first->mSlot, (const void*)&value, size);
150      }
151    }
152  }
153}
154
155void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) {
156  for (CPUClosure* cpuClosure : batch) {
157    const Closure* closure = cpuClosure->mClosure;
158    const ScriptKernelID* kernelID = closure->mKernelID.get();
159    cpuClosure->mSi->preLaunch(kernelID->mSlot,
160                               (const Allocation**)&closure->mArgs[0],
161                               closure->mArgs.size(), closure->mReturnValue,
162                               cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
163                               nullptr);
164  }
165
166  const CPUClosure* cpuClosure = batch.front();
167  const Closure* closure = cpuClosure->mClosure;
168  MTLaunchStruct mtls;
169
170  if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
171                                        closure->mArgs.size(),
172                                        closure->mReturnValue,
173                                        nullptr, 0, nullptr, &mtls)) {
174
175      mtls.script = nullptr;
176      mtls.kernel = (void (*)())&groupRoot;
177      mtls.fep.usr = &batch;
178
179      mCpuRefImpl->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
180  }
181
182  for (CPUClosure* cpuClosure : batch) {
183    const Closure* closure = cpuClosure->mClosure;
184    const ScriptKernelID* kernelID = closure->mKernelID.get();
185    cpuClosure->mSi->postLaunch(kernelID->mSlot,
186                                (const Allocation**)&closure->mArgs[0],
187                                closure->mArgs.size(), closure->mReturnValue,
188                                nullptr, 0, nullptr);
189  }
190}
191
192}  // namespace renderscript
193}  // namespace android
194