rsCpuScriptGroup2.cpp revision 062c287f573ecc06c38ee4295e5627e12c52ac3d
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h" 21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h> 4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h> 5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h> 6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h> 7da0f069871343119251d6b0586be356dc2146a62Yang Ni 8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set> 9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream> 10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string> 11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector> 12da0f069871343119251d6b0586be356dc2146a62Yang Ni 13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h" 15da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <sys/wait.h> 16da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 17da0f069871343119251d6b0586be356dc2146a62Yang Ni 181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h" 191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h" 201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h" 211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h" 222abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h" 231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h" 241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h" 251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h" 26da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h" 27da0f069871343119251d6b0586be356dc2146a62Yang Ni 28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string; 29da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector; 301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android { 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript { 331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace { 351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 36da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2; 371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, 391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 40ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr; 41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; 42eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void **oldIns = kparams->ins; 43eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni uint32_t *oldStrides = kparams->inEStrides; 44eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 45eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<const void*> ins(DefaultKernelArgCount); 46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<uint32_t> strides(DefaultKernelArgCount); 47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : closures) { 49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 50eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 51eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni auto in_iter = ins.begin(); 52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni auto stride_iter = strides.begin(); 53eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (size_t i = 0; i < closure->mNumArg; i++) { 55ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const void* arg = closure->mArgs[i]; 56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* a = (const Allocation*)arg; 57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 59eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni eStride * xstart; 60eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kparams->dimY > 1) { 61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ptr += a->mHal.drvState.lod[0].stride * kparams->y; 62eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 63eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni *in_iter++ = ptr; 64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni *stride_iter++ = eStride; 65eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->ins = &ins[0]; 68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->inEStrides = &strides[0]; 691ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* out = closure->mReturnValue; 71eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 72eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ostep * xstart; 74eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kparams->dimY > 1) { 75eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ptr += out->mHal.drvState.lod[0].stride * kparams->y; 76eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 78eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->out = (void*)ptr; 791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 80eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mFunc(kparams, xstart, xend, ostep); 811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 821ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 83eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->ins = oldIns; 84eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->inEStrides = oldStrides; 851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 87da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 88da0f069871343119251d6b0586be356dc2146a62Yang Ni 89062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) : 90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mGroup(group), mFunc(nullptr) { 91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mName = strndup(name, strlen(name)); 92062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 93062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 94da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 96eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete c; 97eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 98062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni free(mName); 99da0f069871343119251d6b0586be356dc2146a62Yang Ni} 100da0f069871343119251d6b0586be356dc2146a62Yang Ni 101ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const { 102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.empty()) { 103eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 104eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 105da0f069871343119251d6b0586be356dc2146a62Yang Ni 106ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* closure = cpuClosure->mClosure; 107ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 108062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 109eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 110eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // closure. 1111ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1121ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 113eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& globalDeps = closure->mGlobalDeps; 115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& argDeps = closure->mArgDeps; 116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (CPUClosure* c : mClosures) { 118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* batched = c->mClosure; 119ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (globalDeps.find(batched) != globalDeps.end()) { 120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 121eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& it = argDeps.find(batched); 123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (it != argDeps.end()) { 124ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& args = (*it).second; 125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (const auto &p1 : *args) { 126ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (p1.second->get() != nullptr) { 127ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 128eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 129eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 132ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 133eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1361ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 138062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 139062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mExecutable(nullptr), mScriptObj(nullptr) { 140eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!mGroup->mClosures.empty()); 141eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 142062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Batch* batch = new Batch(this, "Batch0"); 143062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 144eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Closure* closure: mGroup->mClosures) { 145eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc; 146062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 147062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni RsdCpuScriptImpl* si = 148062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 149062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (closure->mIsKernel) { 150eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 151062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni si->forEachKernelSetup(funcID->mSlot, &mtls); 152062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 154eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si); 155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1561ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (batch->conflict(cc)) { 158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 160062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << "Batch" << ++i; 161062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch = new Batch(this, ss.str().c_str()); 162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 163da0f069871343119251d6b0586be356dc2146a62Yang Ni 164eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->mClosures.push_back(cc); 165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 166da0f069871343119251d6b0586be356dc2146a62Yang Ni 167eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!batch->mClosures.empty()); 168eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 169da0f069871343119251d6b0586be356dc2146a62Yang Ni 170da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni compile(mGroup->mCacheDir); 172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr && mExecutable != nullptr) { 173062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Batch* batch : mBatches) { 174062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch->resolveFuncPtr(mScriptObj); 175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILITY_LIB 178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 180062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) { 181062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::string funcName(mName); 182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mClosures.front()->mClosure->mIsKernel) { 183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni funcName.append(".expand"); 184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 185062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mFunc = dlsym(sharedObj, funcName.c_str()); 186062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (mFunc != nullptr); 1871ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1881ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1891ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 191eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete batch; 192eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: move this dlclose into ~ScriptExecutable(). 194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr) { 195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni dlclose(mScriptObj); 196062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 197062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni delete mExecutable; 198da0f069871343119251d6b0586be356dc2146a62Yang Ni} 199da0f069871343119251d6b0586be356dc2146a62Yang Ni 200da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 201da0f069871343119251d6b0586be356dc2146a62Yang Ni 202da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 203da0f069871343119251d6b0586be356dc2146a62Yang Ni 204da0f069871343119251d6b0586be356dc2146a62Yang Nistring getFileName(string path) { 205eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni unsigned found = path.find_last_of("/\\"); 206eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return path.substr(found + 1); 207da0f069871343119251d6b0586be356dc2146a62Yang Ni} 208da0f069871343119251d6b0586be356dc2146a62Yang Ni 209da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 210062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const vector<string>& inputs, const vector<string>& kernelBatches, 211062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const vector<string>& invokeBatches, 212eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& output_dir, const string& output_filename, 213eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& rsLib, vector<const char*>* args) { 214eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 215eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-fPIC"); 216eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-embedRSInfo"); 217eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-mtriple"); 218eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 219eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-bclib"); 220eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(rsLib.c_str()); 221eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const string& input : inputs) { 222eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(input.c_str()); 223eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 224062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : kernelBatches) { 225062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-merge"); 226062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 227062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 228062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : invokeBatches) { 229062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-invoke"); 230062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 231eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 232eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-output_path"); 233eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(output_dir.c_str()); 234eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-o"); 235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(output_filename.c_str()); 236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(nullptr); 237da0f069871343119251d6b0586be356dc2146a62Yang Ni} 238da0f069871343119251d6b0586be356dc2146a62Yang Ni 239da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments, 240da0f069871343119251d6b0586be356dc2146a62Yang Ni const string& commandLine) { 241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const pid_t pid = fork(); 242da0f069871343119251d6b0586be356dc2146a62Yang Ni 243eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (pid == -1) { 244eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Couldn't fork for bcc execution"); 245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 246eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 247da0f069871343119251d6b0586be356dc2146a62Yang Ni 248eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (pid == 0) { 249eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Child process 250eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGV("Invoking BCC with: %s", commandLine.c_str()); 251eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); 252da0f069871343119251d6b0586be356dc2146a62Yang Ni 253eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("execv() failed: %s", strerror(errno)); 254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni abort(); 255eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 257da0f069871343119251d6b0586be356dc2146a62Yang Ni 258eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Parent process 259eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int status = 0; 260eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const pid_t w = waitpid(pid, &status, 0); 261eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (w == -1) { 262eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 264da0f069871343119251d6b0586be356dc2146a62Yang Ni 265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { 266eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("bcc terminated unexpectedly"); 267eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 268eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 269da0f069871343119251d6b0586be356dc2146a62Yang Ni 270eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return true; 271da0f069871343119251d6b0586be356dc2146a62Yang Ni} 272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid generateSourceSlot(const Closure& closure, 274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const std::vector<std::string>& inputs, 275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream& ss) { 276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = funcID->mScript; 278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 279062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (!script->isIntrinsic()); 280062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 281062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 282062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 283062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 284062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 285062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 286062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputs.begin(); 287062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 288062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << index << "," << funcID->mSlot << "."; 289062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 290062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 291062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILTY_LIB 292da0f069871343119251d6b0586be356dc2146a62Yang Ni 293da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 294da0f069871343119251d6b0586be356dc2146a62Yang Ni 295062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) { 296da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 297062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mGroup->mClosures.size() < 2) { 298eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 299eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 300da0f069871343119251d6b0586be356dc2146a62Yang Ni 301eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 302eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Fuse the input kernels and generate native code in an object file 303eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 304da0f069871343119251d6b0586be356dc2146a62Yang Ni 305062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::set<string> inputSet; 306062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Closure* closure : mGroup->mClosures) { 307062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = closure->mFunctionID.get()->mScript; 308da0f069871343119251d6b0586be356dc2146a62Yang Ni 309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // If any script is an intrinsic, give up trying fusing the kernels. 310eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (script->isIntrinsic()) { 311eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 312eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 313da0f069871343119251d6b0586be356dc2146a62Yang Ni 314eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const RsdCpuScriptImpl *cpuScript = 315eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 316eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputSet.insert(bitcodeFilename); 318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> inputs(inputSet.begin(), inputSet.end()); 321062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 322062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> kernelBatches; 323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> invokeBatches; 324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 325062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& batch : mBatches) { 327062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() > 0); 328da0f069871343119251d6b0586be356dc2146a62Yang Ni 329062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 330062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << batch->mName << ":"; 331062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 332062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!batch->mClosures.front()->mClosure->mIsKernel) { 333062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() == 1); 334062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); 335062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni invokeBatches.push_back(ss.str()); 336062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 337062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& cpuClosure : batch->mClosures) { 338062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni generateSourceSlot(*cpuClosure->mClosure, inputs, ss); 339062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 340062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni kernelBatches.push_back(ss.str()); 341062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 342eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 343da0f069871343119251d6b0586be356dc2146a62Yang Ni 344433558f0f9abbf07770db288183a15fd261cace2Yabin Cui rsAssert(cacheDir != nullptr); 345433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string objFilePath(cacheDir); 346433558f0f9abbf07770db288183a15fd261cace2Yabin Cui objFilePath.append("/fusedXXXXXX.o"); 347433558f0f9abbf07770db288183a15fd261cace2Yabin Cui // Find unique object file name, to make following file names unique. 348433558f0f9abbf07770db288183a15fd261cace2Yabin Cui int tempfd = mkstemps(&objFilePath[0], 2); 349433558f0f9abbf07770db288183a15fd261cace2Yabin Cui if (tempfd == -1) { 350433558f0f9abbf07770db288183a15fd261cace2Yabin Cui return; 351433558f0f9abbf07770db288183a15fd261cace2Yabin Cui } 352433558f0f9abbf07770db288183a15fd261cace2Yabin Cui TEMP_FAILURE_RETRY(close(tempfd)); 353433558f0f9abbf07770db288183a15fd261cace2Yabin Cui 354433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); 355eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string rsLibPath(SYSLIBPATH"/libclcore.bc"); 356eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni vector<const char*> arguments; 357062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 358062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni outputFileName, rsLibPath, &arguments); 3592abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni std::unique_ptr<const char> joined( 3602abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni rsuJoinStrings(arguments.size() - 1, arguments.data())); 3612abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni string commandLine (joined.get()); 362eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 363eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!fuseAndCompile(arguments.data(), commandLine)) { 364433558f0f9abbf07770db288183a15fd261cace2Yabin Cui unlink(objFilePath.c_str()); 365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 366eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 367da0f069871343119251d6b0586be356dc2146a62Yang Ni 368eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 369eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Create and load the shared lib 370eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 371da0f069871343119251d6b0586be356dc2146a62Yang Ni 372eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const char* resName = outputFileName.c_str(); 373da0f069871343119251d6b0586be356dc2146a62Yang Ni 374eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { 375eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Failed to link object file '%s'", resName); 376eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 377eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 378da0f069871343119251d6b0586be356dc2146a62Yang Ni 379062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 380062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj == nullptr) { 381eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Unable to load '%s'", resName); 382eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 383eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 384da0f069871343119251d6b0586be356dc2146a62Yang Ni 385eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 386062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni nullptr, // RS context. Unused. 387062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj); 388da0f069871343119251d6b0586be356dc2146a62Yang Ni 389da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 3901ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3911ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3921ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (auto batch : mBatches) { 394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->setGlobalsForBatch(); 395eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->run(); 396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 3971ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3981ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 399da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 402062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 403062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* s = funcID->mScript;; 404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& p : closure->mGlobals) { 405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void* value = p.second.first; 406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int size = p.second.second; 407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (value == nullptr && size == 0) { 408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This indicates the current closure depends on another closure for a 409eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // global in their shared module (script). In this case we don't need to 410eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // copy the value. For example, an invoke intializes a global variable 411eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // which a kernel later reads. 412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni continue; 413eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 414ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni rsAssert(p.first != nullptr); 415ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)", 416ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure, p.first, p.first->mScript, p.first->mSlot); 417062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* script = p.first->mScript; 418062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 419062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 420062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int slot = p.first->mSlot; 421062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ScriptExecutable* exec = mGroup->getExecutable(); 422062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (exec != nullptr) { 423062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const char* varName = cpuScript->getFieldName(slot); 424062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni void* addr = exec->getFieldAddress(varName); 425062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 426062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 427062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (rs_object_base*)addr, (ObjectBase*)value); 428062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 429062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni memcpy(addr, (const void*)&value, size); 430062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 431eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 432062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 433062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 434062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVarObj(slot, (ObjectBase*)value); 435062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 436062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVar(slot, (const void*)&value, size); 437062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 438eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 439eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 4411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 443da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 444062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!mClosures.front()->mClosure->mIsKernel) { 445062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(mClosures.size() == 1); 446062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 447062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // This batch contains a single closure for an invoke function 448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni CPUClosure* cc = mClosures.front(); 449062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Closure* c = cc->mClosure; 450062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 451062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 452062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: Need align pointers for x86_64. 453062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 454062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 455062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 456062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 457062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(invokeID != nullptr); 458062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 459062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 460062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 461062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni return; 462062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 463062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 464062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 465eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 466eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 467eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 468eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 470ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 471ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 472eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 473eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls); 474eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 475eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 476eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = nullptr; 477062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mtls.kernel = (ForEachFunc_t)mFunc; 478eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 479eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads( 480ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 481ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 482eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 483eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, &mtls); 484eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 485eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 486eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 487eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 488eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 489eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 490062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 491062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 492eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 493ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 494ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 495062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni nullptr, 0, nullptr); 496eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 497da0f069871343119251d6b0586be356dc2146a62Yang Ni 498eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 499eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 501da0f069871343119251d6b0586be356dc2146a62Yang Ni 502ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 503ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, 504eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mReturnValue, 505eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls)) { 506da0f069871343119251d6b0586be356dc2146a62Yang Ni 507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 508eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = (void (*)())&groupRoot; 509eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = &mClosures; 510da0f069871343119251d6b0586be356dc2146a62Yang Ni 511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 512eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 513eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 515eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 516062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 517062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 518eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 519ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 520ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 521eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr); 522eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 5231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 5241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 5251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 5261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 527