rsCpuScriptGroup2.cpp revision 2fa8a238dd69afebdeb757adcb1d674043d78e32
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h" 21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h> 4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h> 5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h> 6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h> 7da0f069871343119251d6b0586be356dc2146a62Yang Ni 8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set> 9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream> 10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string> 11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector> 12da0f069871343119251d6b0586be356dc2146a62Yang Ni 13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h" 15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 16da0f069871343119251d6b0586be356dc2146a62Yang Ni 171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h" 181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h" 191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h" 201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h" 212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h" 221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h" 231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h" 241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h" 25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h" 26da0f069871343119251d6b0586be356dc2146a62Yang Ni 27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string; 28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector; 291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android { 311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript { 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace { 341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 35da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2; 361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const size_t oldInLen = mutable_kinfo->inLen; 43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross decltype(mutable_kinfo->inStride) oldInStride; 45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : closures) { 48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross // There had better be enough space in mutable_kinfo 51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (size_t i = 0; i < closure->mNumArg; i++) { 54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const void* arg = closure->mArgs[i]; 55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* a = (const Allocation*)arg; 56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni eStride * xstart; 59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inPtr[i] = ptr; 63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inStride[i] = eStride; 64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = closure->mNumArg; 661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* out = closure->mReturnValue; 68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ostep * xstart; 71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; 73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->outLen <= 1); 76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross cpuClosure->mFunc(kinfo, xstart, xend, ostep); 791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = oldInLen; 82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 85da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 86da0f069871343119251d6b0586be356dc2146a62Yang Ni 87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) : 88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mGroup(group), mFunc(nullptr) { 89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mName = strndup(name, strlen(name)); 90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete c; 95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni free(mName); 97da0f069871343119251d6b0586be356dc2146a62Yang Ni} 98da0f069871343119251d6b0586be356dc2146a62Yang Ni 99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const { 100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.empty()) { 101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 103da0f069871343119251d6b0586be356dc2146a62Yang Ni 104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* closure = cpuClosure->mClosure; 105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // closure. 1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& globalDeps = closure->mGlobalDeps; 113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& argDeps = closure->mArgDeps; 114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (CPUClosure* c : mClosures) { 116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* batched = c->mClosure; 117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (globalDeps.find(batched) != globalDeps.end()) { 118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& it = argDeps.find(batched); 121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (it != argDeps.end()) { 122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& args = (*it).second; 123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (const auto &p1 : *args) { 124ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (p1.second->get() != nullptr) { 125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 131eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 136062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 137062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mExecutable(nullptr), mScriptObj(nullptr) { 138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!mGroup->mClosures.empty()); 139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 140062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Batch* batch = new Batch(this, "Batch0"); 141062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Closure* closure: mGroup->mClosures) { 143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc; 144062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 145062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni RsdCpuScriptImpl* si = 146062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 147062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (closure->mIsKernel) { 148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 149062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni si->forEachKernelSetup(funcID->mSlot, &mtls); 150062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si); 153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (batch->conflict(cc)) { 156eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 157062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 158062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << "Batch" << ++i; 159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch = new Batch(this, ss.str().c_str()); 160eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 161da0f069871343119251d6b0586be356dc2146a62Yang Ni 162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->mClosures.push_back(cc); 163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 164da0f069871343119251d6b0586be356dc2146a62Yang Ni 165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!batch->mClosures.empty()); 166eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 167da0f069871343119251d6b0586be356dc2146a62Yang Ni 168da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni compile(mGroup->mCacheDir); 170062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr && mExecutable != nullptr) { 171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Batch* batch : mBatches) { 172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch->resolveFuncPtr(mScriptObj); 173062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 174eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILITY_LIB 176062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) { 179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::string funcName(mName); 180062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mClosures.front()->mClosure->mIsKernel) { 181062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni funcName.append(".expand"); 182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mFunc = dlsym(sharedObj, funcName.c_str()); 184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (mFunc != nullptr); 1851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1871ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 188eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 189eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete batch; 190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 191062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: move this dlclose into ~ScriptExecutable(). 192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr) { 193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni dlclose(mScriptObj); 194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni delete mExecutable; 196da0f069871343119251d6b0586be356dc2146a62Yang Ni} 197da0f069871343119251d6b0586be356dc2146a62Yang Ni 198da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 199da0f069871343119251d6b0586be356dc2146a62Yang Ni 200da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 201da0f069871343119251d6b0586be356dc2146a62Yang Ni 202edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 203edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = ""; 204edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 205edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // If we're debugging, use the debug library. 206edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 207edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_debug.bc"; 208edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni } 209edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 210edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // Check for a platform specific library 211edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 212edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 213edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // NEON-capable ARMv7a devices can use an accelerated math library 214edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // for all reduced precision scripts. 215edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // ARMv8 does not use NEON, as ASIMD can be used with all precision 216edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // levels. 217edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; 218edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 219edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 220edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__) 221edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // x86 devices will use an optimized library. 222edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_x86.bc"; 223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else 224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore.bc"; 225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni} 227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 228da0f069871343119251d6b0586be356dc2146a62Yang Nistring getFileName(string path) { 229eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni unsigned found = path.find_last_of("/\\"); 230eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return path.substr(found + 1); 231da0f069871343119251d6b0586be356dc2146a62Yang Ni} 232da0f069871343119251d6b0586be356dc2146a62Yang Ni 233da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 234062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const vector<string>& inputs, const vector<string>& kernelBatches, 235062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const vector<string>& invokeBatches, 236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& output_dir, const string& output_filename, 237edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni const string& coreLibPath, const string& coreLibRelaxedPath, 238edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni vector<const char*>* args) { 239eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 240eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-fPIC"); 241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-embedRSInfo"); 242eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-mtriple"); 243eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 244eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-bclib"); 245edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni args->push_back(coreLibPath.c_str()); 246edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni args->push_back("-bclib_relaxed"); 247edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni args->push_back(coreLibRelaxedPath.c_str()); 248eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const string& input : inputs) { 249eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(input.c_str()); 250eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 251062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : kernelBatches) { 252062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-merge"); 253062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 254062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 255062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : invokeBatches) { 256062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-invoke"); 257062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 258eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 259eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-output_path"); 260eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(output_dir.c_str()); 261eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-o"); 262eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(output_filename.c_str()); 263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(nullptr); 264da0f069871343119251d6b0586be356dc2146a62Yang Ni} 265da0f069871343119251d6b0586be356dc2146a62Yang Ni 266062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid generateSourceSlot(const Closure& closure, 267062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const std::vector<std::string>& inputs, 268062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream& ss) { 269062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 270062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = funcID->mScript; 271062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (!script->isIntrinsic()); 273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 279062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputs.begin(); 280062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 281062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << index << "," << funcID->mSlot << "."; 282062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 283062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 284062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILTY_LIB 285da0f069871343119251d6b0586be356dc2146a62Yang Ni 286da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 287da0f069871343119251d6b0586be356dc2146a62Yang Ni 288062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) { 289da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 290062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mGroup->mClosures.size() < 2) { 291eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 292eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 293da0f069871343119251d6b0586be356dc2146a62Yang Ni 294eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 295eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Fuse the input kernels and generate native code in an object file 296eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 297da0f069871343119251d6b0586be356dc2146a62Yang Ni 298062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::set<string> inputSet; 299062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Closure* closure : mGroup->mClosures) { 300062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = closure->mFunctionID.get()->mScript; 301da0f069871343119251d6b0586be356dc2146a62Yang Ni 302062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // If any script is an intrinsic, give up trying fusing the kernels. 303eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (script->isIntrinsic()) { 304eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 305eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 306da0f069871343119251d6b0586be356dc2146a62Yang Ni 307eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const RsdCpuScriptImpl *cpuScript = 308eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 309eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 310062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputSet.insert(bitcodeFilename); 311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 312062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 313062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> inputs(inputSet.begin(), inputSet.end()); 314062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 315062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> kernelBatches; 316062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> invokeBatches; 317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& batch : mBatches) { 320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() > 0); 321da0f069871343119251d6b0586be356dc2146a62Yang Ni 322062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << batch->mName << ":"; 324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 325062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!batch->mClosures.front()->mClosure->mIsKernel) { 326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() == 1); 327062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); 328062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni invokeBatches.push_back(ss.str()); 329062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 330062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& cpuClosure : batch->mClosures) { 331062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni generateSourceSlot(*cpuClosure->mClosure, inputs, ss); 332062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 333062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni kernelBatches.push_back(ss.str()); 334062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 335eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 336da0f069871343119251d6b0586be356dc2146a62Yang Ni 337433558f0f9abbf07770db288183a15fd261cace2Yabin Cui rsAssert(cacheDir != nullptr); 338433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string objFilePath(cacheDir); 339433558f0f9abbf07770db288183a15fd261cace2Yabin Cui objFilePath.append("/fusedXXXXXX.o"); 340433558f0f9abbf07770db288183a15fd261cace2Yabin Cui // Find unique object file name, to make following file names unique. 341433558f0f9abbf07770db288183a15fd261cace2Yabin Cui int tempfd = mkstemps(&objFilePath[0], 2); 342433558f0f9abbf07770db288183a15fd261cace2Yabin Cui if (tempfd == -1) { 343433558f0f9abbf07770db288183a15fd261cace2Yabin Cui return; 344433558f0f9abbf07770db288183a15fd261cace2Yabin Cui } 345433558f0f9abbf07770db288183a15fd261cace2Yabin Cui TEMP_FAILURE_RETRY(close(tempfd)); 346433558f0f9abbf07770db288183a15fd261cace2Yabin Cui 347433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); 348edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni string coreLibRelaxedPath; 349edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), 350edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni &coreLibRelaxedPath); 351eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni vector<const char*> arguments; 3522fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar string output_dir(cacheDir); 3532fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar setupCompileArguments(inputs, kernelBatches, invokeBatches, output_dir, 354edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni outputFileName, coreLibPath, coreLibRelaxedPath, &arguments); 355eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 3562fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, 3572fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar arguments.size()-1, 3582fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar arguments.data()); 3592fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar if (!compiled) { 360433558f0f9abbf07770db288183a15fd261cace2Yabin Cui unlink(objFilePath.c_str()); 361eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 362eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 363da0f069871343119251d6b0586be356dc2146a62Yang Ni 364eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Create and load the shared lib 366eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 367da0f069871343119251d6b0586be356dc2146a62Yang Ni 368eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const char* resName = outputFileName.c_str(); 369da0f069871343119251d6b0586be356dc2146a62Yang Ni 370eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { 371eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Failed to link object file '%s'", resName); 372eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 373eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 374da0f069871343119251d6b0586be356dc2146a62Yang Ni 375062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 376062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj == nullptr) { 377eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Unable to load '%s'", resName); 378eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 379eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 380da0f069871343119251d6b0586be356dc2146a62Yang Ni 381eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 382062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni nullptr, // RS context. Unused. 383062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj); 384da0f069871343119251d6b0586be356dc2146a62Yang Ni 385da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 3861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3871ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3881ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 389eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (auto batch : mBatches) { 390eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->setGlobalsForBatch(); 391eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->run(); 392eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 3931ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3941ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 395da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 397eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 398062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 399062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* s = funcID->mScript;; 400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& p : closure->mGlobals) { 401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void* value = p.second.first; 402eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int size = p.second.second; 403eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (value == nullptr && size == 0) { 404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This indicates the current closure depends on another closure for a 405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // global in their shared module (script). In this case we don't need to 406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // copy the value. For example, an invoke intializes a global variable 407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // which a kernel later reads. 408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni continue; 409eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 410ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni rsAssert(p.first != nullptr); 411ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)", 412ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure, p.first, p.first->mScript, p.first->mSlot); 413062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* script = p.first->mScript; 414062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 415062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 416062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int slot = p.first->mSlot; 417062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ScriptExecutable* exec = mGroup->getExecutable(); 418062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (exec != nullptr) { 419062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const char* varName = cpuScript->getFieldName(slot); 420062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni void* addr = exec->getFieldAddress(varName); 421062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 422062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 423062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (rs_object_base*)addr, (ObjectBase*)value); 424062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 425062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni memcpy(addr, (const void*)&value, size); 426062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 427eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 428062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 429062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 430062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVarObj(slot, (ObjectBase*)value); 431062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 432062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVar(slot, (const void*)&value, size); 433062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 434eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 4371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 439da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 440062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!mClosures.front()->mClosure->mIsKernel) { 441062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(mClosures.size() == 1); 442062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 443062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // This batch contains a single closure for an invoke function 444062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni CPUClosure* cc = mClosures.front(); 445062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Closure* c = cc->mClosure; 446062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 447062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: Need align pointers for x86_64. 449062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 450062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 451062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 452062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 453062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(invokeID != nullptr); 454062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 455062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 456062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 457062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni return; 458062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 459062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 460062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 461eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 462eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 463eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 464eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 465eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 466ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 467ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 468eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls); 470eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 471eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 472eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = nullptr; 473062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mtls.kernel = (ForEachFunc_t)mFunc; 474eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 475eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads( 476ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 477ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 478eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 479eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, &mtls); 480eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 481eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 482eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 483eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 484eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 485eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 486062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 487062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 488eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 489ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 490ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 491062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni nullptr, 0, nullptr); 492eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 493da0f069871343119251d6b0586be356dc2146a62Yang Ni 494eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 495eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 496eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 497da0f069871343119251d6b0586be356dc2146a62Yang Ni 498ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 499ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, 500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mReturnValue, 501eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls)) { 502da0f069871343119251d6b0586be356dc2146a62Yang Ni 503eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 504eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = (void (*)())&groupRoot; 505eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = &mClosures; 506da0f069871343119251d6b0586be356dc2146a62Yang Ni 507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 508eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 509eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 510eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 512062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 513062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 515ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 516ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr); 518eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 5191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 5201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 5211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 5221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 523