rsCpuScriptGroup2.cpp revision 8237638f87ca0e265d050fbb13725b41a795fe5f
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h" 21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h> 4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h> 5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h> 6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h> 7da0f069871343119251d6b0586be356dc2146a62Yang Ni 8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set> 9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream> 10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string> 11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector> 12da0f069871343119251d6b0586be356dc2146a62Yang Ni 13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h" 15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 16da0f069871343119251d6b0586be356dc2146a62Yang Ni 171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h" 181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h" 191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h" 201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h" 212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h" 221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h" 231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h" 241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h" 25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h" 26da0f069871343119251d6b0586be356dc2146a62Yang Ni 27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string; 28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector; 291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android { 311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript { 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace { 341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 35da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2; 361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const size_t oldInLen = mutable_kinfo->inLen; 43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross decltype(mutable_kinfo->inStride) oldInStride; 45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : closures) { 48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross // There had better be enough space in mutable_kinfo 51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (size_t i = 0; i < closure->mNumArg; i++) { 54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const void* arg = closure->mArgs[i]; 55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* a = (const Allocation*)arg; 56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni eStride * xstart; 59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inPtr[i] = ptr; 63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inStride[i] = eStride; 64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = closure->mNumArg; 661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* out = closure->mReturnValue; 68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ostep * xstart; 71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; 73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->outLen <= 1); 76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross cpuClosure->mFunc(kinfo, xstart, xend, ostep); 791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = oldInLen; 82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 85da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 86da0f069871343119251d6b0586be356dc2146a62Yang Ni 87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) : 88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mGroup(group), mFunc(nullptr) { 89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mName = strndup(name, strlen(name)); 90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete c; 95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni free(mName); 97da0f069871343119251d6b0586be356dc2146a62Yang Ni} 98da0f069871343119251d6b0586be356dc2146a62Yang Ni 99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const { 100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.empty()) { 101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 103da0f069871343119251d6b0586be356dc2146a62Yang Ni 104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* closure = cpuClosure->mClosure; 105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // closure. 1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& globalDeps = closure->mGlobalDeps; 113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& argDeps = closure->mArgDeps; 114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (CPUClosure* c : mClosures) { 116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* batched = c->mClosure; 117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (globalDeps.find(batched) != globalDeps.end()) { 118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& it = argDeps.find(batched); 121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (it != argDeps.end()) { 122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& args = (*it).second; 123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (const auto &p1 : *args) { 124bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni if (p1.second.get() != nullptr) { 125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 131eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 136062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 137062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mExecutable(nullptr), mScriptObj(nullptr) { 138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!mGroup->mClosures.empty()); 139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 140062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Batch* batch = new Batch(this, "Batch0"); 141062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Closure* closure: mGroup->mClosures) { 143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc; 144062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 145062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni RsdCpuScriptImpl* si = 146062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 147062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (closure->mIsKernel) { 148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 149062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni si->forEachKernelSetup(funcID->mSlot, &mtls); 150062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si); 153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (batch->conflict(cc)) { 156eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 157062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 158062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << "Batch" << ++i; 159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch = new Batch(this, ss.str().c_str()); 160eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 161da0f069871343119251d6b0586be356dc2146a62Yang Ni 162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->mClosures.push_back(cc); 163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 164da0f069871343119251d6b0586be356dc2146a62Yang Ni 165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!batch->mClosures.empty()); 166eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 167da0f069871343119251d6b0586be356dc2146a62Yang Ni 168da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni compile(mGroup->mCacheDir); 170062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr && mExecutable != nullptr) { 171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Batch* batch : mBatches) { 172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch->resolveFuncPtr(mScriptObj); 173062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 174eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILITY_LIB 176062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) { 179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::string funcName(mName); 180062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mClosures.front()->mClosure->mIsKernel) { 181062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni funcName.append(".expand"); 182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mFunc = dlsym(sharedObj, funcName.c_str()); 184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (mFunc != nullptr); 1851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1871ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 188eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 189eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete batch; 190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 191bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni delete mExecutable; 192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: move this dlclose into ~ScriptExecutable(). 193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr) { 194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni dlclose(mScriptObj); 195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 196da0f069871343119251d6b0586be356dc2146a62Yang Ni} 197da0f069871343119251d6b0586be356dc2146a62Yang Ni 198da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 199da0f069871343119251d6b0586be356dc2146a62Yang Ni 200da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 201da0f069871343119251d6b0586be356dc2146a62Yang Ni 202edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 203edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = ""; 204edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 205edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // If we're debugging, use the debug library. 206edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 207edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_debug.bc"; 208edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni } 209edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 210edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // Check for a platform specific library 211edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 212edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 213edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // NEON-capable ARMv7a devices can use an accelerated math library 214edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // for all reduced precision scripts. 215edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // ARMv8 does not use NEON, as ASIMD can be used with all precision 216edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // levels. 217edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; 218edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 219edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 220edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__) 221edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // x86 devices will use an optimized library. 222edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_x86.bc"; 223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else 224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore.bc"; 225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni} 227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 228da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 229cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const vector<const char*>& inputs, const vector<string>& kernelBatches, 230062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const vector<string>& invokeBatches, 231cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* outputDir, const char* outputFileName, 232cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* coreLibPath, const char* coreLibRelaxedPath, 2338237638f87ca0e265d050fbb13725b41a795fe5fYang Ni const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant, 234edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni vector<const char*>* args) { 235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-fPIC"); 237eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-embedRSInfo"); 2388237638f87ca0e265d050fbb13725b41a795fe5fYang Ni if (emitGlobalInfo) { 2398237638f87ca0e265d050fbb13725b41a795fe5fYang Ni args->push_back("-rs-global-info"); 2408237638f87ca0e265d050fbb13725b41a795fe5fYang Ni if (emitGlobalInfoSkipConstant) { 2418237638f87ca0e265d050fbb13725b41a795fe5fYang Ni args->push_back("-rs-global-info-skip-constant"); 2428237638f87ca0e265d050fbb13725b41a795fe5fYang Ni } 2438237638f87ca0e265d050fbb13725b41a795fe5fYang Ni } 244eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-mtriple"); 245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 246eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-bclib"); 247cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(coreLibPath); 248edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni args->push_back("-bclib_relaxed"); 249cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(coreLibRelaxedPath); 250cb17015fed6b11a5028f31cc804a3847e379945dYang Ni for (const char* input : inputs) { 251cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(input); 252eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 253062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : kernelBatches) { 254062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-merge"); 255062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 256062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 257062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : invokeBatches) { 258062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-invoke"); 259062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 260eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 261eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-output_path"); 262cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(outputDir); 263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-o"); 264cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(outputFileName); 265da0f069871343119251d6b0586be356dc2146a62Yang Ni} 266da0f069871343119251d6b0586be356dc2146a62Yang Ni 267062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid generateSourceSlot(const Closure& closure, 268cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const std::vector<const char*>& inputs, 269062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream& ss) { 270062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 271062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = funcID->mScript; 272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (!script->isIntrinsic()); 274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 279062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 280062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputs.begin(); 281062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 282062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << index << "," << funcID->mSlot << "."; 283062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 284062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 285062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILTY_LIB 286da0f069871343119251d6b0586be356dc2146a62Yang Ni 287da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 288da0f069871343119251d6b0586be356dc2146a62Yang Ni 289062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) { 290da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 291062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mGroup->mClosures.size() < 2) { 292eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 293eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 294da0f069871343119251d6b0586be356dc2146a62Yang Ni 295cb17015fed6b11a5028f31cc804a3847e379945dYang Ni auto comparator = [](const char* str1, const char* str2) -> bool { 296cb17015fed6b11a5028f31cc804a3847e379945dYang Ni return strcmp(str1, str2) < 0; 297cb17015fed6b11a5028f31cc804a3847e379945dYang Ni }; 298cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::set<const char*, decltype(comparator)> inputSet(comparator); 299cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 300062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Closure* closure : mGroup->mClosures) { 301062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = closure->mFunctionID.get()->mScript; 302da0f069871343119251d6b0586be356dc2146a62Yang Ni 303062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // If any script is an intrinsic, give up trying fusing the kernels. 304eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (script->isIntrinsic()) { 305eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 306eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 307da0f069871343119251d6b0586be356dc2146a62Yang Ni 308eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const RsdCpuScriptImpl *cpuScript = 309eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 310cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* bitcodeFilename = cpuScript->getBitcodeFilePath(); 311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputSet.insert(bitcodeFilename); 312062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 313062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 314cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::vector<const char*> inputs(inputSet.begin(), inputSet.end()); 315062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 316062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> kernelBatches; 317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> invokeBatches; 318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& batch : mBatches) { 321062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() > 0); 322da0f069871343119251d6b0586be356dc2146a62Yang Ni 323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << batch->mName << ":"; 325062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!batch->mClosures.front()->mClosure->mIsKernel) { 327062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() == 1); 328062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); 329062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni invokeBatches.push_back(ss.str()); 330062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 331062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& cpuClosure : batch->mClosures) { 332062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni generateSourceSlot(*cpuClosure->mClosure, inputs, ss); 333062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 334062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni kernelBatches.push_back(ss.str()); 335062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 336eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 337da0f069871343119251d6b0586be356dc2146a62Yang Ni 338433558f0f9abbf07770db288183a15fd261cace2Yabin Cui rsAssert(cacheDir != nullptr); 339433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string objFilePath(cacheDir); 340f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append("/"); 341f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append(mGroup->mName); 342f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append(".o"); 343433558f0f9abbf07770db288183a15fd261cace2Yabin Cui 344cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* resName = mGroup->mName; 345edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni string coreLibRelaxedPath; 346edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), 347edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni &coreLibRelaxedPath); 348f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 349eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni vector<const char*> arguments; 3508237638f87ca0e265d050fbb13725b41a795fe5fYang Ni bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo(); 3518237638f87ca0e265d050fbb13725b41a795fe5fYang Ni bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant(); 352cb17015fed6b11a5028f31cc804a3847e379945dYang Ni setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 353cb17015fed6b11a5028f31cc804a3847e379945dYang Ni resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(), 3548237638f87ca0e265d050fbb13725b41a795fe5fYang Ni emitGlobalInfo, emitGlobalInfoSkipConstant, 355f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni &arguments); 356f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 357f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1, 358cb17015fed6b11a5028f31cc804a3847e379945dYang Ni arguments.data())); 359cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 360cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.push_back(coreLibPath.c_str()); 361cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.push_back(coreLibRelaxedPath.c_str()); 362cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 363cb17015fed6b11a5028f31cc804a3847e379945dYang Ni uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(), 364cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.data(), inputs.size()); 365f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 366cb17015fed6b11a5028f31cc804a3847e379945dYang Ni if (checksum == 0) { 367f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni return; 368f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 369f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 370cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::stringstream ss; 371cb17015fed6b11a5028f31cc804a3847e379945dYang Ni ss << std::hex << checksum; 372cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* checksumStr = ss.str().c_str(); 373f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 374f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 375f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni // Try to load a shared lib from code cache matching filename and checksum 376f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 377f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 378f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 379f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni if (mScriptObj != nullptr) { 380f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 381cb17015fed6b11a5028f31cc804a3847e379945dYang Ni getCpuRefImpl()->getContext(), mScriptObj, checksum); 382f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni if (mExecutable != nullptr) { 383cb17015fed6b11a5028f31cc804a3847e379945dYang Ni return; 384f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } else { 385f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni ALOGE("Failed to create an executable object from so file"); 386f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 387f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni dlclose(mScriptObj); 388f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mScriptObj = nullptr; 389f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 390f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 391f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 392f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni // Fuse the input kernels and generate native code in an object file 393f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 394f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 395f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.push_back("-build-checksum"); 396cb17015fed6b11a5028f31cc804a3847e379945dYang Ni arguments.push_back(checksumStr); 397f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.push_back(nullptr); 398eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 3992fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, 400f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.size()-1, 401f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.data()); 4022fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar if (!compiled) { 403eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 405da0f069871343119251d6b0586be356dc2146a62Yang Ni 406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Create and load the shared lib 408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 409da0f069871343119251d6b0586be356dc2146a62Yang Ni 4104c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines if (!SharedLibraryUtils::createSharedLibrary( 4114c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) { 412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Failed to link object file '%s'", resName); 4138b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni unlink(objFilePath.c_str()); 414eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 415eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 416da0f069871343119251d6b0586be356dc2146a62Yang Ni 4178b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni unlink(objFilePath.c_str()); 4188b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni 419062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 420062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj == nullptr) { 421eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Unable to load '%s'", resName); 422eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 423eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 424da0f069871343119251d6b0586be356dc2146a62Yang Ni 425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 426bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni getCpuRefImpl()->getContext(), 427062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj); 428da0f069871343119251d6b0586be356dc2146a62Yang Ni 429da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 4301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 4321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 433eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (auto batch : mBatches) { 434eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->setGlobalsForBatch(); 435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->run(); 436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 439da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 440eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 441eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 442062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 443062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* s = funcID->mScript;; 444eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& p : closure->mGlobals) { 445eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void* value = p.second.first; 446eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int size = p.second.second; 447eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (value == nullptr && size == 0) { 448eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This indicates the current closure depends on another closure for a 449eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // global in their shared module (script). In this case we don't need to 450eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // copy the value. For example, an invoke intializes a global variable 451eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // which a kernel later reads. 452eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni continue; 453eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 454ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni rsAssert(p.first != nullptr); 455062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* script = p.first->mScript; 456062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 457062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 458062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int slot = p.first->mSlot; 459062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ScriptExecutable* exec = mGroup->getExecutable(); 460062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (exec != nullptr) { 461062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const char* varName = cpuScript->getFieldName(slot); 462062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni void* addr = exec->getFieldAddress(varName); 463062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 464062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 465062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (rs_object_base*)addr, (ObjectBase*)value); 466062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 467062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni memcpy(addr, (const void*)&value, size); 468062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 470062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 471062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 472062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVarObj(slot, (ObjectBase*)value); 473062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 474062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVar(slot, (const void*)&value, size); 475062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 476eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 477eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4781ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 4791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 481da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 482062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!mClosures.front()->mClosure->mIsKernel) { 483062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(mClosures.size() == 1); 484062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 485062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // This batch contains a single closure for an invoke function 486062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni CPUClosure* cc = mClosures.front(); 487062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Closure* c = cc->mClosure; 488062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 489062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 490062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: Need align pointers for x86_64. 491062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 492062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 493062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 494062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 495062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(invokeID != nullptr); 496062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 497062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 498062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 499062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni return; 500062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 501062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 502062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 503eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 504eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 505eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 506eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 508ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 509ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 510eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls); 512eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 513eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = nullptr; 515062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mtls.kernel = (ForEachFunc_t)mFunc; 516eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads( 518ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 519ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 520eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 521eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, &mtls); 522eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 523eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 524eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 525eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 526eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 527eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 528062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 529062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 530eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 531ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 532ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 533062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni nullptr, 0, nullptr); 534eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 535da0f069871343119251d6b0586be356dc2146a62Yang Ni 536eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 537eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 538eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 539da0f069871343119251d6b0586be356dc2146a62Yang Ni 540ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 541ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, 542eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mReturnValue, 543eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls)) { 544da0f069871343119251d6b0586be356dc2146a62Yang Ni 545eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 546eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = (void (*)())&groupRoot; 547eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = &mClosures; 548da0f069871343119251d6b0586be356dc2146a62Yang Ni 549eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 550eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 551eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 552eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 553eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 554062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 555062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 556eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 557ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 558ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 559eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr); 560eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 5611ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 5621ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 5631ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 5641ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 565