rsCpuScriptGroup2.cpp revision 4c368af7e705f0bcb77fa99495b2e33ef20d2699
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h" 21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h> 4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h> 5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h> 6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h> 7da0f069871343119251d6b0586be356dc2146a62Yang Ni 8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set> 9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream> 10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string> 11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector> 12da0f069871343119251d6b0586be356dc2146a62Yang Ni 13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h" 15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 16da0f069871343119251d6b0586be356dc2146a62Yang Ni 171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h" 181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h" 191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h" 201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h" 212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h" 221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h" 231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h" 241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h" 25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h" 26da0f069871343119251d6b0586be356dc2146a62Yang Ni 27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string; 28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector; 291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android { 311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript { 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace { 341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 35da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2; 361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const size_t oldInLen = mutable_kinfo->inLen; 43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross decltype(mutable_kinfo->inStride) oldInStride; 45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : closures) { 48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross // There had better be enough space in mutable_kinfo 51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (size_t i = 0; i < closure->mNumArg; i++) { 54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const void* arg = closure->mArgs[i]; 55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* a = (const Allocation*)arg; 56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni eStride * xstart; 59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inPtr[i] = ptr; 63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inStride[i] = eStride; 64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = closure->mNumArg; 661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* out = closure->mReturnValue; 68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ostep * xstart; 71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; 73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->outLen <= 1); 76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross cpuClosure->mFunc(kinfo, xstart, xend, ostep); 791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = oldInLen; 82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 85da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 86da0f069871343119251d6b0586be356dc2146a62Yang Ni 87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) : 88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mGroup(group), mFunc(nullptr) { 89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mName = strndup(name, strlen(name)); 90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete c; 95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni free(mName); 97da0f069871343119251d6b0586be356dc2146a62Yang Ni} 98da0f069871343119251d6b0586be356dc2146a62Yang Ni 99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const { 100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.empty()) { 101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 103da0f069871343119251d6b0586be356dc2146a62Yang Ni 104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* closure = cpuClosure->mClosure; 105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // closure. 1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& globalDeps = closure->mGlobalDeps; 113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& argDeps = closure->mArgDeps; 114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (CPUClosure* c : mClosures) { 116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* batched = c->mClosure; 117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (globalDeps.find(batched) != globalDeps.end()) { 118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& it = argDeps.find(batched); 121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (it != argDeps.end()) { 122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& args = (*it).second; 123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (const auto &p1 : *args) { 124bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni if (p1.second.get() != nullptr) { 125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 131eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 136062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 137062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mExecutable(nullptr), mScriptObj(nullptr) { 138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!mGroup->mClosures.empty()); 139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 140062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Batch* batch = new Batch(this, "Batch0"); 141062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Closure* closure: mGroup->mClosures) { 143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc; 144062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 145062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni RsdCpuScriptImpl* si = 146062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 147062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (closure->mIsKernel) { 148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 149062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni si->forEachKernelSetup(funcID->mSlot, &mtls); 150062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si); 153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (batch->conflict(cc)) { 156eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 157062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 158062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << "Batch" << ++i; 159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch = new Batch(this, ss.str().c_str()); 160eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 161da0f069871343119251d6b0586be356dc2146a62Yang Ni 162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->mClosures.push_back(cc); 163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 164da0f069871343119251d6b0586be356dc2146a62Yang Ni 165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!batch->mClosures.empty()); 166eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 167da0f069871343119251d6b0586be356dc2146a62Yang Ni 168da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni compile(mGroup->mCacheDir); 170062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr && mExecutable != nullptr) { 171062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Batch* batch : mBatches) { 172062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch->resolveFuncPtr(mScriptObj); 173062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 174eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 175062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILITY_LIB 176062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) { 179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::string funcName(mName); 180062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mClosures.front()->mClosure->mIsKernel) { 181062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni funcName.append(".expand"); 182062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 183062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mFunc = dlsym(sharedObj, funcName.c_str()); 184062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (mFunc != nullptr); 1851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1871ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 188eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 189eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete batch; 190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 191bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni delete mExecutable; 192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: move this dlclose into ~ScriptExecutable(). 193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr) { 194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni dlclose(mScriptObj); 195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 196da0f069871343119251d6b0586be356dc2146a62Yang Ni} 197da0f069871343119251d6b0586be356dc2146a62Yang Ni 198da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 199da0f069871343119251d6b0586be356dc2146a62Yang Ni 200da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 201da0f069871343119251d6b0586be356dc2146a62Yang Ni 202edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 203edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = ""; 204edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 205edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // If we're debugging, use the debug library. 206edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 207edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_debug.bc"; 208edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni } 209edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 210edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // Check for a platform specific library 211edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 212edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 213edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // NEON-capable ARMv7a devices can use an accelerated math library 214edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // for all reduced precision scripts. 215edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // ARMv8 does not use NEON, as ASIMD can be used with all precision 216edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // levels. 217edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; 218edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 219edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 220edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__) 221edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // x86 devices will use an optimized library. 222edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_x86.bc"; 223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else 224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore.bc"; 225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni} 227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 228da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 229cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const vector<const char*>& inputs, const vector<string>& kernelBatches, 230062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const vector<string>& invokeBatches, 231cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* outputDir, const char* outputFileName, 232cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* coreLibPath, const char* coreLibRelaxedPath, 233edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni vector<const char*>* args) { 234eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-fPIC"); 236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-embedRSInfo"); 237eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-mtriple"); 238eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 239eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-bclib"); 240cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(coreLibPath); 241edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni args->push_back("-bclib_relaxed"); 242cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(coreLibRelaxedPath); 243cb17015fed6b11a5028f31cc804a3847e379945dYang Ni for (const char* input : inputs) { 244cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(input); 245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 246062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : kernelBatches) { 247062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-merge"); 248062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 249062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 250062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : invokeBatches) { 251062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-invoke"); 252062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 253eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-output_path"); 255cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(outputDir); 256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-o"); 257cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(outputFileName); 258da0f069871343119251d6b0586be356dc2146a62Yang Ni} 259da0f069871343119251d6b0586be356dc2146a62Yang Ni 260062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid generateSourceSlot(const Closure& closure, 261cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const std::vector<const char*>& inputs, 262062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream& ss) { 263062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 264062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = funcID->mScript; 265062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 266062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (!script->isIntrinsic()); 267062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 268062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 269062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 270062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 271062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputs.begin(); 274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << index << "," << funcID->mSlot << "."; 276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILTY_LIB 279da0f069871343119251d6b0586be356dc2146a62Yang Ni 280da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 281da0f069871343119251d6b0586be356dc2146a62Yang Ni 282062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) { 283da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 284062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mGroup->mClosures.size() < 2) { 285eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 286eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 287da0f069871343119251d6b0586be356dc2146a62Yang Ni 288cb17015fed6b11a5028f31cc804a3847e379945dYang Ni auto comparator = [](const char* str1, const char* str2) -> bool { 289cb17015fed6b11a5028f31cc804a3847e379945dYang Ni return strcmp(str1, str2) < 0; 290cb17015fed6b11a5028f31cc804a3847e379945dYang Ni }; 291cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::set<const char*, decltype(comparator)> inputSet(comparator); 292cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 293062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Closure* closure : mGroup->mClosures) { 294062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = closure->mFunctionID.get()->mScript; 295da0f069871343119251d6b0586be356dc2146a62Yang Ni 296062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // If any script is an intrinsic, give up trying fusing the kernels. 297eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (script->isIntrinsic()) { 298eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 299eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 300da0f069871343119251d6b0586be356dc2146a62Yang Ni 301eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const RsdCpuScriptImpl *cpuScript = 302eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 303cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* bitcodeFilename = cpuScript->getBitcodeFilePath(); 304062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputSet.insert(bitcodeFilename); 305062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 306062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 307cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::vector<const char*> inputs(inputSet.begin(), inputSet.end()); 308062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> kernelBatches; 310062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> invokeBatches; 311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 312062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 313062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& batch : mBatches) { 314062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() > 0); 315da0f069871343119251d6b0586be356dc2146a62Yang Ni 316062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << batch->mName << ":"; 318062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!batch->mClosures.front()->mClosure->mIsKernel) { 320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() == 1); 321062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); 322062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni invokeBatches.push_back(ss.str()); 323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 324062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& cpuClosure : batch->mClosures) { 325062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni generateSourceSlot(*cpuClosure->mClosure, inputs, ss); 326062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 327062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni kernelBatches.push_back(ss.str()); 328062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 329eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 330da0f069871343119251d6b0586be356dc2146a62Yang Ni 331433558f0f9abbf07770db288183a15fd261cace2Yabin Cui rsAssert(cacheDir != nullptr); 332433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string objFilePath(cacheDir); 333f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append("/"); 334f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append(mGroup->mName); 335f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append(".o"); 336433558f0f9abbf07770db288183a15fd261cace2Yabin Cui 337cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* resName = mGroup->mName; 338edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni string coreLibRelaxedPath; 339edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), 340edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni &coreLibRelaxedPath); 341f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 342eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni vector<const char*> arguments; 343cb17015fed6b11a5028f31cc804a3847e379945dYang Ni setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 344cb17015fed6b11a5028f31cc804a3847e379945dYang Ni resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(), 345f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni &arguments); 346f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 347f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1, 348cb17015fed6b11a5028f31cc804a3847e379945dYang Ni arguments.data())); 349cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 350cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.push_back(coreLibPath.c_str()); 351cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.push_back(coreLibRelaxedPath.c_str()); 352cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 353cb17015fed6b11a5028f31cc804a3847e379945dYang Ni uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(), 354cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.data(), inputs.size()); 355f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 356cb17015fed6b11a5028f31cc804a3847e379945dYang Ni if (checksum == 0) { 357f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni return; 358f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 359f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 360cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::stringstream ss; 361cb17015fed6b11a5028f31cc804a3847e379945dYang Ni ss << std::hex << checksum; 362cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* checksumStr = ss.str().c_str(); 363f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 364f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 365f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni // Try to load a shared lib from code cache matching filename and checksum 366f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 367f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 368f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 369f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni if (mScriptObj != nullptr) { 370f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 371cb17015fed6b11a5028f31cc804a3847e379945dYang Ni getCpuRefImpl()->getContext(), mScriptObj, checksum); 372f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni if (mExecutable != nullptr) { 373cb17015fed6b11a5028f31cc804a3847e379945dYang Ni return; 374f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } else { 375f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni ALOGE("Failed to create an executable object from so file"); 376f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 377f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni dlclose(mScriptObj); 378f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mScriptObj = nullptr; 379f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 380f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 381f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 382f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni // Fuse the input kernels and generate native code in an object file 383f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 384f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 385f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.push_back("-build-checksum"); 386cb17015fed6b11a5028f31cc804a3847e379945dYang Ni arguments.push_back(checksumStr); 387f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.push_back(nullptr); 388eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 3892fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, 390f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.size()-1, 391f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.data()); 3922fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar if (!compiled) { 393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 395da0f069871343119251d6b0586be356dc2146a62Yang Ni 396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 397eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Create and load the shared lib 398eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 399da0f069871343119251d6b0586be356dc2146a62Yang Ni 4004c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines if (!SharedLibraryUtils::createSharedLibrary( 4014c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) { 402eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Failed to link object file '%s'", resName); 4038b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni unlink(objFilePath.c_str()); 404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 406da0f069871343119251d6b0586be356dc2146a62Yang Ni 4078b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni unlink(objFilePath.c_str()); 4088b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni 409062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 410062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj == nullptr) { 411eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Unable to load '%s'", resName); 412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 413eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 414da0f069871343119251d6b0586be356dc2146a62Yang Ni 415eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 416bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni getCpuRefImpl()->getContext(), 417062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj); 418da0f069871343119251d6b0586be356dc2146a62Yang Ni 419da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 4201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 4221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 423eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (auto batch : mBatches) { 424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->setGlobalsForBatch(); 425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->run(); 426eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 429da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 430eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 431eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 432062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 433062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* s = funcID->mScript;; 434eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& p : closure->mGlobals) { 435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void* value = p.second.first; 436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int size = p.second.second; 437eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (value == nullptr && size == 0) { 438eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This indicates the current closure depends on another closure for a 439eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // global in their shared module (script). In this case we don't need to 440eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // copy the value. For example, an invoke intializes a global variable 441eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // which a kernel later reads. 442eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni continue; 443eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 444ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni rsAssert(p.first != nullptr); 445062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* script = p.first->mScript; 446062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 447062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int slot = p.first->mSlot; 449062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ScriptExecutable* exec = mGroup->getExecutable(); 450062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (exec != nullptr) { 451062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const char* varName = cpuScript->getFieldName(slot); 452062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni void* addr = exec->getFieldAddress(varName); 453062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 454062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 455062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (rs_object_base*)addr, (ObjectBase*)value); 456062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 457062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni memcpy(addr, (const void*)&value, size); 458062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 459eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 460062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 461062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 462062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVarObj(slot, (ObjectBase*)value); 463062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 464062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVar(slot, (const void*)&value, size); 465062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 466eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 467eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4681ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 4691ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4701ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 471da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 472062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!mClosures.front()->mClosure->mIsKernel) { 473062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(mClosures.size() == 1); 474062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 475062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // This batch contains a single closure for an invoke function 476062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni CPUClosure* cc = mClosures.front(); 477062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Closure* c = cc->mClosure; 478062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 479062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 480062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: Need align pointers for x86_64. 481062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 482062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 483062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 484062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 485062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(invokeID != nullptr); 486062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 487062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 488062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 489062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni return; 490062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 491062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 492062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 493eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 494eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 495eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 496eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 497eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 498ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 499ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 501eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls); 502eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 503eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 504eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = nullptr; 505062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mtls.kernel = (ForEachFunc_t)mFunc; 506eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads( 508ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 509ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 510eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, &mtls); 512eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 513eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 514eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 515eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 516eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 518062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 519062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 520eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 521ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 522ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 523062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni nullptr, 0, nullptr); 524eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 525da0f069871343119251d6b0586be356dc2146a62Yang Ni 526eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 527eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 528eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 529da0f069871343119251d6b0586be356dc2146a62Yang Ni 530ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 531ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, 532eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mReturnValue, 533eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls)) { 534da0f069871343119251d6b0586be356dc2146a62Yang Ni 535eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 536eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = (void (*)())&groupRoot; 537eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = &mClosures; 538da0f069871343119251d6b0586be356dc2146a62Yang Ni 539eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 540eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 541eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 542eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 543eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 544062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 545062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 546eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 547ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 548ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 549eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr); 550eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 5511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 5521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 5531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 5541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 555