rsCpuScriptGroup2.cpp revision 1c20667f7a174a7c0a1599d34a40c524fe24c615
#include "rsCpuScriptGroup2.h"

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <algorithm>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#ifndef RS_COMPATIBILITY_LIB
#include "bcc/Config/Config.h"
#endif

#include "cpu_ref/rsCpuCore.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
#include "rsCpuExecutable.h"
#include "rsCpuScript.h"
#include "rsScript.h"
#include "rsScriptGroup2.h"
#include "rsScriptIntrinsic.h"

using std::string;
using std::vector;

Ninamespace android { 311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript { 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace { 341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 35da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2; 361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const size_t oldInLen = mutable_kinfo->inLen; 43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross decltype(mutable_kinfo->inStride) oldInStride; 45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : closures) { 48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross // There had better be enough space in mutable_kinfo 51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (size_t i = 0; i < closure->mNumArg; i++) { 54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 
const void* arg = closure->mArgs[i]; 55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* a = (const Allocation*)arg; 56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni eStride * xstart; 59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inPtr[i] = ptr; 63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inStride[i] = eStride; 64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = closure->mNumArg; 661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* out = closure->mReturnValue; 68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ostep * xstart; 71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; 73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->outLen <= 1); 76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 
78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross cpuClosure->mFunc(kinfo, xstart, xend, ostep); 791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = oldInLen; 82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 85da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 86da0f069871343119251d6b0586be356dc2146a62Yang Ni 87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) : 88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mGroup(group), mFunc(nullptr) { 89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mName = strndup(name, strlen(name)); 90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete c; 95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni free(mName); 97da0f069871343119251d6b0586be356dc2146a62Yang Ni} 98da0f069871343119251d6b0586be356dc2146a62Yang Ni 99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const { 100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.empty()) { 101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 103da0f069871343119251d6b0586be356dc2146a62Yang Ni 104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* closure = cpuClosure->mClosure; 105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if 
(!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // closure. 1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& globalDeps = closure->mGlobalDeps; 113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& argDeps = closure->mArgDeps; 114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (CPUClosure* c : mClosures) { 116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* batched = c->mClosure; 117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (globalDeps.find(batched) != globalDeps.end()) { 118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& it = argDeps.find(batched); 121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (it != argDeps.end()) { 122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& args = (*it).second; 123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (const auto &p1 : *args) { 124bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni if (p1.second.get() != nullptr) { 125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 1311c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni // The compiler fusion pass in bcc expects that kernels chained up through 1321c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni // (1st) input 
and output. 1331c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni 1341c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni const Closure* lastBatched = mClosures.back()->mClosure; 1351c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni const auto& it = argDeps.find(lastBatched); 1361c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni 1371c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni if (it == argDeps.end()) { 1381c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni return true; 1391c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni } 1401c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni 1411c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni const auto& args = (*it).second; 1421c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni for (const auto &p1 : *args) { 1431c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni if (p1.first == 0 && p1.second.get() == nullptr) { 1441c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni // The new closure depends on the last batched closure's return 1451c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni // value (fieldId being nullptr) for its first argument (argument 0) 1461c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni return false; 1471c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni } 1481c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni } 1491c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni 1501c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni return true; 1511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1531ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 155062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 156062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mExecutable(nullptr), mScriptObj(nullptr) { 157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!mGroup->mClosures.empty()); 
158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 159062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Batch* batch = new Batch(this, "Batch0"); 160062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 161eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Closure* closure: mGroup->mClosures) { 162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc; 163062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 164062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni RsdCpuScriptImpl* si = 165062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 166062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (closure->mIsKernel) { 167eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 168062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni si->forEachKernelSetup(funcID->mSlot, &mtls); 169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 170eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 171eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si); 172eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 174eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (batch->conflict(cc)) { 175eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 176062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << "Batch" << ++i; 178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch = new Batch(this, ss.str().c_str()); 179eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 180da0f069871343119251d6b0586be356dc2146a62Yang Ni 181eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->mClosures.push_back(cc); 182eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 183da0f069871343119251d6b0586be356dc2146a62Yang Ni 
184eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!batch->mClosures.empty()); 185eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 186da0f069871343119251d6b0586be356dc2146a62Yang Ni 187da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 188062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni compile(mGroup->mCacheDir); 189062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr && mExecutable != nullptr) { 190062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Batch* batch : mBatches) { 191062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch->resolveFuncPtr(mScriptObj); 192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 193eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 194062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILITY_LIB 195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 196062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 197062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) { 198062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::string funcName(mName); 199062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mClosures.front()->mClosure->mIsKernel) { 200062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni funcName.append(".expand"); 201062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 202062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mFunc = dlsym(sharedObj, funcName.c_str()); 203062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (mFunc != nullptr); 2041ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 2051ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 2061ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 207eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 208eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete batch; 209eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 210bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni delete mExecutable; 
211062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: move this dlclose into ~ScriptExecutable(). 212062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr) { 213062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni dlclose(mScriptObj); 214062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 215da0f069871343119251d6b0586be356dc2146a62Yang Ni} 216da0f069871343119251d6b0586be356dc2146a62Yang Ni 217da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 218da0f069871343119251d6b0586be356dc2146a62Yang Ni 219da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 220da0f069871343119251d6b0586be356dc2146a62Yang Ni 221edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 222edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = ""; 223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // If we're debugging, use the debug library. 225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_debug.bc"; 227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni } 228edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 229edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // Check for a platform specific library 230edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 231edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 232edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // NEON-capable ARMv7a devices can use an accelerated math library 233edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // for all reduced precision scripts. 234edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // ARMv8 does not use NEON, as ASIMD can be used with all precision 235edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // levels. 
236edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; 237edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 238edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 239edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__) 240edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // x86 devices will use an optimized library. 241edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_x86.bc"; 242edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else 243edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore.bc"; 244edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 245edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni} 246edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 247da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 248cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const vector<const char*>& inputs, const vector<string>& kernelBatches, 249062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const vector<string>& invokeBatches, 250cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* outputDir, const char* outputFileName, 251cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* coreLibPath, const char* coreLibRelaxedPath, 2528237638f87ca0e265d050fbb13725b41a795fe5fYang Ni const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant, 253edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni vector<const char*>* args) { 254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 255eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-fPIC"); 256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-embedRSInfo"); 2578237638f87ca0e265d050fbb13725b41a795fe5fYang Ni if (emitGlobalInfo) { 2588237638f87ca0e265d050fbb13725b41a795fe5fYang Ni args->push_back("-rs-global-info"); 2598237638f87ca0e265d050fbb13725b41a795fe5fYang Ni if (emitGlobalInfoSkipConstant) { 
2608237638f87ca0e265d050fbb13725b41a795fe5fYang Ni args->push_back("-rs-global-info-skip-constant"); 2618237638f87ca0e265d050fbb13725b41a795fe5fYang Ni } 2628237638f87ca0e265d050fbb13725b41a795fe5fYang Ni } 263eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-mtriple"); 264eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-bclib"); 266cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(coreLibPath); 267edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni args->push_back("-bclib_relaxed"); 268cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(coreLibRelaxedPath); 269cb17015fed6b11a5028f31cc804a3847e379945dYang Ni for (const char* input : inputs) { 270cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(input); 271eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 272062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : kernelBatches) { 273062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-merge"); 274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : invokeBatches) { 277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-invoke"); 278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 279eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 280eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-output_path"); 281cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(outputDir); 282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-o"); 283cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(outputFileName); 284da0f069871343119251d6b0586be356dc2146a62Yang Ni} 285da0f069871343119251d6b0586be356dc2146a62Yang Ni 
286cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Nivoid generateSourceSlot(RsdCpuReferenceImpl* ctxt, 287cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni const Closure& closure, 288cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const std::vector<const char*>& inputs, 289062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream& ss) { 290062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 291062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = funcID->mScript; 292062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 293062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (!script->isIntrinsic()); 294062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 295062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 296cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni (const RsdCpuScriptImpl *)ctxt->lookupScript(script); 297062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 298062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 299062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 300062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputs.begin(); 301062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 302062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << index << "," << funcID->mSlot << "."; 303062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 304062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 305062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILTY_LIB 306da0f069871343119251d6b0586be356dc2146a62Yang Ni 307da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 308da0f069871343119251d6b0586be356dc2146a62Yang Ni 309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) { 310da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 
311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mGroup->mClosures.size() < 2) { 312eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 313eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 314da0f069871343119251d6b0586be356dc2146a62Yang Ni 315cb17015fed6b11a5028f31cc804a3847e379945dYang Ni auto comparator = [](const char* str1, const char* str2) -> bool { 316cb17015fed6b11a5028f31cc804a3847e379945dYang Ni return strcmp(str1, str2) < 0; 317cb17015fed6b11a5028f31cc804a3847e379945dYang Ni }; 318cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::set<const char*, decltype(comparator)> inputSet(comparator); 319cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 320062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Closure* closure : mGroup->mClosures) { 321062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = closure->mFunctionID.get()->mScript; 322da0f069871343119251d6b0586be356dc2146a62Yang Ni 323062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // If any script is an intrinsic, give up trying fusing the kernels. 
324eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (script->isIntrinsic()) { 325eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 326eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 327da0f069871343119251d6b0586be356dc2146a62Yang Ni 328eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const RsdCpuScriptImpl *cpuScript = 329cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script); 330cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni 331cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* bitcodeFilename = cpuScript->getBitcodeFilePath(); 332062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputSet.insert(bitcodeFilename); 333062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 334062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 335cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::vector<const char*> inputs(inputSet.begin(), inputSet.end()); 336062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 337062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> kernelBatches; 338062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> invokeBatches; 339062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 340062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 341062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& batch : mBatches) { 342062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() > 0); 343da0f069871343119251d6b0586be356dc2146a62Yang Ni 344062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 345062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << batch->mName << ":"; 346062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 347062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!batch->mClosures.front()->mClosure->mIsKernel) { 348062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() == 1); 349cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss); 
350062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni invokeBatches.push_back(ss.str()); 351062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 352062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& cpuClosure : batch->mClosures) { 353cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss); 354062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 355062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni kernelBatches.push_back(ss.str()); 356062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 357eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 358da0f069871343119251d6b0586be356dc2146a62Yang Ni 359433558f0f9abbf07770db288183a15fd261cace2Yabin Cui rsAssert(cacheDir != nullptr); 360433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string objFilePath(cacheDir); 361f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append("/"); 362f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append(mGroup->mName); 363f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append(".o"); 364433558f0f9abbf07770db288183a15fd261cace2Yabin Cui 365cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* resName = mGroup->mName; 366edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni string coreLibRelaxedPath; 367edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), 368edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni &coreLibRelaxedPath); 369f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 370eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni vector<const char*> arguments; 3718237638f87ca0e265d050fbb13725b41a795fe5fYang Ni bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo(); 3728237638f87ca0e265d050fbb13725b41a795fe5fYang Ni bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant(); 373cb17015fed6b11a5028f31cc804a3847e379945dYang Ni setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 
374cb17015fed6b11a5028f31cc804a3847e379945dYang Ni resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(), 3758237638f87ca0e265d050fbb13725b41a795fe5fYang Ni emitGlobalInfo, emitGlobalInfoSkipConstant, 376f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni &arguments); 377f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 378f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1, 379cb17015fed6b11a5028f31cc804a3847e379945dYang Ni arguments.data())); 380cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 381cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.push_back(coreLibPath.c_str()); 382cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.push_back(coreLibRelaxedPath.c_str()); 383cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 384cb17015fed6b11a5028f31cc804a3847e379945dYang Ni uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(), 385cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.data(), inputs.size()); 386f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 387cb17015fed6b11a5028f31cc804a3847e379945dYang Ni if (checksum == 0) { 388f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni return; 389f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 390f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 391cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::stringstream ss; 392cb17015fed6b11a5028f31cc804a3847e379945dYang Ni ss << std::hex << checksum; 393cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* checksumStr = ss.str().c_str(); 394f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 395f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 396f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni // Try to load a shared lib from code cache matching filename and checksum 397f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 
398f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 399f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 400f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni if (mScriptObj != nullptr) { 401f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 402cb17015fed6b11a5028f31cc804a3847e379945dYang Ni getCpuRefImpl()->getContext(), mScriptObj, checksum); 403f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni if (mExecutable != nullptr) { 404cb17015fed6b11a5028f31cc804a3847e379945dYang Ni return; 405f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } else { 406f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni ALOGE("Failed to create an executable object from so file"); 407f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 408f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni dlclose(mScriptObj); 409f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mScriptObj = nullptr; 410f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 411f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 412f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 413f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni // Fuse the input kernels and generate native code in an object file 414f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 415f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 416f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.push_back("-build-checksum"); 417cb17015fed6b11a5028f31cc804a3847e379945dYang Ni arguments.push_back(checksumStr); 418f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.push_back(nullptr); 419eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 4202fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, 421f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 
arguments.size()-1, 422f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.data()); 4232fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar if (!compiled) { 424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 426da0f069871343119251d6b0586be356dc2146a62Yang Ni 427eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 428eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Create and load the shared lib 429eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 430da0f069871343119251d6b0586be356dc2146a62Yang Ni 4314c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines if (!SharedLibraryUtils::createSharedLibrary( 4324c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) { 433eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Failed to link object file '%s'", resName); 4348b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni unlink(objFilePath.c_str()); 435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 437da0f069871343119251d6b0586be356dc2146a62Yang Ni 4388b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni unlink(objFilePath.c_str()); 4398b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni 440062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 441062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj == nullptr) { 442eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Unable to load '%s'", resName); 443eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 444eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 445da0f069871343119251d6b0586be356dc2146a62Yang Ni 446eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 
447bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni getCpuRefImpl()->getContext(), 448062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj); 449da0f069871343119251d6b0586be356dc2146a62Yang Ni 450da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 4511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 4531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 454eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (auto batch : mBatches) { 455eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->setGlobalsForBatch(); 456eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->run(); 457eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4581ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4591ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 460da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 461eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 462eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 463062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 464062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* s = funcID->mScript;; 465eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& p : closure->mGlobals) { 466eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void* value = p.second.first; 467eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int size = p.second.second; 468eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (value == nullptr && size == 0) { 469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This indicates the current closure depends on another closure for a 470eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // global in their shared module (script). In this case we don't need to 471eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // copy the value. 
For example, an invoke intializes a global variable 472eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // which a kernel later reads. 473eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni continue; 474eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 475ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni rsAssert(p.first != nullptr); 476062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* script = p.first->mScript; 477cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl(); 478062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 479cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni (const RsdCpuScriptImpl *)ctxt->lookupScript(script); 480062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int slot = p.first->mSlot; 481062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ScriptExecutable* exec = mGroup->getExecutable(); 482062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (exec != nullptr) { 483062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const char* varName = cpuScript->getFieldName(slot); 484062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni void* addr = exec->getFieldAddress(varName); 485062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 486062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 487062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (rs_object_base*)addr, (ObjectBase*)value); 488062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 489062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni memcpy(addr, (const void*)&value, size); 490062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 491eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 492062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 493062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 494062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVarObj(slot, (ObjectBase*)value); 
495062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 496062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVar(slot, (const void*)&value, size); 497062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 498eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 499eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 5001ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 5011ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 5021ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 503da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 504062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!mClosures.front()->mClosure->mIsKernel) { 505062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(mClosures.size() == 1); 506062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 507062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // This batch contains a single closure for an invoke function 508062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni CPUClosure* cc = mClosures.front(); 509062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Closure* c = cc->mClosure; 510062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 511062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 512062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: Need align pointers for x86_64. 
513062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 514062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 515062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 516062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 517062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(invokeID != nullptr); 518062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 519062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 520062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 521062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni return; 522062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 523062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 524062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 525eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 526eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 527eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 528eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 529eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 530ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 531ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 532eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 533eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls); 534eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 535eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 536eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = nullptr; 537062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mtls.kernel = 
(ForEachFunc_t)mFunc; 538eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 539eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads( 540ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 541ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 542eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 543eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, &mtls); 544eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 545eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 546eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 547eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 548eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 549eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 550062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 551062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 552eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 553ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 554ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 555062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni nullptr, 0, nullptr); 556eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 557da0f069871343119251d6b0586be356dc2146a62Yang Ni 558eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 559eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 560eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 561da0f069871343119251d6b0586be356dc2146a62Yang Ni 562ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (cpuClosure->mSi->forEachMtlsSetup((const 
Allocation**)closure->mArgs, 563ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, 564eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mReturnValue, 565eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls)) { 566da0f069871343119251d6b0586be356dc2146a62Yang Ni 567eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 568eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = (void (*)())&groupRoot; 569eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = &mClosures; 570da0f069871343119251d6b0586be356dc2146a62Yang Ni 571eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 572eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 573eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 574eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 575eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 576062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 577062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 578eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 579ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 580ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 581eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr); 582eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 5831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 5841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 5851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 5861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 587