rsCpuScriptGroup2.cpp revision fef0cd45027f235126d4fb62bda5ea9037450d9c
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h" 21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h> 4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h> 5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h> 6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h> 7da0f069871343119251d6b0586be356dc2146a62Yang Ni 8062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <set> 9062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#include <sstream> 10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string> 11da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector> 12da0f069871343119251d6b0586be356dc2146a62Yang Ni 13da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 14da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h" 15da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 16da0f069871343119251d6b0586be356dc2146a62Yang Ni 171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h" 181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h" 191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h" 201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h" 212abfcc6d129fe3defddef4540aa95cc445c03a7aYang Ni#include "rsCpuExecutable.h" 221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h" 231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h" 241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h" 25da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h" 26da0f069871343119251d6b0586be356dc2146a62Yang Ni 27da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string; 28da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector; 291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android { 311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript { 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace { 341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 35da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2; 361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 37b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 39b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 40b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 42b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross const size_t oldInLen = mutable_kinfo->inLen; 43b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross 44b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross decltype(mutable_kinfo->inStride) oldInStride; 45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : closures) { 48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross // There had better be enough space in mutable_kinfo 51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 53ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (size_t i = 0; i < closure->mNumArg; i++) { 54ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const void* arg = closure->mArgs[i]; 55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* a = (const Allocation*)arg; 56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni eStride * xstart; 59b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inPtr[i] = ptr; 63b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inStride[i] = eStride; 64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = closure->mNumArg; 661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* out = closure->mReturnValue; 68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ostep * xstart; 71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (kinfo->dim.y > 1) { 72b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; 73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 75b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsAssert(kinfo->outLen <= 1); 76b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 78b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross cpuClosure->mFunc(kinfo, xstart, xend, ostep); 791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 81b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross mutable_kinfo->inLen = oldInLen; 82b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 85da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 86da0f069871343119251d6b0586be356dc2146a62Yang Ni 87062c287f573ecc06c38ee4295e5627e12c52ac3dYang NiBatch::Batch(CpuScriptGroup2Impl* group, const char* name) : 88062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mGroup(group), mFunc(nullptr) { 89062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mName = strndup(name, strlen(name)); 90062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 91062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 92da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete c; 95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 96062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni free(mName); 97da0f069871343119251d6b0586be356dc2146a62Yang Ni} 98da0f069871343119251d6b0586be356dc2146a62Yang Ni 99ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const { 100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.empty()) { 101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 103da0f069871343119251d6b0586be356dc2146a62Yang Ni 104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* closure = cpuClosure->mClosure; 105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 106062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // closure. 1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& globalDeps = closure->mGlobalDeps; 113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& argDeps = closure->mArgDeps; 114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (CPUClosure* c : mClosures) { 116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* batched = c->mClosure; 117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (globalDeps.find(batched) != globalDeps.end()) { 118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& it = argDeps.find(batched); 121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (it != argDeps.end()) { 122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& args = (*it).second; 123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (const auto &p1 : *args) { 124bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni if (p1.second.get() != nullptr) { 125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 1311c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni // The compiler fusion pass in bcc expects that kernels chained up through 1321c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni // (1st) input and output. 1331c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni 1341c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni const Closure* lastBatched = mClosures.back()->mClosure; 1351c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni const auto& it = argDeps.find(lastBatched); 1361c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni 1371c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni if (it == argDeps.end()) { 1381c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni return true; 1391c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni } 1401c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni 1411c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni const auto& args = (*it).second; 1421c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni for (const auto &p1 : *args) { 1431c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni if (p1.first == 0 && p1.second.get() == nullptr) { 1441c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni // The new closure depends on the last batched closure's return 1451c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni // value (fieldId being nullptr) for its first argument (argument 0) 1461c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni return false; 1471c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni } 1481c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni } 1491c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni 1501c20667f7a174a7c0a1599d34a40c524fe24c615Yang Ni return true; 1511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1531ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 155062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 156062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mExecutable(nullptr), mScriptObj(nullptr) { 157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!mGroup->mClosures.empty()); 158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 1591efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni mCpuRefImpl->lockMutex(); 160062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Batch* batch = new Batch(this, "Batch0"); 161062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Closure* closure: mGroup->mClosures) { 163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc; 164062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 165062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni RsdCpuScriptImpl* si = 166062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 167062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (closure->mIsKernel) { 16814ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala MTLaunchStructForEach mtls; 169062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni si->forEachKernelSetup(funcID->mSlot, &mtls); 170062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 171eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 172eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si); 173eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 175eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (batch->conflict(cc)) { 176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 177062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 178062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << "Batch" << ++i; 179062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch = new Batch(this, ss.str().c_str()); 180eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 181da0f069871343119251d6b0586be356dc2146a62Yang Ni 182eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->mClosures.push_back(cc); 183eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 184da0f069871343119251d6b0586be356dc2146a62Yang Ni 185eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!batch->mClosures.empty()); 186eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 187da0f069871343119251d6b0586be356dc2146a62Yang Ni 188da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 189062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni compile(mGroup->mCacheDir); 190062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr && mExecutable != nullptr) { 191062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Batch* batch : mBatches) { 192062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni batch->resolveFuncPtr(mScriptObj); 193062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 194eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 195062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILITY_LIB 1961efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni mCpuRefImpl->unlockMutex(); 197062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 198062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 199062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid Batch::resolveFuncPtr(void* sharedObj) { 200062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::string funcName(mName); 201062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mClosures.front()->mClosure->mIsKernel) { 202062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni funcName.append(".expand"); 203062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 204062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mFunc = dlsym(sharedObj, funcName.c_str()); 205062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (mFunc != nullptr); 2061ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 2071ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 2081ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 209eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 210eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete batch; 211eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 212bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni delete mExecutable; 213062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: move this dlclose into ~ScriptExecutable(). 214062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj != nullptr) { 215062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni dlclose(mScriptObj); 216062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 217da0f069871343119251d6b0586be356dc2146a62Yang Ni} 218da0f069871343119251d6b0586be356dc2146a62Yang Ni 219da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 220da0f069871343119251d6b0586be356dc2146a62Yang Ni 221da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 222da0f069871343119251d6b0586be356dc2146a62Yang Ni 223edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Nistring getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 224edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = ""; 225edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 226edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // If we're debugging, use the debug library. 227edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 228edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_debug.bc"; 229edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni } 230edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 231edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // Check for a platform specific library 232edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 233edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 234edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // NEON-capable ARMv7a devices can use an accelerated math library 235edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // for all reduced precision scripts. 236edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // ARMv8 does not use NEON, as ASIMD can be used with all precision 237edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // levels. 238edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; 239edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 240edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 241edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#if defined(__i386__) || defined(__x86_64__) 242edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni // x86 devices will use an optimized library. 243edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore_x86.bc"; 244edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#else 245edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni return SYSLIBPATH"/libclcore.bc"; 246edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni#endif 247edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni} 248edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni 249da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 250cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const vector<const char*>& inputs, const vector<string>& kernelBatches, 251062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const vector<string>& invokeBatches, 252cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* outputDir, const char* outputFileName, 253cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* coreLibPath, const char* coreLibRelaxedPath, 2548237638f87ca0e265d050fbb13725b41a795fe5fYang Ni const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant, 255f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham int optLevel, vector<const char*>* args) { 256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 257eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-fPIC"); 258eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-embedRSInfo"); 2598237638f87ca0e265d050fbb13725b41a795fe5fYang Ni if (emitGlobalInfo) { 2608237638f87ca0e265d050fbb13725b41a795fe5fYang Ni args->push_back("-rs-global-info"); 2618237638f87ca0e265d050fbb13725b41a795fe5fYang Ni if (emitGlobalInfoSkipConstant) { 2628237638f87ca0e265d050fbb13725b41a795fe5fYang Ni args->push_back("-rs-global-info-skip-constant"); 2638237638f87ca0e265d050fbb13725b41a795fe5fYang Ni } 2648237638f87ca0e265d050fbb13725b41a795fe5fYang Ni } 265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-mtriple"); 266eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 267eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-bclib"); 268cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(coreLibPath); 269edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni args->push_back("-bclib_relaxed"); 270cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(coreLibRelaxedPath); 271cb17015fed6b11a5028f31cc804a3847e379945dYang Ni for (const char* input : inputs) { 272cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(input); 273eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 274062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : kernelBatches) { 275062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-merge"); 276062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 277062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 278062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const string& batch : invokeBatches) { 279062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back("-invoke"); 280062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni args->push_back(batch.c_str()); 281eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-output_path"); 283cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(outputDir); 2841efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 285f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham args->push_back("-O"); 286f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham args->push_back(std::to_string(optLevel).c_str()); 287f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham 2881efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // The output filename has to be the last, in case we need to pop it out and 2891efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // replace with a different name. 290eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-o"); 291cb17015fed6b11a5028f31cc804a3847e379945dYang Ni args->push_back(outputFileName); 292da0f069871343119251d6b0586be356dc2146a62Yang Ni} 293da0f069871343119251d6b0586be356dc2146a62Yang Ni 294cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Nivoid generateSourceSlot(RsdCpuReferenceImpl* ctxt, 295cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni const Closure& closure, 296cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const std::vector<const char*>& inputs, 297062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream& ss) { 298062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 299062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = funcID->mScript; 300062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 301062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert (!script->isIntrinsic()); 302062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 303062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 304cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni (const RsdCpuScriptImpl *)ctxt->lookupScript(script); 305062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 306062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 307062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 308062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputs.begin(); 309062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 310062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << index << "," << funcID->mSlot << "."; 311062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni} 312062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 313062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni#endif // RS_COMPATIBILTY_LIB 314da0f069871343119251d6b0586be356dc2146a62Yang Ni 315da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 316da0f069871343119251d6b0586be356dc2146a62Yang Ni 317062c287f573ecc06c38ee4295e5627e12c52ac3dYang Nivoid CpuScriptGroup2Impl::compile(const char* cacheDir) { 318da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 319062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mGroup->mClosures.size() < 2) { 320eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 321eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 322da0f069871343119251d6b0586be356dc2146a62Yang Ni 323cb17015fed6b11a5028f31cc804a3847e379945dYang Ni auto comparator = [](const char* str1, const char* str2) -> bool { 324cb17015fed6b11a5028f31cc804a3847e379945dYang Ni return strcmp(str1, str2) < 0; 325cb17015fed6b11a5028f31cc804a3847e379945dYang Ni }; 326cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::set<const char*, decltype(comparator)> inputSet(comparator); 327cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 328062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (Closure* closure : mGroup->mClosures) { 329062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Script* script = closure->mFunctionID.get()->mScript; 330da0f069871343119251d6b0586be356dc2146a62Yang Ni 331062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // If any script is an intrinsic, give up trying fusing the kernels. 332eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (script->isIntrinsic()) { 333eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 334eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 335da0f069871343119251d6b0586be356dc2146a62Yang Ni 336eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const RsdCpuScriptImpl *cpuScript = 337cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script); 338cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni 339cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* bitcodeFilename = cpuScript->getBitcodeFilePath(); 340062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni inputSet.insert(bitcodeFilename); 341062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 342062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 343cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::vector<const char*> inputs(inputSet.begin(), inputSet.end()); 344062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 345062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> kernelBatches; 346062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::vector<string> invokeBatches; 347062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 348062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int i = 0; 349062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& batch : mBatches) { 350062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() > 0); 351da0f069871343119251d6b0586be356dc2146a62Yang Ni 352062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni std::stringstream ss; 353062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ss << batch->mName << ":"; 354062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 355062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!batch->mClosures.front()->mClosure->mIsKernel) { 356062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(batch->size() == 1); 357cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss); 358062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni invokeBatches.push_back(ss.str()); 359062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 360062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni for (const auto& cpuClosure : batch->mClosures) { 361cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss); 362062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 363062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni kernelBatches.push_back(ss.str()); 364062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 366da0f069871343119251d6b0586be356dc2146a62Yang Ni 367433558f0f9abbf07770db288183a15fd261cace2Yabin Cui rsAssert(cacheDir != nullptr); 368433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string objFilePath(cacheDir); 369f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append("/"); 370f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append(mGroup->mName); 371f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni objFilePath.append(".o"); 372433558f0f9abbf07770db288183a15fd261cace2Yabin Cui 373cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* resName = mGroup->mName; 374edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni string coreLibRelaxedPath; 375edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), 376edf4ea312cc3f7dd4373f8db5aaf9325ff054c8eYang Ni &coreLibRelaxedPath); 377f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 378f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham int optLevel = getCpuRefImpl()->getContext()->getOptLevel(); 379f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham 380eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni vector<const char*> arguments; 3818237638f87ca0e265d050fbb13725b41a795fe5fYang Ni bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo(); 3828237638f87ca0e265d050fbb13725b41a795fe5fYang Ni bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant(); 383cb17015fed6b11a5028f31cc804a3847e379945dYang Ni setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 384cb17015fed6b11a5028f31cc804a3847e379945dYang Ni resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(), 3858237638f87ca0e265d050fbb13725b41a795fe5fYang Ni emitGlobalInfo, emitGlobalInfoSkipConstant, 386f5029803ae6ce9d92d70b76e7a7cdd8d484f31caverena beckham optLevel, &arguments); 387f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 388f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1, 389cb17015fed6b11a5028f31cc804a3847e379945dYang Ni arguments.data())); 390cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 391cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.push_back(coreLibPath.c_str()); 392cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.push_back(coreLibRelaxedPath.c_str()); 393cb17015fed6b11a5028f31cc804a3847e379945dYang Ni 394cb17015fed6b11a5028f31cc804a3847e379945dYang Ni uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(), 395cb17015fed6b11a5028f31cc804a3847e379945dYang Ni inputs.data(), inputs.size()); 396f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 397cb17015fed6b11a5028f31cc804a3847e379945dYang Ni if (checksum == 0) { 398f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni return; 399f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 400f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 401cb17015fed6b11a5028f31cc804a3847e379945dYang Ni std::stringstream ss; 402cb17015fed6b11a5028f31cc804a3847e379945dYang Ni ss << std::hex << checksum; 403cb17015fed6b11a5028f31cc804a3847e379945dYang Ni const char* checksumStr = ss.str().c_str(); 404f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 405f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 406f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni // Try to load a shared lib from code cache matching filename and checksum 407f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 408f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 4091efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni bool alreadyLoaded = false; 4101efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni std::string cloneName; 4111efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 4121efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr, 4131efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni &alreadyLoaded); 414f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni if (mScriptObj != nullptr) { 4151efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // A shared library named resName is found in code cache directory 4161efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // cacheDir, and loaded with the handle stored in mScriptObj. 4171efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 418f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 419cb17015fed6b11a5028f31cc804a3847e379945dYang Ni getCpuRefImpl()->getContext(), mScriptObj, checksum); 4201efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 421f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni if (mExecutable != nullptr) { 4221efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // The loaded shared library in mScriptObj has a matching checksum. 4231efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // An executable object has been created. 424cb17015fed6b11a5028f31cc804a3847e379945dYang Ni return; 425f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 4261efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 4271efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni ALOGV("Failed to create an executable object from so file due to " 4281efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni "mismatching checksum"); 4291efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 4301efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni if (alreadyLoaded) { 4311efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // The shared object found in code cache has already been loaded. 4321efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // A different file name is needed for the new shared library, to 4331efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // avoid corrupting the currently loaded instance. 4341efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 4351efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni cloneName.append(resName); 4361efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni cloneName.append("#"); 4371efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni cloneName.append(SharedLibraryUtils::getRandomString(6).string()); 4381efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 4391efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // The last element in arguments is the output filename. 4401efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni arguments.pop_back(); 4411efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni arguments.push_back(cloneName.c_str()); 4421efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni } 4431efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 444f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni dlclose(mScriptObj); 445f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni mScriptObj = nullptr; 446f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni } 447f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 448f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 449f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni // Fuse the input kernels and generate native code in an object file 450f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni //===--------------------------------------------------------------------===// 451f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni 452f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.push_back("-build-checksum"); 453cb17015fed6b11a5028f31cc804a3847e379945dYang Ni arguments.push_back(checksumStr); 454f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.push_back(nullptr); 455eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 4562fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, 457f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.size()-1, 458f02a2b0a2749d4a4f07edbc23eddff2e51d11b72Yang Ni arguments.data()); 4592fa8a238dd69afebdeb757adcb1d674043d78e32Pirama Arumuga Nainar if (!compiled) { 460eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 461eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 462da0f069871343119251d6b0586be356dc2146a62Yang Ni 463eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 464eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Create and load the shared lib 465eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 466da0f069871343119251d6b0586be356dc2146a62Yang Ni 4674c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines if (!SharedLibraryUtils::createSharedLibrary( 4684c368af7e705f0bcb77fa99495b2e33ef20d2699Stephen Hines getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) { 469eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Failed to link object file '%s'", resName); 4708b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni unlink(objFilePath.c_str()); 471eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 472eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 473da0f069871343119251d6b0586be356dc2146a62Yang Ni 4748b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni unlink(objFilePath.c_str()); 4758b94222cc8ea83c780c98b22dd1921f392a2bcf6Yang Ni 476062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 477062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mScriptObj == nullptr) { 478eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Unable to load '%s'", resName); 479eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 480eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 481da0f069871343119251d6b0586be356dc2146a62Yang Ni 4821efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni if (alreadyLoaded) { 4831efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // Delete the temporary, random-named file that we created to avoid 4841efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni // interfering with an already loaded shared library. 4851efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni string cloneFilePath(cacheDir); 4861efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni cloneFilePath.append("/"); 4871efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni cloneFilePath.append(cloneName.c_str()); 4881efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni cloneFilePath.append(".so"); 4891efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni unlink(cloneFilePath.c_str()); 4901efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni } 4911efae29f4bbe6c165caf6dfc4b89cf8a5f8c469bYang Ni 492eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 493bd0af2d161e36e52e6782ccb2d15dd5a36467704Yang Ni getCpuRefImpl()->getContext(), 494062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mScriptObj); 495da0f069871343119251d6b0586be356dc2146a62Yang Ni 496da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 4971ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4981ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 4991ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 500eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (auto batch : mBatches) { 501eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->setGlobalsForBatch(); 502eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->run(); 503eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 5041ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 5051ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 506da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 507eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 508eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 509062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const IDBase* funcID = closure->mFunctionID.get(); 510062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* s = funcID->mScript;; 511eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& p : closure->mGlobals) { 512fef0cd45027f235126d4fb62bda5ea9037450d9cYang Ni const int64_t value = p.second.first; 513eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int size = p.second.second; 514fef0cd45027f235126d4fb62bda5ea9037450d9cYang Ni if (value == 0 && size == 0) { 515eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This indicates the current closure depends on another closure for a 516eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // global in their shared module (script). In this case we don't need to 517eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // copy the value. For example, an invoke intializes a global variable 518eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // which a kernel later reads. 519eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni continue; 520eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 521ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni rsAssert(p.first != nullptr); 522062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni Script* script = p.first->mScript; 523cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl(); 524062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const RsdCpuScriptImpl *cpuScript = 525cbff7bcc4aacdc39d56628fa5c7c50518d52748cYang Ni (const RsdCpuScriptImpl *)ctxt->lookupScript(script); 526062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni int slot = p.first->mSlot; 527062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ScriptExecutable* exec = mGroup->getExecutable(); 528062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (exec != nullptr) { 529062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const char* varName = cpuScript->getFieldName(slot); 530062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni void* addr = exec->getFieldAddress(varName); 531062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 532062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 533062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (rs_object_base*)addr, (ObjectBase*)value); 534062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 535062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni memcpy(addr, (const void*)&value, size); 536062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 537eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 538062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 539062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (size < 0) { 540062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVarObj(slot, (ObjectBase*)value); 541062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 542062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni s->setVar(slot, (const void*)&value, size); 543062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 544eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 545eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 5461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 5471ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 5481ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 549da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 550062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (!mClosures.front()->mClosure->mIsKernel) { 551062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(mClosures.size() == 1); 552062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 553062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // This batch contains a single closure for an invoke function 554062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni CPUClosure* cc = mClosures.front(); 555062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const Closure* c = cc->mClosure; 556062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 557062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 558062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // TODO: Need align pointers for x86_64. 559062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 560062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 561062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } else { 562062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 563062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni rsAssert(invokeID != nullptr); 564062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 565062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 566062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 567062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni return; 568062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni } 569062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni 570062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni if (mFunc != nullptr) { 57114ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala MTLaunchStructForEach mtls; 572eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 573eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 574eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 575eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 576ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 577ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 578eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 579eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls); 580eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 581eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 582eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = nullptr; 583062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni mtls.kernel = (ForEachFunc_t)mFunc; 584eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 58514ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala mGroup->getCpuRefImpl()->launchForEach( 586ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 587ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 588eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 589eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, &mtls); 590eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 591eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 592eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 593eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 594eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 595eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 596062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 597062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 598eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 599ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 600ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 601062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni nullptr, 0, nullptr); 602eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 603da0f069871343119251d6b0586be356dc2146a62Yang Ni 604eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 605eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 60614ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala MTLaunchStructForEach mtls; 607da0f069871343119251d6b0586be356dc2146a62Yang Ni 608ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 609ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, 610eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mReturnValue, 611eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls)) { 612da0f069871343119251d6b0586be356dc2146a62Yang Ni 613eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 61414ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala mtls.kernel = &groupRoot; 615eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = &mClosures; 616da0f069871343119251d6b0586be356dc2146a62Yang Ni 61714ce007a633b10e3b9a3fae29d8f53a7e8c9b59fMatt Wala mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls); 618eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 619eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 620eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 621eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 622062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni const ScriptKernelID* kernelID = 623062c287f573ecc06c38ee4295e5627e12c52ac3dYang Ni (const ScriptKernelID*)closure->mFunctionID.get(); 624eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 625ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 626ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 627eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr); 628eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 6291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 6301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 6311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 6321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 633