rsCpuScriptGroup2.cpp revision 433558f0f9abbf07770db288183a15fd261cace2
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h" 21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h> 4433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdio.h> 5433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <stdlib.h> 6433558f0f9abbf07770db288183a15fd261cace2Yabin Cui#include <unistd.h> 7da0f069871343119251d6b0586be356dc2146a62Yang Ni 8da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string> 9da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector> 10da0f069871343119251d6b0586be356dc2146a62Yang Ni 11da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 12da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h" 13da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <sys/wait.h> 14da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 15da0f069871343119251d6b0586be356dc2146a62Yang Ni 161ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h" 171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h" 181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h" 191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h" 201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h" 211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h" 221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h" 23da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h" 24da0f069871343119251d6b0586be356dc2146a62Yang Ni 25da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string; 26da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector; 271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android { 291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript { 301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace { 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 33da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2; 341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, 361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 37ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr; 38eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; 39eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void **oldIns = kparams->ins; 40eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni uint32_t *oldStrides = kparams->inEStrides; 41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 42eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<const void*> ins(DefaultKernelArgCount); 43eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<uint32_t> strides(DefaultKernelArgCount); 44eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 45eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : closures) { 46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni auto in_iter = ins.begin(); 49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni auto stride_iter = strides.begin(); 50eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 51ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (size_t i = 0; i < closure->mNumArg; i++) { 52ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const void* arg = closure->mArgs[i]; 53eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* a = (const Allocation*)arg; 54eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni eStride * xstart; 57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kparams->dimY > 1) { 58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ptr += a->mHal.drvState.lod[0].stride * kparams->y; 59eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 60eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni *in_iter++ = ptr; 61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni *stride_iter++ = eStride; 62eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 631ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->ins = &ins[0]; 65eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->inEStrides = &strides[0]; 661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* out = closure->mReturnValue; 68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ostep * xstart; 71eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kparams->dimY > 1) { 72eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ptr += out->mHal.drvState.lod[0].stride * kparams->y; 73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 75eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->out = (void*)ptr; 761ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 77eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->usr = cpuClosure->mUsrPtr; 781ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 79eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mFunc(kparams, xstart, xend, ostep); 801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 82eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->ins = oldIns; 83eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->inEStrides = oldStrides; 84eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->usr = &closures; 851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 861ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 87da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 88da0f069871343119251d6b0586be356dc2146a62Yang Ni 89da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 90eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 91eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete c; 92eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 93eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mScriptObj) { 94eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni dlclose(mScriptObj); 95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 96da0f069871343119251d6b0586be356dc2146a62Yang Ni} 97da0f069871343119251d6b0586be356dc2146a62Yang Ni 98ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Nibool Batch::conflict(CPUClosure* cpuClosure) const { 99eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.empty()) { 100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 102da0f069871343119251d6b0586be356dc2146a62Yang Ni 103ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* closure = cpuClosure->mClosure; 104ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 105ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (closure->mKernelID.get() == nullptr || 106eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mClosures.front()->mClosure->mKernelID.get() == nullptr) { 107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // closure. 1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1101ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 112ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& globalDeps = closure->mGlobalDeps; 113ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& argDeps = closure->mArgDeps; 114ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 115ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (CPUClosure* c : mClosures) { 116ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const Closure* batched = c->mClosure; 117ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (globalDeps.find(batched) != globalDeps.end()) { 118ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 120ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& it = argDeps.find(batched); 121ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (it != argDeps.end()) { 122ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni const auto& args = (*it).second; 123ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni for (const auto &p1 : *args) { 124ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (p1.second->get() != nullptr) { 125ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni return true; 126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 130ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni 131eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 1361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { 137eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!mGroup->mClosures.empty()); 138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni Batch* batch = new Batch(this); 140eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Closure* closure: mGroup->mClosures) { 141eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni RsdCpuScriptImpl* si; 143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc; 144eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kernelID != nullptr) { 145eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); 146eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 147eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni si->forEachKernelSetup(kernelID->mSlot, &mtls); 148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // TODO: Is mtls.fep.usrLen ever used? 149eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, 150eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr, mtls.fep.usrLen); 151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript( 153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mInvokeID->mScript); 154eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si); 155eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1561ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (batch->conflict(cc)) { 158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 159eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch = new Batch(this); 160eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 161da0f069871343119251d6b0586be356dc2146a62Yang Ni 162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->mClosures.push_back(cc); 163eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 164da0f069871343119251d6b0586be356dc2146a62Yang Ni 165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!batch->mClosures.empty()); 166eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 167da0f069871343119251d6b0586be356dc2146a62Yang Ni 168da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 169eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 170ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni batch->tryToCreateFusedKernel(mGroup->mCacheDir); 171eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 172da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 1731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1751ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 177eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete batch; 178eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 179da0f069871343119251d6b0586be356dc2146a62Yang Ni} 180da0f069871343119251d6b0586be356dc2146a62Yang Ni 181da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 182da0f069871343119251d6b0586be356dc2146a62Yang Ni 183da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 184da0f069871343119251d6b0586be356dc2146a62Yang Ni 185da0f069871343119251d6b0586be356dc2146a62Yang Nistring getFileName(string path) { 186eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni unsigned found = path.find_last_of("/\\"); 187eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return path.substr(found + 1); 188da0f069871343119251d6b0586be356dc2146a62Yang Ni} 189da0f069871343119251d6b0586be356dc2146a62Yang Ni 190da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 191eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const vector<string>& inputs, const vector<int>& kernels, 192eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& output_dir, const string& output_filename, 193eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& rsLib, vector<const char*>* args) { 194eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 195eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-fPIC"); 196eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-embedRSInfo"); 197eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-mtriple"); 198eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 199eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-bclib"); 200eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(rsLib.c_str()); 201eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const string& input : inputs) { 202eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(input.c_str()); 203eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 204eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (int kernel : kernels) { 205eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-k"); 206eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string strKernel = std::to_string(kernel); 207eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(strKernel.c_str()); 208eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 209eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-output_path"); 210eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(output_dir.c_str()); 211eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-o"); 212eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(output_filename.c_str()); 213eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(nullptr); 214da0f069871343119251d6b0586be356dc2146a62Yang Ni} 215da0f069871343119251d6b0586be356dc2146a62Yang Ni 216da0f069871343119251d6b0586be356dc2146a62Yang Nistring convertListToString(int n, const char* const* strs) { 217eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string ret; 218eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ret.append(strs[0]); 219eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (int i = 1; i < n; i++) { 220eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ret.append(" "); 221eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ret.append(strs[i]); 222eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 223eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return ret; 224da0f069871343119251d6b0586be356dc2146a62Yang Ni} 225da0f069871343119251d6b0586be356dc2146a62Yang Ni 226da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments, 227da0f069871343119251d6b0586be356dc2146a62Yang Ni const string& commandLine) { 228eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const pid_t pid = fork(); 229da0f069871343119251d6b0586be356dc2146a62Yang Ni 230eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (pid == -1) { 231eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Couldn't fork for bcc execution"); 232eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 233eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 234da0f069871343119251d6b0586be356dc2146a62Yang Ni 235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (pid == 0) { 236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Child process 237eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGV("Invoking BCC with: %s", commandLine.c_str()); 238eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); 239da0f069871343119251d6b0586be356dc2146a62Yang Ni 240eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("execv() failed: %s", strerror(errno)); 241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni abort(); 242eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 243eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 244da0f069871343119251d6b0586be356dc2146a62Yang Ni 245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Parent process 246eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int status = 0; 247eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const pid_t w = waitpid(pid, &status, 0); 248eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (w == -1) { 249eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 250eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 251da0f069871343119251d6b0586be356dc2146a62Yang Ni 252eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { 253eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("bcc terminated unexpectedly"); 254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 255eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 256da0f069871343119251d6b0586be356dc2146a62Yang Ni 257eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return true; 258da0f069871343119251d6b0586be356dc2146a62Yang Ni} 259da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 260da0f069871343119251d6b0586be356dc2146a62Yang Ni 261da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 262da0f069871343119251d6b0586be356dc2146a62Yang Ni 263da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::tryToCreateFusedKernel(const char *cacheDir) { 264da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.size() < 2) { 266eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 267eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 268da0f069871343119251d6b0586be356dc2146a62Yang Ni 269eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 270eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Fuse the input kernels and generate native code in an object file 271eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 272da0f069871343119251d6b0586be356dc2146a62Yang Ni 273eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<string> inputFiles; 274eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<int> slots; 275da0f069871343119251d6b0586be356dc2146a62Yang Ni 276eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 277eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 278eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 279eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Script* script = kernelID->mScript; 280da0f069871343119251d6b0586be356dc2146a62Yang Ni 281eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (script->isIntrinsic()) { 282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 283eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 284da0f069871343119251d6b0586be356dc2146a62Yang Ni 285eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const RsdCpuScriptImpl *cpuScript = 286eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 287da0f069871343119251d6b0586be356dc2146a62Yang Ni 288eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 289da0f069871343119251d6b0586be356dc2146a62Yang Ni 290eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni inputFiles.push_back(bitcodeFilename); 291eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni slots.push_back(kernelID->mSlot); 292eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 293da0f069871343119251d6b0586be356dc2146a62Yang Ni 294433558f0f9abbf07770db288183a15fd261cace2Yabin Cui rsAssert(cacheDir != nullptr); 295433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string objFilePath(cacheDir); 296433558f0f9abbf07770db288183a15fd261cace2Yabin Cui objFilePath.append("/fusedXXXXXX.o"); 297433558f0f9abbf07770db288183a15fd261cace2Yabin Cui // Find unique object file name, to make following file names unique. 298433558f0f9abbf07770db288183a15fd261cace2Yabin Cui int tempfd = mkstemps(&objFilePath[0], 2); 299433558f0f9abbf07770db288183a15fd261cace2Yabin Cui if (tempfd == -1) { 300433558f0f9abbf07770db288183a15fd261cace2Yabin Cui return; 301433558f0f9abbf07770db288183a15fd261cace2Yabin Cui } 302433558f0f9abbf07770db288183a15fd261cace2Yabin Cui TEMP_FAILURE_RETRY(close(tempfd)); 303433558f0f9abbf07770db288183a15fd261cace2Yabin Cui 304433558f0f9abbf07770db288183a15fd261cace2Yabin Cui string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); 305eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string rsLibPath(SYSLIBPATH"/libclcore.bc"); 306eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni vector<const char*> arguments; 307eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath, 308eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni &arguments); 309eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string commandLine = 310eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni convertListToString(arguments.size() - 1, arguments.data()); 311eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 312eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!fuseAndCompile(arguments.data(), commandLine)) { 313433558f0f9abbf07770db288183a15fd261cace2Yabin Cui unlink(objFilePath.c_str()); 314eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 315eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 316da0f069871343119251d6b0586be356dc2146a62Yang Ni 317eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 318eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Create and load the shared lib 319eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 320da0f069871343119251d6b0586be356dc2146a62Yang Ni 321eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const char* resName = outputFileName.c_str(); 322da0f069871343119251d6b0586be356dc2146a62Yang Ni 323eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { 324eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Failed to link object file '%s'", resName); 325eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 326eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 327da0f069871343119251d6b0586be356dc2146a62Yang Ni 328eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 329eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mSharedObj == nullptr) { 330eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Unable to load '%s'", resName); 331eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 332eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 333da0f069871343119251d6b0586be356dc2146a62Yang Ni 334eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 335eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, // RS context. Unused. 336eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mSharedObj); 337da0f069871343119251d6b0586be356dc2146a62Yang Ni 338da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 3391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 342eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (auto batch : mBatches) { 343eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->setGlobalsForBatch(); 344eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->run(); 345eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 3461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3471ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 348da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 349eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 350eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 351eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 352eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni Script* s; 353eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kernelID != nullptr) { 354eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni s = kernelID->mScript; 355eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 356eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni s = cpuClosure->mClosure->mInvokeID->mScript; 357eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 358eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& p : closure->mGlobals) { 359eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void* value = p.second.first; 360eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int size = p.second.second; 361eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (value == nullptr && size == 0) { 362eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This indicates the current closure depends on another closure for a 363eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // global in their shared module (script). In this case we don't need to 364eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // copy the value. For example, an invoke intializes a global variable 365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // which a kernel later reads. 366eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni continue; 367eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 368ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni rsAssert(p.first != nullptr); 369ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)", 370ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure, p.first, p.first->mScript, p.first->mSlot); 371eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 372eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (size < 0) { 373eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni s->setVarObj(p.first->mSlot, (ObjectBase*)value); 374eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 375eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni s->setVar(p.first->mSlot, (const void*)&value, size); 376eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 377eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 3781ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 3791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 381da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 382eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mExecutable != nullptr) { 383eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 384eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 385eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 386eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 387eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 388ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 389ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 390eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 391eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls); 392eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = nullptr; 395eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = mExecutable->getForEachFunction(0); 396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 397eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads( 398ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)firstCpuClosure->mClosure->mArgs, 399ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni firstCpuClosure->mClosure->mNumArg, 400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, &mtls); 402eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 403eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 404eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.size() == 1 && 407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mClosures.front()->mClosure->mKernelID.get() == nullptr) { 408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This closure is for an invoke function 409eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc = mClosures.front(); 410eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* c = cc->mClosure; 411eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptInvokeID* invokeID = c->mInvokeID; 412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(invokeID != nullptr); 413eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 414eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 415eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 416da0f069871343119251d6b0586be356dc2146a62Yang Ni 417eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 418eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 419eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 420eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 421ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 422ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 423eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mUsrPtr, cpuClosure->mUsrSize, 424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr); 425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 426da0f069871343119251d6b0586be356dc2146a62Yang Ni 427eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 428eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 429eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 430da0f069871343119251d6b0586be356dc2146a62Yang Ni 431ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 432ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, 433eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mReturnValue, 434eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls)) { 435da0f069871343119251d6b0586be356dc2146a62Yang Ni 436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 437eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = (void (*)())&groupRoot; 438eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = &mClosures; 439da0f069871343119251d6b0586be356dc2146a62Yang Ni 440eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 441eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 442eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 443eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 444eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 445eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 446eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 447ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni (const Allocation**)closure->mArgs, 448ff2bb54ebf593b1d19d3a2e4cfa70a8ea4432c0dYang Ni closure->mNumArg, closure->mReturnValue, 449eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr); 450eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 4531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 4541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 455