rsCpuScriptGroup2.cpp revision da0f069871343119251d6b0586be356dc2146a62
#include "rsCpuScriptGroup2.h"

#include <dlfcn.h>
#include <errno.h>
#include <stdlib.h>

#include <string>
#include <vector>

#ifndef RS_COMPATIBILITY_LIB
#include "bcc/Config/Config.h"
#include <sys/wait.h>
#endif

#include "cpu_ref/rsCpuCore.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
#include "rsCpuScript.h"
#include "rsScript.h"
#include "rsScriptGroup2.h"
#include "rsScriptIntrinsic.h"

using std::string;
using std::vector;

namespace android {
namespace renderscript {

namespace {

const size_t DefaultKernelArgCount = 2;
311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, 331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr; 351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; 361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const void **oldIns = kparams->ins; 371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t *oldStrides = kparams->inEStrides; 381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni std::vector<const void*> ins(DefaultKernelArgCount); 401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni std::vector<uint32_t> strides(DefaultKernelArgCount); 411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni for (CPUClosure* cpuClosure : closures) { 431ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Closure* closure = cpuClosure->mClosure; 441ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 451ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni auto in_iter = ins.begin(); 461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni auto stride_iter = strides.begin(); 471ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 481ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni for (const auto& arg : closure->mArgs) { 491ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Allocation* a = (const Allocation*)arg; 501ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 521ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni eStride * xstart; 531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni if (kparams->dimY > 1) { 
541ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni ptr += a->mHal.drvState.lod[0].stride * kparams->y; 551ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 561ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni *in_iter++ = ptr; 571ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni *stride_iter++ = eStride; 581ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 591ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 601ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mutable_kparams->ins = &ins[0]; 611ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mutable_kparams->inEStrides = &strides[0]; 621ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 631ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Allocation* out = closure->mReturnValue; 641ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 651ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni ostep * xstart; 671ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni if (kparams->dimY > 1) { 681ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni ptr += out->mHal.drvState.lod[0].stride * kparams->y; 691ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 701ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 711ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mutable_kparams->out = (void*)ptr; 721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mutable_kparams->usr = cpuClosure->mUsrPtr; 741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 751ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni cpuClosure->mFunc(kparams, xstart, xend, ostep); 761ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 781ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mutable_kparams->ins = oldIns; 791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mutable_kparams->inEStrides = oldStrides; 801ffd86b448d78366190c540f98f8b6d641cdb6cfYang 
Ni mutable_kparams->usr = &closures; 811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 821ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 83da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 84da0f069871343119251d6b0586be356dc2146a62Yang Ni 85da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 86da0f069871343119251d6b0586be356dc2146a62Yang Ni for (CPUClosure* c : mClosures) { 87da0f069871343119251d6b0586be356dc2146a62Yang Ni delete c; 88da0f069871343119251d6b0586be356dc2146a62Yang Ni } 89da0f069871343119251d6b0586be356dc2146a62Yang Ni if (mScriptObj) { 90da0f069871343119251d6b0586be356dc2146a62Yang Ni dlclose(mScriptObj); 91da0f069871343119251d6b0586be356dc2146a62Yang Ni } 92da0f069871343119251d6b0586be356dc2146a62Yang Ni} 93da0f069871343119251d6b0586be356dc2146a62Yang Ni 94da0f069871343119251d6b0586be356dc2146a62Yang Nibool Batch::conflict(CPUClosure* closure) const { 95da0f069871343119251d6b0586be356dc2146a62Yang Ni if (mClosures.empty()) { 96da0f069871343119251d6b0586be356dc2146a62Yang Ni return false; 97da0f069871343119251d6b0586be356dc2146a62Yang Ni } 98da0f069871343119251d6b0586be356dc2146a62Yang Ni 99da0f069871343119251d6b0586be356dc2146a62Yang Ni if (closure->mClosure->mKernelID.get() == nullptr || 100da0f069871343119251d6b0586be356dc2146a62Yang Ni mClosures.front()->mClosure->mKernelID.get() == nullptr) { 101da0f069871343119251d6b0586be356dc2146a62Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 102da0f069871343119251d6b0586be356dc2146a62Yang Ni // closure. 
103da0f069871343119251d6b0586be356dc2146a62Yang Ni return true; 104da0f069871343119251d6b0586be356dc2146a62Yang Ni } 105da0f069871343119251d6b0586be356dc2146a62Yang Ni 1061ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni for (const auto &p : closure->mClosure->mGlobalDeps) { 1071ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Closure* dep = p.first; 108da0f069871343119251d6b0586be356dc2146a62Yang Ni for (CPUClosure* c : mClosures) { 1091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni if (c->mClosure == dep) { 110da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global", closure, dep); 1111ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1121ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1141ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1151ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni for (const auto &p : closure->mClosure->mArgDeps) { 1161ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Closure* dep = p.first; 117da0f069871343119251d6b0586be356dc2146a62Yang Ni for (CPUClosure* c : mClosures) { 1181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni if (c->mClosure == dep) { 1191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni for (const auto &p1 : *p.second) { 120da0f069871343119251d6b0586be356dc2146a62Yang Ni if (p1.second->get() != nullptr) { 121da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg", closure, dep); 1221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1231ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return false; 1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 
1301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1311ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { 134da0f069871343119251d6b0586be356dc2146a62Yang Ni Batch* batch = new Batch(this); 1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni for (Closure* closure: mGroup->mClosures) { 1361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 1371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni RsdCpuScriptImpl* si = 1381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); 1391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni MTLaunchStruct mtls; 1411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni si->forEachKernelSetup(kernelID->mSlot, &mtls); 1421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni // TODO: Is mtls.fep.usrLen ever used? 
1431ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, 1441ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mtls.fep.usr, mtls.fep.usrLen); 145da0f069871343119251d6b0586be356dc2146a62Yang Ni if (batch->conflict(cc)) { 1461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mBatches.push_back(batch); 147da0f069871343119251d6b0586be356dc2146a62Yang Ni batch = new Batch(this); 1481ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 149da0f069871343119251d6b0586be356dc2146a62Yang Ni 150da0f069871343119251d6b0586be356dc2146a62Yang Ni batch->mClosures.push_back(cc); 1511ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 152da0f069871343119251d6b0586be356dc2146a62Yang Ni 1531ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mBatches.push_back(batch); 154da0f069871343119251d6b0586be356dc2146a62Yang Ni 155da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 156da0f069871343119251d6b0586be356dc2146a62Yang Ni for (Batch* batch : mBatches) { 157da0f069871343119251d6b0586be356dc2146a62Yang Ni batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str()); 158da0f069871343119251d6b0586be356dc2146a62Yang Ni } 159da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 1601ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1611ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1621ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 163da0f069871343119251d6b0586be356dc2146a62Yang Ni for (Batch* batch : mBatches) { 164da0f069871343119251d6b0586be356dc2146a62Yang Ni delete batch; 165da0f069871343119251d6b0586be356dc2146a62Yang Ni } 166da0f069871343119251d6b0586be356dc2146a62Yang Ni} 167da0f069871343119251d6b0586be356dc2146a62Yang Ni 168da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 169da0f069871343119251d6b0586be356dc2146a62Yang Ni 170da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 171da0f069871343119251d6b0586be356dc2146a62Yang Ni 
172da0f069871343119251d6b0586be356dc2146a62Yang Nistring getFileName(string path) { 173da0f069871343119251d6b0586be356dc2146a62Yang Ni unsigned found = path.find_last_of("/\\"); 174da0f069871343119251d6b0586be356dc2146a62Yang Ni return path.substr(found + 1); 175da0f069871343119251d6b0586be356dc2146a62Yang Ni} 176da0f069871343119251d6b0586be356dc2146a62Yang Ni 177da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 178da0f069871343119251d6b0586be356dc2146a62Yang Ni const vector<string>& inputs, const vector<int>& kernels, 179da0f069871343119251d6b0586be356dc2146a62Yang Ni const string& output_dir, const string& output_filename, 180da0f069871343119251d6b0586be356dc2146a62Yang Ni const string& rsLib, vector<const char*>* args) { 181da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 182da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back("-fPIC"); 183da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back("-embedRSInfo"); 184da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back("-mtriple"); 185da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 186da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back("-bclib"); 187da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back(rsLib.c_str()); 188da0f069871343119251d6b0586be356dc2146a62Yang Ni for (const string& input : inputs) { 189da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back(input.c_str()); 190da0f069871343119251d6b0586be356dc2146a62Yang Ni } 191da0f069871343119251d6b0586be356dc2146a62Yang Ni for (int kernel : kernels) { 192da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back("-k"); 193da0f069871343119251d6b0586be356dc2146a62Yang Ni string strKernel = std::to_string(kernel); 194da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back(strKernel.c_str()); 195da0f069871343119251d6b0586be356dc2146a62Yang Ni } 
196da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back("-output_path"); 197da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back(output_dir.c_str()); 198da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back("-o"); 199da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back(output_filename.c_str()); 200da0f069871343119251d6b0586be356dc2146a62Yang Ni args->push_back(nullptr); 201da0f069871343119251d6b0586be356dc2146a62Yang Ni} 202da0f069871343119251d6b0586be356dc2146a62Yang Ni 203da0f069871343119251d6b0586be356dc2146a62Yang Nistring convertListToString(int n, const char* const* strs) { 204da0f069871343119251d6b0586be356dc2146a62Yang Ni string ret; 205da0f069871343119251d6b0586be356dc2146a62Yang Ni ret.append(strs[0]); 206da0f069871343119251d6b0586be356dc2146a62Yang Ni for (int i = 1; i < n; i++) { 207da0f069871343119251d6b0586be356dc2146a62Yang Ni ret.append(" "); 208da0f069871343119251d6b0586be356dc2146a62Yang Ni ret.append(strs[i]); 209da0f069871343119251d6b0586be356dc2146a62Yang Ni } 210da0f069871343119251d6b0586be356dc2146a62Yang Ni return ret; 211da0f069871343119251d6b0586be356dc2146a62Yang Ni} 212da0f069871343119251d6b0586be356dc2146a62Yang Ni 213da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments, 214da0f069871343119251d6b0586be356dc2146a62Yang Ni const string& commandLine) { 215da0f069871343119251d6b0586be356dc2146a62Yang Ni const pid_t pid = fork(); 216da0f069871343119251d6b0586be356dc2146a62Yang Ni 217da0f069871343119251d6b0586be356dc2146a62Yang Ni if (pid == -1) { 218da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGE("Couldn't fork for bcc execution"); 219da0f069871343119251d6b0586be356dc2146a62Yang Ni return false; 220da0f069871343119251d6b0586be356dc2146a62Yang Ni } 221da0f069871343119251d6b0586be356dc2146a62Yang Ni 222da0f069871343119251d6b0586be356dc2146a62Yang Ni if (pid == 0) { 223da0f069871343119251d6b0586be356dc2146a62Yang Ni // Child process 
224da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGV("Invoking BCC with: %s", commandLine.c_str()); 225da0f069871343119251d6b0586be356dc2146a62Yang Ni execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); 226da0f069871343119251d6b0586be356dc2146a62Yang Ni 227da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGE("execv() failed: %s", strerror(errno)); 228da0f069871343119251d6b0586be356dc2146a62Yang Ni abort(); 229da0f069871343119251d6b0586be356dc2146a62Yang Ni return false; 230da0f069871343119251d6b0586be356dc2146a62Yang Ni } 231da0f069871343119251d6b0586be356dc2146a62Yang Ni 232da0f069871343119251d6b0586be356dc2146a62Yang Ni // Parent process 233da0f069871343119251d6b0586be356dc2146a62Yang Ni int status = 0; 234da0f069871343119251d6b0586be356dc2146a62Yang Ni const pid_t w = waitpid(pid, &status, 0); 235da0f069871343119251d6b0586be356dc2146a62Yang Ni if (w == -1) { 236da0f069871343119251d6b0586be356dc2146a62Yang Ni return false; 237da0f069871343119251d6b0586be356dc2146a62Yang Ni } 238da0f069871343119251d6b0586be356dc2146a62Yang Ni 239da0f069871343119251d6b0586be356dc2146a62Yang Ni if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { 240da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGE("bcc terminated unexpectedly"); 241da0f069871343119251d6b0586be356dc2146a62Yang Ni return false; 242da0f069871343119251d6b0586be356dc2146a62Yang Ni } 243da0f069871343119251d6b0586be356dc2146a62Yang Ni 244da0f069871343119251d6b0586be356dc2146a62Yang Ni return true; 245da0f069871343119251d6b0586be356dc2146a62Yang Ni} 246da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 247da0f069871343119251d6b0586be356dc2146a62Yang Ni 248da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 249da0f069871343119251d6b0586be356dc2146a62Yang Ni 250da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::tryToCreateFusedKernel(const char *cacheDir) { 251da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 
252da0f069871343119251d6b0586be356dc2146a62Yang Ni if (mClosures.size() < 2) { 253da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGV("Compiler kernel fusion skipped due to only one or zero kernel in" 254da0f069871343119251d6b0586be356dc2146a62Yang Ni " a script group batch."); 255da0f069871343119251d6b0586be356dc2146a62Yang Ni return; 256da0f069871343119251d6b0586be356dc2146a62Yang Ni } 257da0f069871343119251d6b0586be356dc2146a62Yang Ni 258da0f069871343119251d6b0586be356dc2146a62Yang Ni //===--------------------------------------------------------------------===// 259da0f069871343119251d6b0586be356dc2146a62Yang Ni // Fuse the input kernels and generate native code in an object file 260da0f069871343119251d6b0586be356dc2146a62Yang Ni //===--------------------------------------------------------------------===// 261da0f069871343119251d6b0586be356dc2146a62Yang Ni 262da0f069871343119251d6b0586be356dc2146a62Yang Ni std::vector<string> inputFiles; 263da0f069871343119251d6b0586be356dc2146a62Yang Ni std::vector<int> slots; 264da0f069871343119251d6b0586be356dc2146a62Yang Ni 265da0f069871343119251d6b0586be356dc2146a62Yang Ni for (CPUClosure* cpuClosure : mClosures) { 266da0f069871343119251d6b0586be356dc2146a62Yang Ni const Closure* closure = cpuClosure->mClosure; 267da0f069871343119251d6b0586be356dc2146a62Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 268da0f069871343119251d6b0586be356dc2146a62Yang Ni const Script* script = kernelID->mScript; 269da0f069871343119251d6b0586be356dc2146a62Yang Ni 270da0f069871343119251d6b0586be356dc2146a62Yang Ni if (script->isIntrinsic()) { 271da0f069871343119251d6b0586be356dc2146a62Yang Ni return; 2721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 273da0f069871343119251d6b0586be356dc2146a62Yang Ni 274da0f069871343119251d6b0586be356dc2146a62Yang Ni const RsdCpuScriptImpl *cpuScript = 275da0f069871343119251d6b0586be356dc2146a62Yang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 
276da0f069871343119251d6b0586be356dc2146a62Yang Ni 277da0f069871343119251d6b0586be356dc2146a62Yang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 278da0f069871343119251d6b0586be356dc2146a62Yang Ni 279da0f069871343119251d6b0586be356dc2146a62Yang Ni inputFiles.push_back(bitcodeFilename); 280da0f069871343119251d6b0586be356dc2146a62Yang Ni slots.push_back(kernelID->mSlot); 281da0f069871343119251d6b0586be356dc2146a62Yang Ni } 282da0f069871343119251d6b0586be356dc2146a62Yang Ni 283da0f069871343119251d6b0586be356dc2146a62Yang Ni string outputPath(tempnam(cacheDir, "fused")); 284da0f069871343119251d6b0586be356dc2146a62Yang Ni string outputFileName = getFileName(outputPath); 285da0f069871343119251d6b0586be356dc2146a62Yang Ni string objFilePath(outputPath); 286da0f069871343119251d6b0586be356dc2146a62Yang Ni objFilePath.append(".o"); 287da0f069871343119251d6b0586be356dc2146a62Yang Ni string rsLibPath(SYSLIBPATH"/libclcore.bc"); 288da0f069871343119251d6b0586be356dc2146a62Yang Ni vector<const char*> arguments; 289da0f069871343119251d6b0586be356dc2146a62Yang Ni setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath, 290da0f069871343119251d6b0586be356dc2146a62Yang Ni &arguments); 291da0f069871343119251d6b0586be356dc2146a62Yang Ni string commandLine = 292da0f069871343119251d6b0586be356dc2146a62Yang Ni convertListToString(arguments.size() - 1, arguments.data()); 293da0f069871343119251d6b0586be356dc2146a62Yang Ni 294da0f069871343119251d6b0586be356dc2146a62Yang Ni if (!fuseAndCompile(arguments.data(), commandLine)) { 295da0f069871343119251d6b0586be356dc2146a62Yang Ni return; 296da0f069871343119251d6b0586be356dc2146a62Yang Ni } 297da0f069871343119251d6b0586be356dc2146a62Yang Ni 298da0f069871343119251d6b0586be356dc2146a62Yang Ni //===--------------------------------------------------------------------===// 299da0f069871343119251d6b0586be356dc2146a62Yang Ni // Create and load the shared lib 300da0f069871343119251d6b0586be356dc2146a62Yang Ni 
//===--------------------------------------------------------------------===// 301da0f069871343119251d6b0586be356dc2146a62Yang Ni 302da0f069871343119251d6b0586be356dc2146a62Yang Ni const char* resName = outputFileName.c_str(); 303da0f069871343119251d6b0586be356dc2146a62Yang Ni 304da0f069871343119251d6b0586be356dc2146a62Yang Ni if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { 305da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGE("Failed to link object file '%s'", resName); 306da0f069871343119251d6b0586be356dc2146a62Yang Ni return; 307da0f069871343119251d6b0586be356dc2146a62Yang Ni } 308da0f069871343119251d6b0586be356dc2146a62Yang Ni 309da0f069871343119251d6b0586be356dc2146a62Yang Ni void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 310da0f069871343119251d6b0586be356dc2146a62Yang Ni if (mSharedObj == nullptr) { 311da0f069871343119251d6b0586be356dc2146a62Yang Ni ALOGE("Unable to load '%s'", resName); 312da0f069871343119251d6b0586be356dc2146a62Yang Ni return; 3131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 314da0f069871343119251d6b0586be356dc2146a62Yang Ni 315da0f069871343119251d6b0586be356dc2146a62Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 316da0f069871343119251d6b0586be356dc2146a62Yang Ni nullptr, // RS context. Unused. 
317da0f069871343119251d6b0586be356dc2146a62Yang Ni mSharedObj); 318da0f069871343119251d6b0586be356dc2146a62Yang Ni 319da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 3201ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3211ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3221ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 323da0f069871343119251d6b0586be356dc2146a62Yang Ni for (auto batch : mBatches) { 324da0f069871343119251d6b0586be356dc2146a62Yang Ni batch->setGlobalsForBatch(); 325da0f069871343119251d6b0586be356dc2146a62Yang Ni batch->run(); 3261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 3271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 329da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 330da0f069871343119251d6b0586be356dc2146a62Yang Ni for (CPUClosure* cpuClosure : mClosures) { 3311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Closure* closure = cpuClosure->mClosure; 3321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 3331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni Script* s = kernelID->mScript; 3341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni for (const auto& p : closure->mGlobals) { 3351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const void* value = p.second.first; 3361ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni int size = p.second.second; 3371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 3381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni if (size < 0) { 3391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni s->setVarObj(p.first->mSlot, (ObjectBase*)value); 3401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } else { 3411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni s->setVar(p.first->mSlot, (const void*)&value, size); 
3421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 3431ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 3441ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 3451ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3461ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 347da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 348da0f069871343119251d6b0586be356dc2146a62Yang Ni if (mExecutable != nullptr) { 349da0f069871343119251d6b0586be356dc2146a62Yang Ni MTLaunchStruct mtls; 350da0f069871343119251d6b0586be356dc2146a62Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 351da0f069871343119251d6b0586be356dc2146a62Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 352da0f069871343119251d6b0586be356dc2146a62Yang Ni 353da0f069871343119251d6b0586be356dc2146a62Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 354da0f069871343119251d6b0586be356dc2146a62Yang Ni (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], 355da0f069871343119251d6b0586be356dc2146a62Yang Ni firstCpuClosure->mClosure->mArgs.size(), 356da0f069871343119251d6b0586be356dc2146a62Yang Ni lastCpuClosure->mClosure->mReturnValue, 357da0f069871343119251d6b0586be356dc2146a62Yang Ni nullptr, 0, nullptr, &mtls); 358da0f069871343119251d6b0586be356dc2146a62Yang Ni 359da0f069871343119251d6b0586be356dc2146a62Yang Ni mtls.script = nullptr; 360da0f069871343119251d6b0586be356dc2146a62Yang Ni mtls.fep.usr = nullptr; 361da0f069871343119251d6b0586be356dc2146a62Yang Ni mtls.kernel = mExecutable->getForEachFunction(0); 362da0f069871343119251d6b0586be356dc2146a62Yang Ni 363da0f069871343119251d6b0586be356dc2146a62Yang Ni mGroup->getCpuRefImpl()->launchThreads( 364da0f069871343119251d6b0586be356dc2146a62Yang Ni (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], 365da0f069871343119251d6b0586be356dc2146a62Yang Ni firstCpuClosure->mClosure->mArgs.size(), 366da0f069871343119251d6b0586be356dc2146a62Yang Ni lastCpuClosure->mClosure->mReturnValue, 367da0f069871343119251d6b0586be356dc2146a62Yang Ni 
nullptr, &mtls); 368da0f069871343119251d6b0586be356dc2146a62Yang Ni 369da0f069871343119251d6b0586be356dc2146a62Yang Ni return; 370da0f069871343119251d6b0586be356dc2146a62Yang Ni } 371da0f069871343119251d6b0586be356dc2146a62Yang Ni 372da0f069871343119251d6b0586be356dc2146a62Yang Ni for (CPUClosure* cpuClosure : mClosures) { 3731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Closure* closure = cpuClosure->mClosure; 3741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 3751ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 3761ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni (const Allocation**)&closure->mArgs[0], 3771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni closure->mArgs.size(), closure->mReturnValue, 3781ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni cpuClosure->mUsrPtr, cpuClosure->mUsrSize, 3791ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni nullptr); 3801ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 3811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 382da0f069871343119251d6b0586be356dc2146a62Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 3831ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Closure* closure = cpuClosure->mClosure; 3841ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni MTLaunchStruct mtls; 3851ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 386bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0], 387bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams closure->mArgs.size(), 388bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams closure->mReturnValue, 389bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams nullptr, 0, nullptr, &mtls)) { 3901ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 391bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams mtls.script = nullptr; 392bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams mtls.kernel = (void (*)())&groupRoot; 
393da0f069871343119251d6b0586be356dc2146a62Yang Ni mtls.fep.usr = &mClosures; 3941ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 395da0f069871343119251d6b0586be356dc2146a62Yang Ni mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 396bf2111d3b3de310932099514f06924e48fa1d7b2Jason Sams } 3971ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 398da0f069871343119251d6b0586be356dc2146a62Yang Ni for (CPUClosure* cpuClosure : mClosures) { 3991ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const Closure* closure = cpuClosure->mClosure; 4001ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 4011ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 4021ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni (const Allocation**)&closure->mArgs[0], 4031ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni closure->mArgs.size(), closure->mReturnValue, 4041ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni nullptr, 0, nullptr); 4051ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 4061ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4071ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 4081ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 4091ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 410