rsCpuScriptGroup2.cpp revision eb9aa675754c49f613c6ad71d41472b30f38b007
11ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScriptGroup2.h" 21ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <dlfcn.h> 4da0f069871343119251d6b0586be356dc2146a62Yang Ni 5da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <string> 6da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <vector> 7da0f069871343119251d6b0586be356dc2146a62Yang Ni 8da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 9da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "bcc/Config/Config.h" 10da0f069871343119251d6b0586be356dc2146a62Yang Ni#include <sys/wait.h> 11da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 12da0f069871343119251d6b0586be356dc2146a62Yang Ni 131ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "cpu_ref/rsCpuCore.h" 141ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsClosure.h" 151ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsContext.h" 161ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuCore.h" 171ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsCpuScript.h" 181ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScript.h" 191ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni#include "rsScriptGroup2.h" 20da0f069871343119251d6b0586be356dc2146a62Yang Ni#include "rsScriptIntrinsic.h" 21da0f069871343119251d6b0586be356dc2146a62Yang Ni 22da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::string; 23da0f069871343119251d6b0586be356dc2146a62Yang Niusing std::vector; 241ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 251ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace android { 261ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace renderscript { 271ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ninamespace { 291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 30da0f069871343119251d6b0586be356dc2146a62Yang Niconst size_t DefaultKernelArgCount = 2; 311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, 331ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni uint32_t xend, uint32_t outstep) { 34eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr; 35eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; 36eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void **oldIns = kparams->ins; 37eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni uint32_t *oldStrides = kparams->inEStrides; 38eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 39eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<const void*> ins(DefaultKernelArgCount); 40eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<uint32_t> strides(DefaultKernelArgCount); 41eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 42eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : closures) { 43eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 44eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 45eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni auto in_iter = ins.begin(); 46eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni auto stride_iter = strides.begin(); 47eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 48eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& arg : closure->mArgs) { 49eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* a = (const Allocation*)arg; 50eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t eStride = a->mHal.state.elementSizeBytes; 51eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 52eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni eStride * xstart; 53eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kparams->dimY > 1) { 54eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ptr += a->mHal.drvState.lod[0].stride * kparams->y; 55eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 56eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni *in_iter++ = ptr; 57eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni *stride_iter++ = eStride; 58eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 591ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 60eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->ins = &ins[0]; 61eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->inEStrides = &strides[0]; 621ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 63eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Allocation* out = closure->mReturnValue; 64eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint32_t ostep = out->mHal.state.elementSizeBytes; 65eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 66eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ostep * xstart; 67eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kparams->dimY > 1) { 68eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ptr += out->mHal.drvState.lod[0].stride * kparams->y; 69eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 701ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 71eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->out = (void*)ptr; 721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 73eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->usr = cpuClosure->mUsrPtr; 741ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 75eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mFunc(kparams, xstart, xend, ostep); 761ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 771ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 78eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->ins = oldIns; 79eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->inEStrides = oldStrides; 80eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mutable_kparams->usr = &closures; 811ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 821ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 83da0f069871343119251d6b0586be356dc2146a62Yang Ni} // namespace 84da0f069871343119251d6b0586be356dc2146a62Yang Ni 85da0f069871343119251d6b0586be356dc2146a62Yang NiBatch::~Batch() { 86eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 87eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete c; 88eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 89eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mScriptObj) { 90eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni dlclose(mScriptObj); 91eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 92da0f069871343119251d6b0586be356dc2146a62Yang Ni} 93da0f069871343119251d6b0586be356dc2146a62Yang Ni 94da0f069871343119251d6b0586be356dc2146a62Yang Nibool Batch::conflict(CPUClosure* closure) const { 95eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.empty()) { 96eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 97eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 98da0f069871343119251d6b0586be356dc2146a62Yang Ni 99eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (closure->mClosure->mKernelID.get() == nullptr || 100eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mClosures.front()->mClosure->mKernelID.get() == nullptr) { 101eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // An invoke should be in a batch by itself, so it conflicts with any other 102eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // closure. 1031ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni return true; 1041ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 105eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 106eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto &p : closure->mClosure->mGlobalDeps) { 107eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* dep = p.first; 108eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 109eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (c->mClosure == dep) { 110eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global", 111eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure, dep); 112eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return true; 113eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 114eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 115eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 116eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto &p : closure->mClosure->mArgDeps) { 117eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* dep = p.first; 118eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* c : mClosures) { 119eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (c->mClosure == dep) { 120eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto &p1 : *p.second) { 121eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (p1.second->get() != nullptr) { 122eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg", 123eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure, dep); 124eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return true; 125eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 126eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 127eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1281ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 1291ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 130eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 1311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1331ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 1341ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni const ScriptGroupBase *sg) : 1351ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { 136eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!mGroup->mClosures.empty()); 137eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 138eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni Batch* batch = new Batch(this); 139eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Closure* closure: mGroup->mClosures) { 140eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 141eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni RsdCpuScriptImpl* si; 142eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc; 143eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kernelID != nullptr) { 144eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); 145eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 146eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni si->forEachKernelSetup(kernelID->mSlot, &mtls); 147eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // TODO: Is mtls.fep.usrLen ever used? 148eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, 149eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr, mtls.fep.usrLen); 150eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 151eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript( 152eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mInvokeID->mScript); 153eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc = new CPUClosure(closure, si); 154eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 1551ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 156eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (batch->conflict(cc)) { 157eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 158eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch = new Batch(this); 159eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 160da0f069871343119251d6b0586be356dc2146a62Yang Ni 161eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->mClosures.push_back(cc); 162eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 163da0f069871343119251d6b0586be356dc2146a62Yang Ni 164eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(!batch->mClosures.empty()); 165eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mBatches.push_back(batch); 166da0f069871343119251d6b0586be356dc2146a62Yang Ni 167da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 168eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 169eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str()); 170eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 171da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 1721ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 1731ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 1741ffd86b448d78366190c540f98f8b6d641cdb6cfYang NiCpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 175eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (Batch* batch : mBatches) { 176eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni delete batch; 177eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 178da0f069871343119251d6b0586be356dc2146a62Yang Ni} 179da0f069871343119251d6b0586be356dc2146a62Yang Ni 180da0f069871343119251d6b0586be356dc2146a62Yang Ninamespace { 181da0f069871343119251d6b0586be356dc2146a62Yang Ni 182da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 183da0f069871343119251d6b0586be356dc2146a62Yang Ni 184da0f069871343119251d6b0586be356dc2146a62Yang Nistring getFileName(string path) { 185eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni unsigned found = path.find_last_of("/\\"); 186eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return path.substr(found + 1); 187da0f069871343119251d6b0586be356dc2146a62Yang Ni} 188da0f069871343119251d6b0586be356dc2146a62Yang Ni 189da0f069871343119251d6b0586be356dc2146a62Yang Nivoid setupCompileArguments( 190eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const vector<string>& inputs, const vector<int>& kernels, 191eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& output_dir, const string& output_filename, 192eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& rsLib, vector<const char*>* args) { 193eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 194eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-fPIC"); 195eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-embedRSInfo"); 196eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-mtriple"); 197eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 198eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-bclib"); 199eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(rsLib.c_str()); 200eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const string& input : inputs) { 201eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(input.c_str()); 202eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 203eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (int kernel : kernels) { 204eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-k"); 205eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string strKernel = std::to_string(kernel); 206eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(strKernel.c_str()); 207eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 208eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-output_path"); 209eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(output_dir.c_str()); 210eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back("-o"); 211eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(output_filename.c_str()); 212eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni args->push_back(nullptr); 213da0f069871343119251d6b0586be356dc2146a62Yang Ni} 214da0f069871343119251d6b0586be356dc2146a62Yang Ni 215da0f069871343119251d6b0586be356dc2146a62Yang Nistring convertListToString(int n, const char* const* strs) { 216eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string ret; 217eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ret.append(strs[0]); 218eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (int i = 1; i < n; i++) { 219eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ret.append(" "); 220eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ret.append(strs[i]); 221eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 222eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return ret; 223da0f069871343119251d6b0586be356dc2146a62Yang Ni} 224da0f069871343119251d6b0586be356dc2146a62Yang Ni 225da0f069871343119251d6b0586be356dc2146a62Yang Nibool fuseAndCompile(const char** arguments, 226da0f069871343119251d6b0586be356dc2146a62Yang Ni const string& commandLine) { 227eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const pid_t pid = fork(); 228da0f069871343119251d6b0586be356dc2146a62Yang Ni 229eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (pid == -1) { 230eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Couldn't fork for bcc execution"); 231eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 232eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 233da0f069871343119251d6b0586be356dc2146a62Yang Ni 234eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (pid == 0) { 235eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Child process 236eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGV("Invoking BCC with: %s", commandLine.c_str()); 237eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); 238da0f069871343119251d6b0586be356dc2146a62Yang Ni 239eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("execv() failed: %s", strerror(errno)); 240eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni abort(); 241eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 242eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 243da0f069871343119251d6b0586be356dc2146a62Yang Ni 244eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Parent process 245eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int status = 0; 246eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const pid_t w = waitpid(pid, &status, 0); 247eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (w == -1) { 248eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 249eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 250da0f069871343119251d6b0586be356dc2146a62Yang Ni 251eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { 252eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("bcc terminated unexpectedly"); 253eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return false; 254eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 255da0f069871343119251d6b0586be356dc2146a62Yang Ni 256eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return true; 257da0f069871343119251d6b0586be356dc2146a62Yang Ni} 258da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif 259da0f069871343119251d6b0586be356dc2146a62Yang Ni 260da0f069871343119251d6b0586be356dc2146a62Yang Ni} // anonymous namespace 261da0f069871343119251d6b0586be356dc2146a62Yang Ni 262da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::tryToCreateFusedKernel(const char *cacheDir) { 263da0f069871343119251d6b0586be356dc2146a62Yang Ni#ifndef RS_COMPATIBILITY_LIB 264eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.size() < 2) { 265eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 266eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 267da0f069871343119251d6b0586be356dc2146a62Yang Ni 268eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 269eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Fuse the input kernels and generate native code in an object file 270eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 271da0f069871343119251d6b0586be356dc2146a62Yang Ni 272eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<string> inputFiles; 273eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni std::vector<int> slots; 274da0f069871343119251d6b0586be356dc2146a62Yang Ni 275eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 276eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 277eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 278eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Script* script = kernelID->mScript; 279da0f069871343119251d6b0586be356dc2146a62Yang Ni 280eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (script->isIntrinsic()) { 281eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 282eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 283da0f069871343119251d6b0586be356dc2146a62Yang Ni 284eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const RsdCpuScriptImpl *cpuScript = 285eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const RsdCpuScriptImpl*)script->mHal.drv; 286da0f069871343119251d6b0586be356dc2146a62Yang Ni 287eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 288da0f069871343119251d6b0586be356dc2146a62Yang Ni 289eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni inputFiles.push_back(bitcodeFilename); 290eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni slots.push_back(kernelID->mSlot); 291eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 292da0f069871343119251d6b0586be356dc2146a62Yang Ni 293eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string outputPath(tempnam(cacheDir, "fused")); 294eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string outputFileName = getFileName(outputPath); 295eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string objFilePath(outputPath); 296eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni objFilePath.append(".o"); 297eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string rsLibPath(SYSLIBPATH"/libclcore.bc"); 298eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni vector<const char*> arguments; 299eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath, 300eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni &arguments); 301eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni string commandLine = 302eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni convertListToString(arguments.size() - 1, arguments.data()); 303eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 304eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!fuseAndCompile(arguments.data(), commandLine)) { 305eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 306eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 307da0f069871343119251d6b0586be356dc2146a62Yang Ni 308eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 309eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // Create and load the shared lib 310eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni //===--------------------------------------------------------------------===// 311da0f069871343119251d6b0586be356dc2146a62Yang Ni 312eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const char* resName = outputFileName.c_str(); 313da0f069871343119251d6b0586be356dc2146a62Yang Ni 314eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { 315eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Failed to link object file '%s'", resName); 316eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 317eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 318da0f069871343119251d6b0586be356dc2146a62Yang Ni 319eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 320eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mSharedObj == nullptr) { 321eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni ALOGE("Unable to load '%s'", resName); 322eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 323eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 324da0f069871343119251d6b0586be356dc2146a62Yang Ni 325eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mExecutable = ScriptExecutable::createFromSharedObject( 326eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, // RS context. Unused. 327eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mSharedObj); 328da0f069871343119251d6b0586be356dc2146a62Yang Ni 329da0f069871343119251d6b0586be356dc2146a62Yang Ni#endif // RS_COMPATIBILITY_LIB 3301ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3311ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 3321ffd86b448d78366190c540f98f8b6d641cdb6cfYang Nivoid CpuScriptGroup2Impl::execute() { 333eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (auto batch : mBatches) { 334eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->setGlobalsForBatch(); 335eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni batch->run(); 336eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 3371ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3381ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 339da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::setGlobalsForBatch() { 340eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 341eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 342eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 343eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni Script* s; 344eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (kernelID != nullptr) { 345eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni s = kernelID->mScript; 346eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 347eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni s = cpuClosure->mClosure->mInvokeID->mScript; 348eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 349eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (const auto& p : closure->mGlobals) { 350eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const void* value = p.second.first; 351eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni int size = p.second.second; 352eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (value == nullptr && size == 0) { 353eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This indicates the current closure depends on another closure for a 354eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // global in their shared module (script). In this case we don't need to 355eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // copy the value. For example, an invoke intializes a global variable 356eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // which a kernel later reads. 357eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni continue; 358eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 359eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // We use -1 size to indicate an ObjectBase rather than a primitive type 360eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (size < 0) { 361eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni s->setVarObj(p.first->mSlot, (ObjectBase*)value); 362eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } else { 363eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni s->setVar(p.first->mSlot, (const void*)&value, size); 364eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 365eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 3661ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni } 3671ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 3681ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 369da0f069871343119251d6b0586be356dc2146a62Yang Nivoid Batch::run() { 370eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mExecutable != nullptr) { 371eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 372eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* firstCpuClosure = mClosures.front(); 373eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* lastCpuClosure = mClosures.back(); 374eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 375eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mSi->forEachMtlsSetup( 376eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], 377eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mClosure->mArgs.size(), 378eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 379eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls); 380eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 381eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 382eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = nullptr; 383eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = mExecutable->getForEachFunction(0); 384eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 385eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads( 386eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], 387eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni firstCpuClosure->mClosure->mArgs.size(), 388eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni lastCpuClosure->mClosure->mReturnValue, 389eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, &mtls); 390eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 391eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 392eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 393eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 394eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (mClosures.size() == 1 && 395eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mClosures.front()->mClosure->mKernelID.get() == nullptr) { 396eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni // This closure is for an invoke function 397eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni CPUClosure* cc = mClosures.front(); 398eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* c = cc->mClosure; 399eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptInvokeID* invokeID = c->mInvokeID; 400eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni rsAssert(invokeID != nullptr); 401eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 402eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni return; 403eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 404da0f069871343119251d6b0586be356dc2146a62Yang Ni 405eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 406eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 407eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 408eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->preLaunch(kernelID->mSlot, 409eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const Allocation**)&closure->mArgs[0], 410eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mArgs.size(), closure->mReturnValue, 411eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mUsrPtr, cpuClosure->mUsrSize, 412eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr); 413eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 414da0f069871343119251d6b0586be356dc2146a62Yang Ni 415eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const CPUClosure* cpuClosure = mClosures.front(); 416eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 417eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni MTLaunchStruct mtls; 418da0f069871343119251d6b0586be356dc2146a62Yang Ni 419eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0], 420eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mArgs.size(), 421eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mReturnValue, 422eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr, &mtls)) { 423da0f069871343119251d6b0586be356dc2146a62Yang Ni 424eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.script = nullptr; 425eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.kernel = (void (*)())&groupRoot; 426eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mtls.fep.usr = &mClosures; 427da0f069871343119251d6b0586be356dc2146a62Yang Ni 428eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 429eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 430eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni 431eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni for (CPUClosure* cpuClosure : mClosures) { 432eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const Closure* closure = cpuClosure->mClosure; 433eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni const ScriptKernelID* kernelID = closure->mKernelID.get(); 434eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni cpuClosure->mSi->postLaunch(kernelID->mSlot, 435eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni (const Allocation**)&closure->mArgs[0], 436eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni closure->mArgs.size(), closure->mReturnValue, 437eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni nullptr, 0, nullptr); 438eb9aa675754c49f613c6ad71d41472b30f38b007Yang Ni } 4391ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} 4401ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni 4411ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace renderscript 4421ffd86b448d78366190c540f98f8b6d641cdb6cfYang Ni} // namespace android 443