// rsCpuScriptGroup2.cpp revision 4c368af7e705f0bcb77fa99495b2e33ef20d2699
#include "rsCpuScriptGroup2.h"

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <algorithm>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#ifndef RS_COMPATIBILITY_LIB
#include "bcc/Config/Config.h"
#endif

#include "cpu_ref/rsCpuCore.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
#include "rsCpuExecutable.h"
#include "rsCpuScript.h"
#include "rsScript.h"
#include "rsScriptGroup2.h"
#include "rsScriptIntrinsic.h"

using std::string;
using std::vector;
2966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 3066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumannamespace android { 3166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumannamespace renderscript { 3266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 3366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumannamespace { 3466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 3566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumanconst size_t DefaultKernelArgCount = 2; 3666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 3766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumanvoid groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, 3866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman uint32_t xend, uint32_t outstep) { 3966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; 4066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 4166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 4266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const size_t oldInLen = mutable_kinfo->inLen; 4366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 4466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman decltype(mutable_kinfo->inStride) oldInStride; 4566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); 4666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 4766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman for (CPUClosure* cpuClosure : closures) { 4866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const Closure* closure = cpuClosure->mClosure; 4966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 5066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman // There had better be enough space in mutable_kinfo 5166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); 
5266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 5366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman for (size_t i = 0; i < closure->mNumArg; i++) { 5466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const void* arg = closure->mArgs[i]; 5566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const Allocation* a = (const Allocation*)arg; 5666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const uint32_t eStride = a->mHal.state.elementSizeBytes; 5766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 5866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman eStride * xstart; 5966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman if (kinfo->dim.y > 1) { 6066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; 6166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 6266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mutable_kinfo->inPtr[i] = ptr; 6366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mutable_kinfo->inStride[i] = eStride; 6466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 6566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mutable_kinfo->inLen = closure->mNumArg; 6666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 6766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const Allocation* out = closure->mReturnValue; 6866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const uint32_t ostep = out->mHal.state.elementSizeBytes; 6966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 7066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman ostep * xstart; 7166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman if (kinfo->dim.y > 1) { 7266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; 7366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 7466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 
7566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman rsAssert(kinfo->outLen <= 1); 7666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); 7766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 7866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman cpuClosure->mFunc(kinfo, xstart, xend, ostep); 7966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 8066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 8166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mutable_kinfo->inLen = oldInLen; 8266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); 8366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman} 8466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 8566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman} // namespace 8666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 8766b8ab22586debccb1f787d4d52b7f042d4ddeb8John BaumanBatch::Batch(CpuScriptGroup2Impl* group, const char* name) : 8866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mGroup(group), mFunc(nullptr) { 8966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mName = strndup(name, strlen(name)); 9066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman} 9166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 9266b8ab22586debccb1f787d4d52b7f042d4ddeb8John BaumanBatch::~Batch() { 9366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman for (CPUClosure* c : mClosures) { 9466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman delete c; 9566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 9666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman free(mName); 9766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman} 9866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 9966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Baumanbool Batch::conflict(CPUClosure* cpuClosure) const { 10066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman if (mClosures.empty()) { 10166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 
return false; 10266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 10366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 10466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const Closure* closure = cpuClosure->mClosure; 10566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 10666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { 10766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman // An invoke should be in a batch by itself, so it conflicts with any other 10866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman // closure. 10966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman return true; 11066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 11166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 11266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const auto& globalDeps = closure->mGlobalDeps; 11366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const auto& argDeps = closure->mArgDeps; 11466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 11566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman for (CPUClosure* c : mClosures) { 11666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const Closure* batched = c->mClosure; 11766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman if (globalDeps.find(batched) != globalDeps.end()) { 11866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman return true; 11966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 12066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const auto& it = argDeps.find(batched); 12166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman if (it != argDeps.end()) { 12266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const auto& args = (*it).second; 12366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman for (const auto &p1 : *args) { 12466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman if (p1.second.get() != nullptr) { 12566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman return true; 
12666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 12766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 12866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 12966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman } 13066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 13166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman return false; 13266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman} 13366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 13466b8ab22586debccb1f787d4d52b7f042d4ddeb8John BaumanCpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 13566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const ScriptGroupBase *sg) : 13666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), 13766b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman mExecutable(nullptr), mScriptObj(nullptr) { 13866b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman rsAssert(!mGroup->mClosures.empty()); 13966b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman 14066b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman Batch* batch = new Batch(this, "Batch0"); 14166b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman int i = 0; 14266b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman for (Closure* closure: mGroup->mClosures) { 14366b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman CPUClosure* cc; 14466b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman const IDBase* funcID = closure->mFunctionID.get(); 14566b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman RsdCpuScriptImpl* si = 14666b8ab22586debccb1f787d4d52b7f042d4ddeb8John Bauman (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); 147 if (closure->mIsKernel) { 148 MTLaunchStruct mtls; 149 si->forEachKernelSetup(funcID->mSlot, &mtls); 150 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); 151 } else { 152 cc = new CPUClosure(closure, si); 153 } 154 155 if (batch->conflict(cc)) { 156 mBatches.push_back(batch); 157 std::stringstream ss; 158 ss 
<< "Batch" << ++i; 159 batch = new Batch(this, ss.str().c_str()); 160 } 161 162 batch->mClosures.push_back(cc); 163 } 164 165 rsAssert(!batch->mClosures.empty()); 166 mBatches.push_back(batch); 167 168#ifndef RS_COMPATIBILITY_LIB 169 compile(mGroup->mCacheDir); 170 if (mScriptObj != nullptr && mExecutable != nullptr) { 171 for (Batch* batch : mBatches) { 172 batch->resolveFuncPtr(mScriptObj); 173 } 174 } 175#endif // RS_COMPATIBILITY_LIB 176} 177 178void Batch::resolveFuncPtr(void* sharedObj) { 179 std::string funcName(mName); 180 if (mClosures.front()->mClosure->mIsKernel) { 181 funcName.append(".expand"); 182 } 183 mFunc = dlsym(sharedObj, funcName.c_str()); 184 rsAssert (mFunc != nullptr); 185} 186 187CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 188 for (Batch* batch : mBatches) { 189 delete batch; 190 } 191 delete mExecutable; 192 // TODO: move this dlclose into ~ScriptExecutable(). 193 if (mScriptObj != nullptr) { 194 dlclose(mScriptObj); 195 } 196} 197 198namespace { 199 200#ifndef RS_COMPATIBILITY_LIB 201 202string getCoreLibPath(Context* context, string* coreLibRelaxedPath) { 203 *coreLibRelaxedPath = ""; 204 205 // If we're debugging, use the debug library. 206 if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 207 return SYSLIBPATH"/libclcore_debug.bc"; 208 } 209 210 // Check for a platform specific library 211 212#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 213 // NEON-capable ARMv7a devices can use an accelerated math library 214 // for all reduced precision scripts. 215 // ARMv8 does not use NEON, as ASIMD can be used with all precision 216 // levels. 217 *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; 218#endif 219 220#if defined(__i386__) || defined(__x86_64__) 221 // x86 devices will use an optimized library. 
222 return SYSLIBPATH"/libclcore_x86.bc"; 223#else 224 return SYSLIBPATH"/libclcore.bc"; 225#endif 226} 227 228void setupCompileArguments( 229 const vector<const char*>& inputs, const vector<string>& kernelBatches, 230 const vector<string>& invokeBatches, 231 const char* outputDir, const char* outputFileName, 232 const char* coreLibPath, const char* coreLibRelaxedPath, 233 vector<const char*>* args) { 234 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 235 args->push_back("-fPIC"); 236 args->push_back("-embedRSInfo"); 237 args->push_back("-mtriple"); 238 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 239 args->push_back("-bclib"); 240 args->push_back(coreLibPath); 241 args->push_back("-bclib_relaxed"); 242 args->push_back(coreLibRelaxedPath); 243 for (const char* input : inputs) { 244 args->push_back(input); 245 } 246 for (const string& batch : kernelBatches) { 247 args->push_back("-merge"); 248 args->push_back(batch.c_str()); 249 } 250 for (const string& batch : invokeBatches) { 251 args->push_back("-invoke"); 252 args->push_back(batch.c_str()); 253 } 254 args->push_back("-output_path"); 255 args->push_back(outputDir); 256 args->push_back("-o"); 257 args->push_back(outputFileName); 258} 259 260void generateSourceSlot(const Closure& closure, 261 const std::vector<const char*>& inputs, 262 std::stringstream& ss) { 263 const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); 264 const Script* script = funcID->mScript; 265 266 rsAssert (!script->isIntrinsic()); 267 268 const RsdCpuScriptImpl *cpuScript = 269 (const RsdCpuScriptImpl*)script->mHal.drv; 270 const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 271 272 const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - 273 inputs.begin(); 274 275 ss << index << "," << funcID->mSlot << "."; 276} 277 278#endif // RS_COMPATIBILTY_LIB 279 280} // anonymous namespace 281 282void CpuScriptGroup2Impl::compile(const char* cacheDir) { 283#ifndef RS_COMPATIBILITY_LIB 284 if 
(mGroup->mClosures.size() < 2) { 285 return; 286 } 287 288 auto comparator = [](const char* str1, const char* str2) -> bool { 289 return strcmp(str1, str2) < 0; 290 }; 291 std::set<const char*, decltype(comparator)> inputSet(comparator); 292 293 for (Closure* closure : mGroup->mClosures) { 294 const Script* script = closure->mFunctionID.get()->mScript; 295 296 // If any script is an intrinsic, give up trying fusing the kernels. 297 if (script->isIntrinsic()) { 298 return; 299 } 300 301 const RsdCpuScriptImpl *cpuScript = 302 (const RsdCpuScriptImpl*)script->mHal.drv; 303 const char* bitcodeFilename = cpuScript->getBitcodeFilePath(); 304 inputSet.insert(bitcodeFilename); 305 } 306 307 std::vector<const char*> inputs(inputSet.begin(), inputSet.end()); 308 309 std::vector<string> kernelBatches; 310 std::vector<string> invokeBatches; 311 312 int i = 0; 313 for (const auto& batch : mBatches) { 314 rsAssert(batch->size() > 0); 315 316 std::stringstream ss; 317 ss << batch->mName << ":"; 318 319 if (!batch->mClosures.front()->mClosure->mIsKernel) { 320 rsAssert(batch->size() == 1); 321 generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); 322 invokeBatches.push_back(ss.str()); 323 } else { 324 for (const auto& cpuClosure : batch->mClosures) { 325 generateSourceSlot(*cpuClosure->mClosure, inputs, ss); 326 } 327 kernelBatches.push_back(ss.str()); 328 } 329 } 330 331 rsAssert(cacheDir != nullptr); 332 string objFilePath(cacheDir); 333 objFilePath.append("/"); 334 objFilePath.append(mGroup->mName); 335 objFilePath.append(".o"); 336 337 const char* resName = mGroup->mName; 338 string coreLibRelaxedPath; 339 const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), 340 &coreLibRelaxedPath); 341 342 vector<const char*> arguments; 343 setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, 344 resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(), 345 &arguments); 346 347 std::unique_ptr<const char> 
cmdLine(rsuJoinStrings(arguments.size() - 1, 348 arguments.data())); 349 350 inputs.push_back(coreLibPath.c_str()); 351 inputs.push_back(coreLibRelaxedPath.c_str()); 352 353 uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(), 354 inputs.data(), inputs.size()); 355 356 if (checksum == 0) { 357 return; 358 } 359 360 std::stringstream ss; 361 ss << std::hex << checksum; 362 const char* checksumStr = ss.str().c_str(); 363 364 //===--------------------------------------------------------------------===// 365 // Try to load a shared lib from code cache matching filename and checksum 366 //===--------------------------------------------------------------------===// 367 368 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 369 if (mScriptObj != nullptr) { 370 mExecutable = ScriptExecutable::createFromSharedObject( 371 getCpuRefImpl()->getContext(), mScriptObj, checksum); 372 if (mExecutable != nullptr) { 373 return; 374 } else { 375 ALOGE("Failed to create an executable object from so file"); 376 } 377 dlclose(mScriptObj); 378 mScriptObj = nullptr; 379 } 380 381 //===--------------------------------------------------------------------===// 382 // Fuse the input kernels and generate native code in an object file 383 //===--------------------------------------------------------------------===// 384 385 arguments.push_back("-build-checksum"); 386 arguments.push_back(checksumStr); 387 arguments.push_back(nullptr); 388 389 bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, 390 arguments.size()-1, 391 arguments.data()); 392 if (!compiled) { 393 return; 394 } 395 396 //===--------------------------------------------------------------------===// 397 // Create and load the shared lib 398 //===--------------------------------------------------------------------===// 399 400 if (!SharedLibraryUtils::createSharedLibrary( 401 getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName)) { 402 ALOGE("Failed to link object file 
'%s'", resName); 403 unlink(objFilePath.c_str()); 404 return; 405 } 406 407 unlink(objFilePath.c_str()); 408 409 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 410 if (mScriptObj == nullptr) { 411 ALOGE("Unable to load '%s'", resName); 412 return; 413 } 414 415 mExecutable = ScriptExecutable::createFromSharedObject( 416 getCpuRefImpl()->getContext(), 417 mScriptObj); 418 419#endif // RS_COMPATIBILITY_LIB 420} 421 422void CpuScriptGroup2Impl::execute() { 423 for (auto batch : mBatches) { 424 batch->setGlobalsForBatch(); 425 batch->run(); 426 } 427} 428 429void Batch::setGlobalsForBatch() { 430 for (CPUClosure* cpuClosure : mClosures) { 431 const Closure* closure = cpuClosure->mClosure; 432 const IDBase* funcID = closure->mFunctionID.get(); 433 Script* s = funcID->mScript;; 434 for (const auto& p : closure->mGlobals) { 435 const void* value = p.second.first; 436 int size = p.second.second; 437 if (value == nullptr && size == 0) { 438 // This indicates the current closure depends on another closure for a 439 // global in their shared module (script). In this case we don't need to 440 // copy the value. For example, an invoke intializes a global variable 441 // which a kernel later reads. 
442 continue; 443 } 444 rsAssert(p.first != nullptr); 445 Script* script = p.first->mScript; 446 const RsdCpuScriptImpl *cpuScript = 447 (const RsdCpuScriptImpl*)script->mHal.drv; 448 int slot = p.first->mSlot; 449 ScriptExecutable* exec = mGroup->getExecutable(); 450 if (exec != nullptr) { 451 const char* varName = cpuScript->getFieldName(slot); 452 void* addr = exec->getFieldAddress(varName); 453 if (size < 0) { 454 rsrSetObject(mGroup->getCpuRefImpl()->getContext(), 455 (rs_object_base*)addr, (ObjectBase*)value); 456 } else { 457 memcpy(addr, (const void*)&value, size); 458 } 459 } else { 460 // We use -1 size to indicate an ObjectBase rather than a primitive type 461 if (size < 0) { 462 s->setVarObj(slot, (ObjectBase*)value); 463 } else { 464 s->setVar(slot, (const void*)&value, size); 465 } 466 } 467 } 468 } 469} 470 471void Batch::run() { 472 if (!mClosures.front()->mClosure->mIsKernel) { 473 rsAssert(mClosures.size() == 1); 474 475 // This batch contains a single closure for an invoke function 476 CPUClosure* cc = mClosures.front(); 477 const Closure* c = cc->mClosure; 478 479 if (mFunc != nullptr) { 480 // TODO: Need align pointers for x86_64. 
481 // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp 482 ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); 483 } else { 484 const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); 485 rsAssert(invokeID != nullptr); 486 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 487 } 488 489 return; 490 } 491 492 if (mFunc != nullptr) { 493 MTLaunchStruct mtls; 494 const CPUClosure* firstCpuClosure = mClosures.front(); 495 const CPUClosure* lastCpuClosure = mClosures.back(); 496 497 firstCpuClosure->mSi->forEachMtlsSetup( 498 (const Allocation**)firstCpuClosure->mClosure->mArgs, 499 firstCpuClosure->mClosure->mNumArg, 500 lastCpuClosure->mClosure->mReturnValue, 501 nullptr, 0, nullptr, &mtls); 502 503 mtls.script = nullptr; 504 mtls.fep.usr = nullptr; 505 mtls.kernel = (ForEachFunc_t)mFunc; 506 507 mGroup->getCpuRefImpl()->launchThreads( 508 (const Allocation**)firstCpuClosure->mClosure->mArgs, 509 firstCpuClosure->mClosure->mNumArg, 510 lastCpuClosure->mClosure->mReturnValue, 511 nullptr, &mtls); 512 513 return; 514 } 515 516 for (CPUClosure* cpuClosure : mClosures) { 517 const Closure* closure = cpuClosure->mClosure; 518 const ScriptKernelID* kernelID = 519 (const ScriptKernelID*)closure->mFunctionID.get(); 520 cpuClosure->mSi->preLaunch(kernelID->mSlot, 521 (const Allocation**)closure->mArgs, 522 closure->mNumArg, closure->mReturnValue, 523 nullptr, 0, nullptr); 524 } 525 526 const CPUClosure* cpuClosure = mClosures.front(); 527 const Closure* closure = cpuClosure->mClosure; 528 MTLaunchStruct mtls; 529 530 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 531 closure->mNumArg, 532 closure->mReturnValue, 533 nullptr, 0, nullptr, &mtls)) { 534 535 mtls.script = nullptr; 536 mtls.kernel = (void (*)())&groupRoot; 537 mtls.fep.usr = &mClosures; 538 539 mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 540 } 541 542 for (CPUClosure* cpuClosure : mClosures) { 543 const 
Closure* closure = cpuClosure->mClosure; 544 const ScriptKernelID* kernelID = 545 (const ScriptKernelID*)closure->mFunctionID.get(); 546 cpuClosure->mSi->postLaunch(kernelID->mSlot, 547 (const Allocation**)closure->mArgs, 548 closure->mNumArg, closure->mReturnValue, 549 nullptr, 0, nullptr); 550 } 551} 552 553} // namespace renderscript 554} // namespace android 555