rsCpuScriptGroup2.cpp revision 433558f0f9abbf07770db288183a15fd261cace2
1#include "rsCpuScriptGroup2.h" 2 3#include <dlfcn.h> 4#include <stdio.h> 5#include <stdlib.h> 6#include <unistd.h> 7 8#include <string> 9#include <vector> 10 11#ifndef RS_COMPATIBILITY_LIB 12#include "bcc/Config/Config.h" 13#include <sys/wait.h> 14#endif 15 16#include "cpu_ref/rsCpuCore.h" 17#include "rsClosure.h" 18#include "rsContext.h" 19#include "rsCpuCore.h" 20#include "rsCpuScript.h" 21#include "rsScript.h" 22#include "rsScriptGroup2.h" 23#include "rsScriptIntrinsic.h" 24 25using std::string; 26using std::vector; 27 28namespace android { 29namespace renderscript { 30 31namespace { 32 33const size_t DefaultKernelArgCount = 2; 34 35void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, 36 uint32_t xend, uint32_t outstep) { 37 const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr; 38 RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; 39 const void **oldIns = kparams->ins; 40 uint32_t *oldStrides = kparams->inEStrides; 41 42 std::vector<const void*> ins(DefaultKernelArgCount); 43 std::vector<uint32_t> strides(DefaultKernelArgCount); 44 45 for (CPUClosure* cpuClosure : closures) { 46 const Closure* closure = cpuClosure->mClosure; 47 48 auto in_iter = ins.begin(); 49 auto stride_iter = strides.begin(); 50 51 for (size_t i = 0; i < closure->mNumArg; i++) { 52 const void* arg = closure->mArgs[i]; 53 const Allocation* a = (const Allocation*)arg; 54 const uint32_t eStride = a->mHal.state.elementSizeBytes; 55 const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + 56 eStride * xstart; 57 if (kparams->dimY > 1) { 58 ptr += a->mHal.drvState.lod[0].stride * kparams->y; 59 } 60 *in_iter++ = ptr; 61 *stride_iter++ = eStride; 62 } 63 64 mutable_kparams->ins = &ins[0]; 65 mutable_kparams->inEStrides = &strides[0]; 66 67 const Allocation* out = closure->mReturnValue; 68 const uint32_t ostep = out->mHal.state.elementSizeBytes; 69 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + 70 ostep * xstart; 71 if (kparams->dimY > 1) { 72 ptr += out->mHal.drvState.lod[0].stride * kparams->y; 73 } 74 75 mutable_kparams->out = (void*)ptr; 76 77 mutable_kparams->usr = cpuClosure->mUsrPtr; 78 79 cpuClosure->mFunc(kparams, xstart, xend, ostep); 80 } 81 82 mutable_kparams->ins = oldIns; 83 mutable_kparams->inEStrides = oldStrides; 84 mutable_kparams->usr = &closures; 85} 86 87} // namespace 88 89Batch::~Batch() { 90 for (CPUClosure* c : mClosures) { 91 delete c; 92 } 93 if (mScriptObj) { 94 dlclose(mScriptObj); 95 } 96} 97 98bool Batch::conflict(CPUClosure* cpuClosure) const { 99 if (mClosures.empty()) { 100 return false; 101 } 102 103 const Closure* closure = cpuClosure->mClosure; 104 105 if (closure->mKernelID.get() == nullptr || 106 mClosures.front()->mClosure->mKernelID.get() == nullptr) { 107 // An invoke should be in a batch by itself, so it conflicts with any other 108 // closure. 109 return true; 110 } 111 112 const auto& globalDeps = closure->mGlobalDeps; 113 const auto& argDeps = closure->mArgDeps; 114 115 for (CPUClosure* c : mClosures) { 116 const Closure* batched = c->mClosure; 117 if (globalDeps.find(batched) != globalDeps.end()) { 118 return true; 119 } 120 const auto& it = argDeps.find(batched); 121 if (it != argDeps.end()) { 122 const auto& args = (*it).second; 123 for (const auto &p1 : *args) { 124 if (p1.second->get() != nullptr) { 125 return true; 126 } 127 } 128 } 129 } 130 131 return false; 132} 133 134CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, 135 const ScriptGroupBase *sg) : 136 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { 137 rsAssert(!mGroup->mClosures.empty()); 138 139 Batch* batch = new Batch(this); 140 for (Closure* closure: mGroup->mClosures) { 141 const ScriptKernelID* kernelID = closure->mKernelID.get(); 142 RsdCpuScriptImpl* si; 143 CPUClosure* cc; 144 if (kernelID != nullptr) { 145 si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); 146 MTLaunchStruct mtls; 147 si->forEachKernelSetup(kernelID->mSlot, &mtls); 148 // TODO: Is mtls.fep.usrLen ever used? 149 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, 150 mtls.fep.usr, mtls.fep.usrLen); 151 } else { 152 si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript( 153 closure->mInvokeID->mScript); 154 cc = new CPUClosure(closure, si); 155 } 156 157 if (batch->conflict(cc)) { 158 mBatches.push_back(batch); 159 batch = new Batch(this); 160 } 161 162 batch->mClosures.push_back(cc); 163 } 164 165 rsAssert(!batch->mClosures.empty()); 166 mBatches.push_back(batch); 167 168#ifndef RS_COMPATIBILITY_LIB 169 for (Batch* batch : mBatches) { 170 batch->tryToCreateFusedKernel(mGroup->mCacheDir); 171 } 172#endif 173} 174 175CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { 176 for (Batch* batch : mBatches) { 177 delete batch; 178 } 179} 180 181namespace { 182 183#ifndef RS_COMPATIBILITY_LIB 184 185string getFileName(string path) { 186 unsigned found = path.find_last_of("/\\"); 187 return path.substr(found + 1); 188} 189 190void setupCompileArguments( 191 const vector<string>& inputs, const vector<int>& kernels, 192 const string& output_dir, const string& output_filename, 193 const string& rsLib, vector<const char*>* args) { 194 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); 195 args->push_back("-fPIC"); 196 args->push_back("-embedRSInfo"); 197 args->push_back("-mtriple"); 198 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 199 args->push_back("-bclib"); 200 args->push_back(rsLib.c_str()); 201 for (const string& input : inputs) { 202 args->push_back(input.c_str()); 203 } 204 for (int kernel : kernels) { 205 args->push_back("-k"); 206 string strKernel = std::to_string(kernel); 207 args->push_back(strKernel.c_str()); 208 } 209 args->push_back("-output_path"); 210 args->push_back(output_dir.c_str()); 211 args->push_back("-o"); 212 args->push_back(output_filename.c_str()); 213 args->push_back(nullptr); 214} 215 216string convertListToString(int n, const char* const* strs) { 217 string ret; 218 ret.append(strs[0]); 219 for (int i = 1; i < n; i++) { 220 ret.append(" "); 221 ret.append(strs[i]); 222 } 223 return ret; 224} 225 226bool fuseAndCompile(const char** arguments, 227 const string& commandLine) { 228 const pid_t pid = fork(); 229 230 if (pid == -1) { 231 ALOGE("Couldn't fork for bcc execution"); 232 return false; 233 } 234 235 if (pid == 0) { 236 // Child process 237 ALOGV("Invoking BCC with: %s", commandLine.c_str()); 238 execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); 239 240 ALOGE("execv() failed: %s", strerror(errno)); 241 abort(); 242 return false; 243 } 244 245 // Parent process 246 int status = 0; 247 const pid_t w = waitpid(pid, &status, 0); 248 if (w == -1) { 249 return false; 250 } 251 252 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { 253 ALOGE("bcc terminated unexpectedly"); 254 return false; 255 } 256 257 return true; 258} 259#endif 260 261} // anonymous namespace 262 263void Batch::tryToCreateFusedKernel(const char *cacheDir) { 264#ifndef RS_COMPATIBILITY_LIB 265 if (mClosures.size() < 2) { 266 return; 267 } 268 269 //===--------------------------------------------------------------------===// 270 // Fuse the input kernels and generate native code in an object file 271 //===--------------------------------------------------------------------===// 272 273 std::vector<string> inputFiles; 274 std::vector<int> slots; 275 276 for (CPUClosure* cpuClosure : mClosures) { 277 const Closure* closure = cpuClosure->mClosure; 278 const ScriptKernelID* kernelID = closure->mKernelID.get(); 279 const Script* script = kernelID->mScript; 280 281 if (script->isIntrinsic()) { 282 return; 283 } 284 285 const RsdCpuScriptImpl *cpuScript = 286 (const RsdCpuScriptImpl*)script->mHal.drv; 287 288 const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); 289 290 inputFiles.push_back(bitcodeFilename); 291 slots.push_back(kernelID->mSlot); 292 } 293 294 rsAssert(cacheDir != nullptr); 295 string objFilePath(cacheDir); 296 objFilePath.append("/fusedXXXXXX.o"); 297 // Find unique object file name, to make following file names unique. 298 int tempfd = mkstemps(&objFilePath[0], 2); 299 if (tempfd == -1) { 300 return; 301 } 302 TEMP_FAILURE_RETRY(close(tempfd)); 303 304 string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); 305 string rsLibPath(SYSLIBPATH"/libclcore.bc"); 306 vector<const char*> arguments; 307 setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath, 308 &arguments); 309 string commandLine = 310 convertListToString(arguments.size() - 1, arguments.data()); 311 312 if (!fuseAndCompile(arguments.data(), commandLine)) { 313 unlink(objFilePath.c_str()); 314 return; 315 } 316 317 //===--------------------------------------------------------------------===// 318 // Create and load the shared lib 319 //===--------------------------------------------------------------------===// 320 321 const char* resName = outputFileName.c_str(); 322 323 if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { 324 ALOGE("Failed to link object file '%s'", resName); 325 return; 326 } 327 328 void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 329 if (mSharedObj == nullptr) { 330 ALOGE("Unable to load '%s'", resName); 331 return; 332 } 333 334 mExecutable = ScriptExecutable::createFromSharedObject( 335 nullptr, // RS context. Unused. 336 mSharedObj); 337 338#endif // RS_COMPATIBILITY_LIB 339} 340 341void CpuScriptGroup2Impl::execute() { 342 for (auto batch : mBatches) { 343 batch->setGlobalsForBatch(); 344 batch->run(); 345 } 346} 347 348void Batch::setGlobalsForBatch() { 349 for (CPUClosure* cpuClosure : mClosures) { 350 const Closure* closure = cpuClosure->mClosure; 351 const ScriptKernelID* kernelID = closure->mKernelID.get(); 352 Script* s; 353 if (kernelID != nullptr) { 354 s = kernelID->mScript; 355 } else { 356 s = cpuClosure->mClosure->mInvokeID->mScript; 357 } 358 for (const auto& p : closure->mGlobals) { 359 const void* value = p.second.first; 360 int size = p.second.second; 361 if (value == nullptr && size == 0) { 362 // This indicates the current closure depends on another closure for a 363 // global in their shared module (script). In this case we don't need to 364 // copy the value. For example, an invoke intializes a global variable 365 // which a kernel later reads. 366 continue; 367 } 368 rsAssert(p.first != nullptr); 369 ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)", 370 closure, p.first, p.first->mScript, p.first->mSlot); 371 // We use -1 size to indicate an ObjectBase rather than a primitive type 372 if (size < 0) { 373 s->setVarObj(p.first->mSlot, (ObjectBase*)value); 374 } else { 375 s->setVar(p.first->mSlot, (const void*)&value, size); 376 } 377 } 378 } 379} 380 381void Batch::run() { 382 if (mExecutable != nullptr) { 383 MTLaunchStruct mtls; 384 const CPUClosure* firstCpuClosure = mClosures.front(); 385 const CPUClosure* lastCpuClosure = mClosures.back(); 386 387 firstCpuClosure->mSi->forEachMtlsSetup( 388 (const Allocation**)firstCpuClosure->mClosure->mArgs, 389 firstCpuClosure->mClosure->mNumArg, 390 lastCpuClosure->mClosure->mReturnValue, 391 nullptr, 0, nullptr, &mtls); 392 393 mtls.script = nullptr; 394 mtls.fep.usr = nullptr; 395 mtls.kernel = mExecutable->getForEachFunction(0); 396 397 mGroup->getCpuRefImpl()->launchThreads( 398 (const Allocation**)firstCpuClosure->mClosure->mArgs, 399 firstCpuClosure->mClosure->mNumArg, 400 lastCpuClosure->mClosure->mReturnValue, 401 nullptr, &mtls); 402 403 return; 404 } 405 406 if (mClosures.size() == 1 && 407 mClosures.front()->mClosure->mKernelID.get() == nullptr) { 408 // This closure is for an invoke function 409 CPUClosure* cc = mClosures.front(); 410 const Closure* c = cc->mClosure; 411 const ScriptInvokeID* invokeID = c->mInvokeID; 412 rsAssert(invokeID != nullptr); 413 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); 414 return; 415 } 416 417 for (CPUClosure* cpuClosure : mClosures) { 418 const Closure* closure = cpuClosure->mClosure; 419 const ScriptKernelID* kernelID = closure->mKernelID.get(); 420 cpuClosure->mSi->preLaunch(kernelID->mSlot, 421 (const Allocation**)closure->mArgs, 422 closure->mNumArg, closure->mReturnValue, 423 cpuClosure->mUsrPtr, cpuClosure->mUsrSize, 424 nullptr); 425 } 426 427 const CPUClosure* cpuClosure = mClosures.front(); 428 const Closure* closure = cpuClosure->mClosure; 429 MTLaunchStruct mtls; 430 431 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, 432 closure->mNumArg, 433 closure->mReturnValue, 434 nullptr, 0, nullptr, &mtls)) { 435 436 mtls.script = nullptr; 437 mtls.kernel = (void (*)())&groupRoot; 438 mtls.fep.usr = &mClosures; 439 440 mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); 441 } 442 443 for (CPUClosure* cpuClosure : mClosures) { 444 const Closure* closure = cpuClosure->mClosure; 445 const ScriptKernelID* kernelID = closure->mKernelID.get(); 446 cpuClosure->mSi->postLaunch(kernelID->mSlot, 447 (const Allocation**)closure->mArgs, 448 closure->mNumArg, closure->mReturnValue, 449 nullptr, 0, nullptr); 450 } 451} 452 453} // namespace renderscript 454} // namespace android 455