// rsCpuScript.cpp revision d4ecb17adc9b099351f2ca1779a74f5283f20a3d
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 19#include "rsCpuCore.h" 20 21#include "rsCpuScript.h" 22//#include "rsdRuntime.h" 23//#include "rsdAllocation.h" 24//#include "rsCpuIntrinsics.h" 25 26 27#include "utils/Vector.h" 28#include "utils/Timers.h" 29#include "utils/StopWatch.h" 30 31 32#include <bcc/BCCContext.h> 33#include <bcc/Renderscript/RSCompilerDriver.h> 34#include <bcc/Renderscript/RSExecutable.h> 35#include <bcc/Renderscript/RSInfo.h> 36 37namespace android { 38namespace renderscript { 39 40 41 42RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) { 43 mCtx = ctx; 44 mScript = s; 45 46 mRoot = NULL; 47 mRootExpand = NULL; 48 mInit = NULL; 49 mFreeChildren = NULL; 50 51 mCompilerContext = NULL; 52 mCompilerDriver = NULL; 53 mExecutable = NULL; 54 55 mBoundAllocs = NULL; 56 mIntrinsicData = NULL; 57 mIsThreadable = true; 58} 59 60 61bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir, 62 uint8_t const *bitcode, size_t bitcodeSize, 63 uint32_t flags) { 64 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 65 //ALOGE("rsdScriptInit %p %p", rsc, script); 66 67 mCtx->lockMutex(); 68 69 bcc::RSExecutable *exec; 70 const bcc::RSInfo *info; 71 72 mCompilerContext = NULL; 73 mCompilerDriver = NULL; 74 mExecutable = NULL; 75 76 mCompilerContext = new 
bcc::BCCContext(); 77 if (mCompilerContext == NULL) { 78 ALOGE("bcc: FAILS to create compiler context (out of memory)"); 79 mCtx->unlockMutex(); 80 return false; 81 } 82 83 mCompilerDriver = new bcc::RSCompilerDriver(); 84 if (mCompilerDriver == NULL) { 85 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 86 mCtx->unlockMutex(); 87 return false; 88 } 89 90 mCompilerDriver->setRSRuntimeLookupFunction(lookupRuntimeStub); 91 mCompilerDriver->setRSRuntimeLookupContext(this); 92 93 exec = mCompilerDriver->build(*mCompilerContext, cacheDir, resName, 94 (const char *)bitcode, bitcodeSize, NULL); 95 96 if (exec == NULL) { 97 ALOGE("bcc: FAILS to prepare executable for '%s'", resName); 98 mCtx->unlockMutex(); 99 return false; 100 } 101 102 mExecutable = exec; 103 104 exec->setThreadable(mIsThreadable); 105 if (!exec->syncInfo()) { 106 ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); 107 } 108 109 mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root")); 110 mRootExpand = 111 reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand")); 112 mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init")); 113 mFreeChildren = 114 reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor")); 115 116 117 info = &mExecutable->getInfo(); 118 if (info->getExportVarNames().size()) { 119 mBoundAllocs = new Allocation *[info->getExportVarNames().size()]; 120 memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size()); 121 } 122 123 mCtx->unlockMutex(); 124 return true; 125} 126 127void RsdCpuScriptImpl::populateScript(Script *script) { 128 const bcc::RSInfo *info = &mExecutable->getInfo(); 129 130 // Copy info over to runtime 131 script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size(); 132 script->mHal.info.exportedVariableCount = info->getExportVarNames().size(); 133 script->mHal.info.exportedPragmaCount = info->getPragmas().size(); 134 script->mHal.info.exportedPragmaKeyList = 135 
const_cast<const char**>(mExecutable->getPragmaKeys().array()); 136 script->mHal.info.exportedPragmaValueList = 137 const_cast<const char**>(mExecutable->getPragmaValues().array()); 138 139 if (mRootExpand) { 140 script->mHal.info.root = mRootExpand; 141 } else { 142 script->mHal.info.root = mRoot; 143 } 144} 145 146/* 147bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) { 148 pthread_mutex_lock(&rsdgInitMutex); 149 150 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 151 if (drv == NULL) { 152 goto error; 153 } 154 s->mHal.drv = drv; 155 drv->mIntrinsicID = iid; 156 drv->mIntrinsicData = rsdIntrinsic_Init(rsc, s, iid, &drv->mIntrinsicFuncs); 157 s->mHal.info.isThreadable = true; 158 159 pthread_mutex_unlock(&rsdgInitMutex); 160 return true; 161 162error: 163 pthread_mutex_unlock(&rsdgInitMutex); 164 return false; 165} 166*/ 167 168typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 169 170void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout, 171 const void * usr, uint32_t usrLen, 172 const RsScriptCall *sc, 173 MTLaunchStruct *mtls) { 174 175 memset(mtls, 0, sizeof(MTLaunchStruct)); 176 177 if (ain) { 178 mtls->fep.dimX = ain->getType()->getDimX(); 179 mtls->fep.dimY = ain->getType()->getDimY(); 180 mtls->fep.dimZ = ain->getType()->getDimZ(); 181 //mtls->dimArray = ain->getType()->getDimArray(); 182 } else if (aout) { 183 mtls->fep.dimX = aout->getType()->getDimX(); 184 mtls->fep.dimY = aout->getType()->getDimY(); 185 mtls->fep.dimZ = aout->getType()->getDimZ(); 186 //mtls->dimArray = aout->getType()->getDimArray(); 187 } else { 188 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 189 return; 190 } 191 192 if (!sc || (sc->xEnd == 0)) { 193 mtls->xEnd = mtls->fep.dimX; 194 } else { 195 rsAssert(sc->xStart < mtls->fep.dimX); 196 rsAssert(sc->xEnd <= mtls->fep.dimX); 197 rsAssert(sc->xStart < 
sc->xEnd); 198 mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); 199 mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); 200 if (mtls->xStart >= mtls->xEnd) return; 201 } 202 203 if (!sc || (sc->yEnd == 0)) { 204 mtls->yEnd = mtls->fep.dimY; 205 } else { 206 rsAssert(sc->yStart < mtls->fep.dimY); 207 rsAssert(sc->yEnd <= mtls->fep.dimY); 208 rsAssert(sc->yStart < sc->yEnd); 209 mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); 210 mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); 211 if (mtls->yStart >= mtls->yEnd) return; 212 } 213 214 if (!sc || (sc->zEnd == 0)) { 215 mtls->zEnd = mtls->fep.dimZ; 216 } else { 217 rsAssert(sc->zStart < mtls->fep.dimZ); 218 rsAssert(sc->zEnd <= mtls->fep.dimZ); 219 rsAssert(sc->zStart < sc->zEnd); 220 mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart); 221 mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd); 222 if (mtls->zStart >= mtls->zEnd) return; 223 } 224 225 mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); 226 mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); 227 mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); 228 mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); 229 230 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 231 232 mtls->rsc = mCtx; 233 mtls->ain = ain; 234 mtls->aout = aout; 235 mtls->fep.usr = usr; 236 mtls->fep.usrLen = usrLen; 237 mtls->mSliceSize = 1; 238 mtls->mSliceNum = 0; 239 240 mtls->fep.ptrIn = NULL; 241 mtls->fep.eStrideIn = 0; 242 mtls->isThreadable = mIsThreadable; 243 244 if (ain) { 245 mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr; 246 mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes(); 247 mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride; 248 } 249 250 mtls->fep.ptrOut = NULL; 251 mtls->fep.eStrideOut = 0; 252 if (aout) { 253 mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; 254 mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); 255 mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; 256 } 257} 258 259 260void 
RsdCpuScriptImpl::invokeForEach(uint32_t slot, 261 const Allocation * ain, 262 Allocation * aout, 263 const void * usr, 264 uint32_t usrLen, 265 const RsScriptCall *sc) { 266 267 MTLaunchStruct mtls; 268 forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls); 269 forEachKernelSetup(slot, &mtls); 270 271 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 272 mCtx->launchThreads(ain, aout, sc, &mtls); 273 mCtx->setTLS(oldTLS); 274} 275 276void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { 277 278 mtls->script = this; 279 mtls->fep.slot = slot; 280 281 rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size()); 282 mtls->kernel = reinterpret_cast<ForEachFunc_t>( 283 mExecutable->getExportForeachFuncAddrs()[slot]); 284 rsAssert(mtls->kernel != NULL); 285 mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second; 286} 287 288int RsdCpuScriptImpl::invokeRoot() { 289 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 290 int ret = mRoot(); 291 mCtx->setTLS(oldTLS); 292 return ret; 293} 294 295void RsdCpuScriptImpl::invokeInit() { 296 if (mInit) { 297 mInit(); 298 } 299} 300 301void RsdCpuScriptImpl::invokeFreeChildren() { 302 if (mFreeChildren) { 303 mFreeChildren(); 304 } 305} 306 307void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params, 308 size_t paramLength) { 309 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 310 311 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 312 reinterpret_cast<void (*)(const void *, uint32_t)>( 313 mExecutable->getExportFuncAddrs()[slot])(params, paramLength); 314 mCtx->setTLS(oldTLS); 315} 316 317void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 318 //rsAssert(!script->mFieldIsObject[slot]); 319 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 320 321 //if (mIntrinsicID) { 322 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); 323 //return; 324 //} 325 326 
int32_t *destPtr = reinterpret_cast<int32_t *>( 327 mExecutable->getExportVarAddrs()[slot]); 328 if (!destPtr) { 329 //ALOGV("Calling setVar on slot = %i which is null", slot); 330 return; 331 } 332 333 memcpy(destPtr, data, dataLength); 334} 335 336void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, 337 const Element *elem, 338 const size_t *dims, size_t dimLength) { 339 340 int32_t *destPtr = reinterpret_cast<int32_t *>( 341 mExecutable->getExportVarAddrs()[slot]); 342 if (!destPtr) { 343 //ALOGV("Calling setVar on slot = %i which is null", slot); 344 return; 345 } 346 347 // We want to look at dimension in terms of integer components, 348 // but dimLength is given in terms of bytes. 349 dimLength /= sizeof(int); 350 351 // Only a single dimension is currently supported. 352 rsAssert(dimLength == 1); 353 if (dimLength == 1) { 354 // First do the increment loop. 355 size_t stride = elem->getSizeBytes(); 356 const char *cVal = reinterpret_cast<const char *>(data); 357 for (size_t i = 0; i < dims[0]; i++) { 358 elem->incRefs(cVal); 359 cVal += stride; 360 } 361 362 // Decrement loop comes after (to prevent race conditions). 
363 char *oldVal = reinterpret_cast<char *>(destPtr); 364 for (size_t i = 0; i < dims[0]; i++) { 365 elem->decRefs(oldVal); 366 oldVal += stride; 367 } 368 } 369 370 memcpy(destPtr, data, dataLength); 371} 372 373void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) { 374 375 //rsAssert(!script->mFieldIsObject[slot]); 376 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 377 378 int32_t *destPtr = reinterpret_cast<int32_t *>( 379 mExecutable->getExportVarAddrs()[slot]); 380 if (!destPtr) { 381 //ALOGV("Calling setVar on slot = %i which is null", slot); 382 return; 383 } 384 385 void *ptr = NULL; 386 mBoundAllocs[slot] = data; 387 if(data) { 388 ptr = data->mHal.drvState.lod[0].mallocPtr; 389 } 390 memcpy(destPtr, &ptr, sizeof(void *)); 391} 392 393void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { 394 395 //rsAssert(script->mFieldIsObject[slot]); 396 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 397 398 //if (mIntrinsicID) { 399 //mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, alloc); 400 //return; 401 //} 402 403 int32_t *destPtr = reinterpret_cast<int32_t *>( 404 mExecutable->getExportVarAddrs()[slot]); 405 if (!destPtr) { 406 //ALOGV("Calling setVar on slot = %i which is null", slot); 407 return; 408 } 409 410 rsrSetObject(mCtx->getContext(), (ObjectBase **)destPtr, data); 411} 412 413RsdCpuScriptImpl::~RsdCpuScriptImpl() { 414 415 if (mExecutable) { 416 Vector<void *>::const_iterator var_addr_iter = 417 mExecutable->getExportVarAddrs().begin(); 418 Vector<void *>::const_iterator var_addr_end = 419 mExecutable->getExportVarAddrs().end(); 420 421 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = 422 mExecutable->getInfo().getObjectSlots().begin(); 423 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = 424 mExecutable->getInfo().getObjectSlots().end(); 425 426 while ((var_addr_iter != var_addr_end) && 427 (is_object_iter != is_object_end)) { 428 // The field 
address can be NULL if the script-side has optimized 429 // the corresponding global variable away. 430 ObjectBase **obj_addr = 431 reinterpret_cast<ObjectBase **>(*var_addr_iter); 432 if (*is_object_iter) { 433 if (*var_addr_iter != NULL) { 434 rsrClearObject(mCtx->getContext(), obj_addr); 435 } 436 } 437 var_addr_iter++; 438 is_object_iter++; 439 } 440 } 441 442 if (mCompilerContext) { 443 delete mCompilerContext; 444 } 445 if (mCompilerDriver) { 446 delete mCompilerDriver; 447 } 448 if (mExecutable) { 449 delete mExecutable; 450 } 451 if (mBoundAllocs) { 452 delete[] mBoundAllocs; 453 } 454} 455 456Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { 457 if (!ptr) { 458 return NULL; 459 } 460 461 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) { 462 Allocation *a = mBoundAllocs[ct]; 463 if (!a) continue; 464 if (a->mHal.drvState.lod[0].mallocPtr == ptr) { 465 return a; 466 } 467 } 468 ALOGE("rsGetAllocation, failed to find %p", ptr); 469 return NULL; 470} 471 472 473} 474} 475