rsCpuScript.cpp revision cadfac411e6690e39de36c4f9e94deb9b7d2d08e
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 19#include "rsCpuCore.h" 20 21#include "rsCpuScript.h" 22//#include "rsdRuntime.h" 23//#include "rsdAllocation.h" 24//#include "rsCpuIntrinsics.h" 25 26 27#include "utils/Vector.h" 28#include "utils/Timers.h" 29#include "utils/StopWatch.h" 30 31 32#include <bcc/BCCContext.h> 33#include <bcc/Renderscript/RSCompilerDriver.h> 34#include <bcc/Renderscript/RSExecutable.h> 35#include <bcc/Renderscript/RSInfo.h> 36 37namespace android { 38namespace renderscript { 39 40 41 42RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) { 43 mCtx = ctx; 44 mScript = s; 45 46 mRoot = NULL; 47 mRootExpand = NULL; 48 mInit = NULL; 49 mFreeChildren = NULL; 50 51 mCompilerContext = NULL; 52 mCompilerDriver = NULL; 53 mExecutable = NULL; 54 55 mBoundAllocs = NULL; 56 mIntrinsicData = NULL; 57 mIsThreadable = true; 58} 59 60 61bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir, 62 uint8_t const *bitcode, size_t bitcodeSize, 63 uint32_t flags) { 64 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 65 //ALOGE("rsdScriptInit %p %p", rsc, script); 66 67 mCtx->lockMutex(); 68 69 bcc::RSExecutable *exec; 70 const bcc::RSInfo *info; 71 72 mCompilerContext = NULL; 73 mCompilerDriver = NULL; 74 mExecutable = NULL; 75 76 mCompilerContext = new bcc::BCCContext(); 77 if (mCompilerContext == NULL) { 78 ALOGE("bcc: FAILS to create compiler context (out of memory)"); 79 mCtx->unlockMutex(); 80 return false; 81 } 82 83 mCompilerDriver = new bcc::RSCompilerDriver(); 84 if (mCompilerDriver == NULL) { 85 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 86 mCtx->unlockMutex(); 87 return false; 88 } 89 90 mCompilerDriver->setRSRuntimeLookupFunction(lookupRuntimeStub); 91 mCompilerDriver->setRSRuntimeLookupContext(this); 92 93 exec = mCompilerDriver->build(*mCompilerContext, cacheDir, resName, 94 (const char *)bitcode, bitcodeSize, NULL, 95 mCtx->getLinkRuntimeCallback()); 96 97 if (exec == NULL) { 98 ALOGE("bcc: FAILS to prepare executable for '%s'", resName); 99 mCtx->unlockMutex(); 100 return false; 101 } 102 103 mExecutable = exec; 104 105 exec->setThreadable(mIsThreadable); 106 if (!exec->syncInfo()) { 107 ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); 108 } 109 110 mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root")); 111 mRootExpand = 112 reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand")); 113 mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init")); 114 mFreeChildren = 115 reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor")); 116 117 118 info = &mExecutable->getInfo(); 119 if (info->getExportVarNames().size()) { 120 mBoundAllocs = new Allocation *[info->getExportVarNames().size()]; 121 memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size()); 122 } 123 124 mCtx->unlockMutex(); 125 return true; 126} 127 128void RsdCpuScriptImpl::populateScript(Script *script) { 129 const bcc::RSInfo *info = &mExecutable->getInfo(); 130 131 // Copy info over to runtime 132 script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size(); 133 script->mHal.info.exportedVariableCount = info->getExportVarNames().size(); 134 script->mHal.info.exportedPragmaCount = info->getPragmas().size(); 135 script->mHal.info.exportedPragmaKeyList = 136 const_cast<const char**>(mExecutable->getPragmaKeys().array()); 137 script->mHal.info.exportedPragmaValueList = 138 const_cast<const char**>(mExecutable->getPragmaValues().array()); 139 140 if (mRootExpand) { 141 script->mHal.info.root = mRootExpand; 142 } else { 143 script->mHal.info.root = mRoot; 144 } 145} 146 147 148typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 149 150void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout, 151 const void * usr, uint32_t usrLen, 152 const RsScriptCall *sc, 153 MTLaunchStruct *mtls) { 154 155 memset(mtls, 0, sizeof(MTLaunchStruct)); 156 157 // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface 158 if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) { 159 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 160 return; 161 } 162 if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) { 163 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 164 return; 165 } 166 167 if (ain) { 168 mtls->fep.dimX = ain->getType()->getDimX(); 169 mtls->fep.dimY = ain->getType()->getDimY(); 170 mtls->fep.dimZ = ain->getType()->getDimZ(); 171 //mtls->dimArray = ain->getType()->getDimArray(); 172 } else if (aout) { 173 mtls->fep.dimX = aout->getType()->getDimX(); 174 mtls->fep.dimY = aout->getType()->getDimY(); 175 mtls->fep.dimZ = aout->getType()->getDimZ(); 176 //mtls->dimArray = aout->getType()->getDimArray(); 177 } else { 178 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 179 return; 180 } 181 182 if (!sc || (sc->xEnd == 0)) { 183 mtls->xEnd = mtls->fep.dimX; 184 } else { 185 rsAssert(sc->xStart < mtls->fep.dimX); 186 rsAssert(sc->xEnd <= mtls->fep.dimX); 187 rsAssert(sc->xStart < sc->xEnd); 188 mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); 189 mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); 190 if (mtls->xStart >= mtls->xEnd) return; 191 } 192 193 if (!sc || (sc->yEnd == 0)) { 194 mtls->yEnd = mtls->fep.dimY; 195 } else { 196 rsAssert(sc->yStart < mtls->fep.dimY); 197 rsAssert(sc->yEnd <= mtls->fep.dimY); 198 rsAssert(sc->yStart < sc->yEnd); 199 mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); 200 mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); 201 if (mtls->yStart >= mtls->yEnd) return; 202 } 203 204 if (!sc || (sc->zEnd == 0)) { 205 mtls->zEnd = mtls->fep.dimZ; 206 } else { 207 rsAssert(sc->zStart < mtls->fep.dimZ); 208 rsAssert(sc->zEnd <= mtls->fep.dimZ); 209 rsAssert(sc->zStart < sc->zEnd); 210 mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart); 211 mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd); 212 if (mtls->zStart >= mtls->zEnd) return; 213 } 214 215 mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); 216 mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); 217 mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); 218 mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); 219 220 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 221 222 mtls->rsc = mCtx; 223 mtls->ain = ain; 224 mtls->aout = aout; 225 mtls->fep.usr = usr; 226 mtls->fep.usrLen = usrLen; 227 mtls->mSliceSize = 1; 228 mtls->mSliceNum = 0; 229 230 mtls->fep.ptrIn = NULL; 231 mtls->fep.eStrideIn = 0; 232 mtls->isThreadable = mIsThreadable; 233 234 if (ain) { 235 mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr; 236 mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes(); 237 mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride; 238 } 239 240 mtls->fep.ptrOut = NULL; 241 mtls->fep.eStrideOut = 0; 242 if (aout) { 243 mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; 244 mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); 245 mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; 246 } 247} 248 249 250void RsdCpuScriptImpl::invokeForEach(uint32_t slot, 251 const Allocation * ain, 252 Allocation * aout, 253 const void * usr, 254 uint32_t usrLen, 255 const RsScriptCall *sc) { 256 257 MTLaunchStruct mtls; 258 forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls); 259 forEachKernelSetup(slot, &mtls); 260 261 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 262 mCtx->launchThreads(ain, aout, sc, &mtls); 263 mCtx->setTLS(oldTLS); 264} 265 266void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { 267 268 mtls->script = this; 269 mtls->fep.slot = slot; 270 271 rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size()); 272 mtls->kernel = reinterpret_cast<ForEachFunc_t>( 273 mExecutable->getExportForeachFuncAddrs()[slot]); 274 rsAssert(mtls->kernel != NULL); 275 mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second; 276} 277 278int RsdCpuScriptImpl::invokeRoot() { 279 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 280 int ret = mRoot(); 281 mCtx->setTLS(oldTLS); 282 return ret; 283} 284 285void RsdCpuScriptImpl::invokeInit() { 286 if (mInit) { 287 mInit(); 288 } 289} 290 291void RsdCpuScriptImpl::invokeFreeChildren() { 292 if (mFreeChildren) { 293 mFreeChildren(); 294 } 295} 296 297void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params, 298 size_t paramLength) { 299 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 300 301 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 302 reinterpret_cast<void (*)(const void *, uint32_t)>( 303 mExecutable->getExportFuncAddrs()[slot])(params, paramLength); 304 mCtx->setTLS(oldTLS); 305} 306 307void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 308 //rsAssert(!script->mFieldIsObject[slot]); 309 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 310 311 //if (mIntrinsicID) { 312 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); 313 //return; 314 //} 315 316 int32_t *destPtr = reinterpret_cast<int32_t *>( 317 mExecutable->getExportVarAddrs()[slot]); 318 if (!destPtr) { 319 //ALOGV("Calling setVar on slot = %i which is null", slot); 320 return; 321 } 322 323 memcpy(destPtr, data, dataLength); 324} 325 326void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, 327 const Element *elem, 328 const size_t *dims, size_t dimLength) { 329 330 int32_t *destPtr = reinterpret_cast<int32_t *>( 331 mExecutable->getExportVarAddrs()[slot]); 332 if (!destPtr) { 333 //ALOGV("Calling setVar on slot = %i which is null", slot); 334 return; 335 } 336 337 // We want to look at dimension in terms of integer components, 338 // but dimLength is given in terms of bytes. 339 dimLength /= sizeof(int); 340 341 // Only a single dimension is currently supported. 342 rsAssert(dimLength == 1); 343 if (dimLength == 1) { 344 // First do the increment loop. 345 size_t stride = elem->getSizeBytes(); 346 const char *cVal = reinterpret_cast<const char *>(data); 347 for (size_t i = 0; i < dims[0]; i++) { 348 elem->incRefs(cVal); 349 cVal += stride; 350 } 351 352 // Decrement loop comes after (to prevent race conditions). 353 char *oldVal = reinterpret_cast<char *>(destPtr); 354 for (size_t i = 0; i < dims[0]; i++) { 355 elem->decRefs(oldVal); 356 oldVal += stride; 357 } 358 } 359 360 memcpy(destPtr, data, dataLength); 361} 362 363void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) { 364 365 //rsAssert(!script->mFieldIsObject[slot]); 366 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 367 368 int32_t *destPtr = reinterpret_cast<int32_t *>( 369 mExecutable->getExportVarAddrs()[slot]); 370 if (!destPtr) { 371 //ALOGV("Calling setVar on slot = %i which is null", slot); 372 return; 373 } 374 375 void *ptr = NULL; 376 mBoundAllocs[slot] = data; 377 if(data) { 378 ptr = data->mHal.drvState.lod[0].mallocPtr; 379 } 380 memcpy(destPtr, &ptr, sizeof(void *)); 381} 382 383void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { 384 385 //rsAssert(script->mFieldIsObject[slot]); 386 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 387 388 //if (mIntrinsicID) { 389 //mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, alloc); 390 //return; 391 //} 392 393 int32_t *destPtr = reinterpret_cast<int32_t *>( 394 mExecutable->getExportVarAddrs()[slot]); 395 if (!destPtr) { 396 //ALOGV("Calling setVar on slot = %i which is null", slot); 397 return; 398 } 399 400 rsrSetObject(mCtx->getContext(), (ObjectBase **)destPtr, data); 401} 402 403RsdCpuScriptImpl::~RsdCpuScriptImpl() { 404 405 if (mExecutable) { 406 Vector<void *>::const_iterator var_addr_iter = 407 mExecutable->getExportVarAddrs().begin(); 408 Vector<void *>::const_iterator var_addr_end = 409 mExecutable->getExportVarAddrs().end(); 410 411 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = 412 mExecutable->getInfo().getObjectSlots().begin(); 413 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = 414 mExecutable->getInfo().getObjectSlots().end(); 415 416 while ((var_addr_iter != var_addr_end) && 417 (is_object_iter != is_object_end)) { 418 // The field address can be NULL if the script-side has optimized 419 // the corresponding global variable away. 420 ObjectBase **obj_addr = 421 reinterpret_cast<ObjectBase **>(*var_addr_iter); 422 if (*is_object_iter) { 423 if (*var_addr_iter != NULL) { 424 rsrClearObject(mCtx->getContext(), obj_addr); 425 } 426 } 427 var_addr_iter++; 428 is_object_iter++; 429 } 430 } 431 432 if (mCompilerContext) { 433 delete mCompilerContext; 434 } 435 if (mCompilerDriver) { 436 delete mCompilerDriver; 437 } 438 if (mExecutable) { 439 delete mExecutable; 440 } 441 if (mBoundAllocs) { 442 delete[] mBoundAllocs; 443 } 444} 445 446Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { 447 if (!ptr) { 448 return NULL; 449 } 450 451 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) { 452 Allocation *a = mBoundAllocs[ct]; 453 if (!a) continue; 454 if (a->mHal.drvState.lod[0].mallocPtr == ptr) { 455 return a; 456 } 457 } 458 ALOGE("rsGetAllocation, failed to find %p", ptr); 459 return NULL; 460} 461 462 463} 464} 465