// rsdBcc.cpp revision 378d30b1da622e0e75e551e95fafb18ff3a3f88e
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20#include "rsdAllocation.h" 21 22#include <bcc/BCCContext.h> 23#include <bcc/Renderscript/RSCompilerDriver.h> 24#include <bcc/Renderscript/RSExecutable.h> 25#include <bcc/Renderscript/RSInfo.h> 26 27#include "rsContext.h" 28#include "rsElement.h" 29#include "rsScriptC.h" 30 31#include "utils/Vector.h" 32#include "utils/Timers.h" 33#include "utils/StopWatch.h" 34 35using namespace android; 36using namespace android::renderscript; 37 38struct DrvScript { 39 int (*mRoot)(); 40 int (*mRootExpand)(); 41 void (*mInit)(); 42 void (*mFreeChildren)(); 43 44 bcc::BCCContext *mCompilerContext; 45 bcc::RSCompilerDriver *mCompilerDriver; 46 bcc::RSExecutable *mExecutable; 47 48 Allocation **mBoundAllocs; 49}; 50 51typedef void (*outer_foreach_t)( 52 const android::renderscript::RsForEachStubParamStruct *, 53 uint32_t x1, uint32_t x2, 54 uint32_t instep, uint32_t outstep); 55 56static Script * setTLS(Script *sc) { 57 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 58 rsAssert(tls); 59 Script *old = tls->mScript; 60 tls->mScript = sc; 61 return old; 62} 63 64 65bool rsdScriptInit(const Context *rsc, 66 ScriptC *script, 67 char const *resName, 68 char const *cacheDir, 69 uint8_t const *bitcode, 70 size_t bitcodeSize, 71 uint32_t flags) { 72 
//ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 73 //ALOGE("rsdScriptInit %p %p", rsc, script); 74 75 pthread_mutex_lock(&rsdgInitMutex); 76 77 const char* coreLib = "/system/lib/libclcore.bc"; 78 bcc::RSInfo::FloatPrecision prec; 79 bcc::RSExecutable *exec; 80 const bcc::RSInfo *info; 81 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 82 if (drv == NULL) { 83 goto error; 84 } 85 script->mHal.drv = drv; 86 87 drv->mCompilerContext = NULL; 88 drv->mCompilerDriver = NULL; 89 drv->mExecutable = NULL; 90 91 drv->mCompilerContext = new bcc::BCCContext(); 92 if (drv->mCompilerContext == NULL) { 93 ALOGE("bcc: FAILS to create compiler context (out of memory)"); 94 goto error; 95 } 96 97 drv->mCompilerDriver = new bcc::RSCompilerDriver(); 98 if (drv->mCompilerDriver == NULL) { 99 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 100 goto error; 101 } 102 103 script->mHal.info.isThreadable = true; 104 105 drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub); 106 drv->mCompilerDriver->setRSRuntimeLookupContext(script); 107 108 exec = drv->mCompilerDriver->build(*drv->mCompilerContext, 109 cacheDir, resName, 110 (const char *)bitcode, bitcodeSize); 111 112 if (exec == NULL) { 113 ALOGE("bcc: FAILS to prepare executable for '%s'", resName); 114 goto error; 115 } 116 117 drv->mExecutable = exec; 118 119 exec->setThreadable(script->mHal.info.isThreadable); 120 if (!exec->syncInfo()) { 121 ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); 122 } 123 124 drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root")); 125 drv->mRootExpand = 126 reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand")); 127 drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init")); 128 drv->mFreeChildren = 129 reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor")); 130 131 info = &drv->mExecutable->getInfo(); 132 // Copy info over to 
runtime 133 script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size(); 134 script->mHal.info.exportedVariableCount = info->getExportVarNames().size(); 135 script->mHal.info.exportedPragmaCount = info->getPragmas().size(); 136 script->mHal.info.exportedPragmaKeyList = 137 const_cast<const char**>(exec->getPragmaKeys().array()); 138 script->mHal.info.exportedPragmaValueList = 139 const_cast<const char**>(exec->getPragmaValues().array()); 140 141 if (drv->mRootExpand) { 142 script->mHal.info.root = drv->mRootExpand; 143 } else { 144 script->mHal.info.root = drv->mRoot; 145 } 146 147 if (script->mHal.info.exportedVariableCount) { 148 drv->mBoundAllocs = new Allocation *[script->mHal.info.exportedVariableCount]; 149 memset(drv->mBoundAllocs, 0, sizeof(void *) * script->mHal.info.exportedVariableCount); 150 } 151 152 pthread_mutex_unlock(&rsdgInitMutex); 153 return true; 154 155error: 156 157 pthread_mutex_unlock(&rsdgInitMutex); 158 if (drv) { 159 delete drv->mCompilerContext; 160 delete drv->mCompilerDriver; 161 delete drv->mExecutable; 162 delete[] drv->mBoundAllocs; 163 free(drv); 164 } 165 script->mHal.drv = NULL; 166 return false; 167 168} 169 170typedef struct { 171 Context *rsc; 172 Script *script; 173 ForEachFunc_t kernel; 174 uint32_t sig; 175 const Allocation * ain; 176 Allocation * aout; 177 const void * usr; 178 size_t usrLen; 179 180 uint32_t mSliceSize; 181 volatile int mSliceNum; 182 183 const uint8_t *ptrIn; 184 uint32_t eStrideIn; 185 uint8_t *ptrOut; 186 uint32_t eStrideOut; 187 188 uint32_t yStrideIn; 189 uint32_t yStrideOut; 190 191 uint32_t xStart; 192 uint32_t xEnd; 193 uint32_t yStart; 194 uint32_t yEnd; 195 uint32_t zStart; 196 uint32_t zEnd; 197 uint32_t arrayStart; 198 uint32_t arrayEnd; 199 200 uint32_t dimX; 201 uint32_t dimY; 202 uint32_t dimZ; 203 uint32_t dimArray; 204} MTLaunchStruct; 205typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 206 207static void wc_xy(void *usr, 
uint32_t idx) { 208 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 209 RsForEachStubParamStruct p; 210 memset(&p, 0, sizeof(p)); 211 p.usr = mtls->usr; 212 p.usr_len = mtls->usrLen; 213 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 214 uint32_t sig = mtls->sig; 215 216 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 217 while (1) { 218 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 219 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 220 uint32_t yEnd = yStart + mtls->mSliceSize; 221 yEnd = rsMin(yEnd, mtls->yEnd); 222 if (yEnd <= yStart) { 223 return; 224 } 225 226 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 227 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 228 for (p.y = yStart; p.y < yEnd; p.y++) { 229 p.out = mtls->ptrOut + (mtls->yStrideOut * p.y); 230 p.in = mtls->ptrIn + (mtls->yStrideIn * p.y); 231 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 232 } 233 } 234} 235 236static void wc_x(void *usr, uint32_t idx) { 237 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 238 RsForEachStubParamStruct p; 239 memset(&p, 0, sizeof(p)); 240 p.usr = mtls->usr; 241 p.usr_len = mtls->usrLen; 242 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 243 uint32_t sig = mtls->sig; 244 245 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 246 while (1) { 247 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 248 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 249 uint32_t xEnd = xStart + mtls->mSliceSize; 250 xEnd = rsMin(xEnd, mtls->xEnd); 251 if (xEnd <= xStart) { 252 return; 253 } 254 255 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 256 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 257 258 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 259 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 260 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 261 } 262} 263 264void rsdScriptInvokeForEach(const Context 
*rsc, 265 Script *s, 266 uint32_t slot, 267 const Allocation * ain, 268 Allocation * aout, 269 const void * usr, 270 uint32_t usrLen, 271 const RsScriptCall *sc) { 272 273 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 274 275 MTLaunchStruct mtls; 276 memset(&mtls, 0, sizeof(mtls)); 277 278 //ALOGE("for each script %p in %p out %p", s, ain, aout); 279 280 DrvScript *drv = (DrvScript *)s->mHal.drv; 281 rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size()); 282 mtls.kernel = reinterpret_cast<ForEachFunc_t>( 283 drv->mExecutable->getExportForeachFuncAddrs()[slot]); 284 rsAssert(mtls.kernel != NULL); 285 mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second; 286 287 if (ain) { 288 mtls.dimX = ain->getType()->getDimX(); 289 mtls.dimY = ain->getType()->getDimY(); 290 mtls.dimZ = ain->getType()->getDimZ(); 291 //mtls.dimArray = ain->getType()->getDimArray(); 292 } else if (aout) { 293 mtls.dimX = aout->getType()->getDimX(); 294 mtls.dimY = aout->getType()->getDimY(); 295 mtls.dimZ = aout->getType()->getDimZ(); 296 //mtls.dimArray = aout->getType()->getDimArray(); 297 } else { 298 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 299 return; 300 } 301 302 if (!sc || (sc->xEnd == 0)) { 303 mtls.xEnd = mtls.dimX; 304 } else { 305 rsAssert(sc->xStart < mtls.dimX); 306 rsAssert(sc->xEnd <= mtls.dimX); 307 rsAssert(sc->xStart < sc->xEnd); 308 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 309 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 310 if (mtls.xStart >= mtls.xEnd) return; 311 } 312 313 if (!sc || (sc->yEnd == 0)) { 314 mtls.yEnd = mtls.dimY; 315 } else { 316 rsAssert(sc->yStart < mtls.dimY); 317 rsAssert(sc->yEnd <= mtls.dimY); 318 rsAssert(sc->yStart < sc->yEnd); 319 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 320 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 321 if (mtls.yStart >= mtls.yEnd) return; 322 } 323 324 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 325 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 326 mtls.zEnd = 
rsMax((uint32_t)1, mtls.zEnd); 327 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 328 329 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 330 331 Context *mrsc = (Context *)rsc; 332 Script * oldTLS = setTLS(s); 333 334 mtls.rsc = mrsc; 335 mtls.ain = ain; 336 mtls.aout = aout; 337 mtls.script = s; 338 mtls.usr = usr; 339 mtls.usrLen = usrLen; 340 mtls.mSliceSize = 10; 341 mtls.mSliceNum = 0; 342 343 mtls.ptrIn = NULL; 344 mtls.eStrideIn = 0; 345 if (ain) { 346 DrvAllocation *aindrv = (DrvAllocation *)ain->mHal.drv; 347 mtls.ptrIn = (const uint8_t *)aindrv->lod[0].mallocPtr; 348 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 349 mtls.yStrideIn = aindrv->lod[0].stride; 350 } 351 352 mtls.ptrOut = NULL; 353 mtls.eStrideOut = 0; 354 if (aout) { 355 DrvAllocation *aoutdrv = (DrvAllocation *)aout->mHal.drv; 356 mtls.ptrOut = (uint8_t *)aoutdrv->lod[0].mallocPtr; 357 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 358 mtls.yStrideOut = aoutdrv->lod[0].stride; 359 } 360 361 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) { 362 dc->mInForEach = true; 363 if (mtls.dimY > 1) { 364 mtls.mSliceSize = mtls.dimY / (dc->mWorkers.mCount * 4); 365 if(mtls.mSliceSize < 1) { 366 mtls.mSliceSize = 1; 367 } 368 369 rsdLaunchThreads(mrsc, wc_xy, &mtls); 370 } else { 371 mtls.mSliceSize = mtls.dimX / (dc->mWorkers.mCount * 4); 372 if(mtls.mSliceSize < 1) { 373 mtls.mSliceSize = 1; 374 } 375 376 rsdLaunchThreads(mrsc, wc_x, &mtls); 377 } 378 dc->mInForEach = false; 379 380 //ALOGE("launch 1"); 381 } else { 382 RsForEachStubParamStruct p; 383 memset(&p, 0, sizeof(p)); 384 p.usr = mtls.usr; 385 p.usr_len = mtls.usrLen; 386 uint32_t sig = mtls.sig; 387 388 //ALOGE("launch 3"); 389 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 390 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 391 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 392 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 393 uint32_t offset = 
mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 394 mtls.dimX * mtls.dimY * p.z + 395 mtls.dimX * p.y; 396 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 397 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 398 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 399 mtls.eStrideOut); 400 } 401 } 402 } 403 } 404 405 setTLS(oldTLS); 406} 407 408 409int rsdScriptInvokeRoot(const Context *dc, Script *script) { 410 DrvScript *drv = (DrvScript *)script->mHal.drv; 411 412 Script * oldTLS = setTLS(script); 413 int ret = drv->mRoot(); 414 setTLS(oldTLS); 415 416 return ret; 417} 418 419void rsdScriptInvokeInit(const Context *dc, Script *script) { 420 DrvScript *drv = (DrvScript *)script->mHal.drv; 421 422 if (drv->mInit) { 423 drv->mInit(); 424 } 425} 426 427void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 428 DrvScript *drv = (DrvScript *)script->mHal.drv; 429 430 if (drv->mFreeChildren) { 431 drv->mFreeChildren(); 432 } 433} 434 435void rsdScriptInvokeFunction(const Context *dc, Script *script, 436 uint32_t slot, 437 const void *params, 438 size_t paramLength) { 439 DrvScript *drv = (DrvScript *)script->mHal.drv; 440 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 441 442 Script * oldTLS = setTLS(script); 443 reinterpret_cast<void (*)(const void *, uint32_t)>( 444 drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength); 445 setTLS(oldTLS); 446} 447 448void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 449 uint32_t slot, void *data, size_t dataLength) { 450 DrvScript *drv = (DrvScript *)script->mHal.drv; 451 //rsAssert(!script->mFieldIsObject[slot]); 452 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 453 454 int32_t *destPtr = reinterpret_cast<int32_t *>( 455 drv->mExecutable->getExportVarAddrs()[slot]); 456 if (!destPtr) { 457 //ALOGV("Calling setVar on slot = %i which is null", slot); 458 return; 459 } 460 461 memcpy(destPtr, data, dataLength); 462} 463 464void 
rsdScriptSetGlobalVarWithElemDims( 465 const android::renderscript::Context *dc, 466 const android::renderscript::Script *script, 467 uint32_t slot, void *data, size_t dataLength, 468 const android::renderscript::Element *elem, 469 const size_t *dims, size_t dimLength) { 470 DrvScript *drv = (DrvScript *)script->mHal.drv; 471 472 int32_t *destPtr = reinterpret_cast<int32_t *>( 473 drv->mExecutable->getExportVarAddrs()[slot]); 474 if (!destPtr) { 475 //ALOGV("Calling setVar on slot = %i which is null", slot); 476 return; 477 } 478 479 // We want to look at dimension in terms of integer components, 480 // but dimLength is given in terms of bytes. 481 dimLength /= sizeof(int); 482 483 // Only a single dimension is currently supported. 484 rsAssert(dimLength == 1); 485 if (dimLength == 1) { 486 // First do the increment loop. 487 size_t stride = elem->getSizeBytes(); 488 char *cVal = reinterpret_cast<char *>(data); 489 for (size_t i = 0; i < dims[0]; i++) { 490 elem->incRefs(cVal); 491 cVal += stride; 492 } 493 494 // Decrement loop comes after (to prevent race conditions). 
495 char *oldVal = reinterpret_cast<char *>(destPtr); 496 for (size_t i = 0; i < dims[0]; i++) { 497 elem->decRefs(oldVal); 498 oldVal += stride; 499 } 500 } 501 502 memcpy(destPtr, data, dataLength); 503} 504 505void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, Allocation *data) { 506 DrvScript *drv = (DrvScript *)script->mHal.drv; 507 508 //rsAssert(!script->mFieldIsObject[slot]); 509 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 510 511 int32_t *destPtr = reinterpret_cast<int32_t *>( 512 drv->mExecutable->getExportVarAddrs()[slot]); 513 if (!destPtr) { 514 //ALOGV("Calling setVar on slot = %i which is null", slot); 515 return; 516 } 517 518 void *ptr = NULL; 519 drv->mBoundAllocs[slot] = data; 520 if(data) { 521 DrvAllocation *allocDrv = (DrvAllocation *)data->mHal.drv; 522 ptr = allocDrv->lod[0].mallocPtr; 523 } 524 memcpy(destPtr, &ptr, sizeof(void *)); 525} 526 527void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 528 DrvScript *drv = (DrvScript *)script->mHal.drv; 529 //rsAssert(script->mFieldIsObject[slot]); 530 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 531 532 int32_t *destPtr = reinterpret_cast<int32_t *>( 533 drv->mExecutable->getExportVarAddrs()[slot]); 534 if (!destPtr) { 535 //ALOGV("Calling setVar on slot = %i which is null", slot); 536 return; 537 } 538 539 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 540} 541 542void rsdScriptDestroy(const Context *dc, Script *script) { 543 DrvScript *drv = (DrvScript *)script->mHal.drv; 544 545 if (drv == NULL) { 546 return; 547 } 548 549 if (drv->mExecutable) { 550 Vector<void *>::const_iterator var_addr_iter = 551 drv->mExecutable->getExportVarAddrs().begin(); 552 Vector<void *>::const_iterator var_addr_end = 553 drv->mExecutable->getExportVarAddrs().end(); 554 555 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = 556 
drv->mExecutable->getInfo().getObjectSlots().begin(); 557 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = 558 drv->mExecutable->getInfo().getObjectSlots().end(); 559 560 while ((var_addr_iter != var_addr_end) && 561 (is_object_iter != is_object_end)) { 562 // The field address can be NULL if the script-side has optimized 563 // the corresponding global variable away. 564 ObjectBase **obj_addr = 565 reinterpret_cast<ObjectBase **>(*var_addr_iter); 566 if (*is_object_iter) { 567 if (*var_addr_iter != NULL) { 568 rsrClearObject(dc, script, obj_addr); 569 } 570 } 571 var_addr_iter++; 572 is_object_iter++; 573 } 574 } 575 576 delete drv->mCompilerContext; 577 delete drv->mCompilerDriver; 578 delete drv->mExecutable; 579 delete[] drv->mBoundAllocs; 580 free(drv); 581 script->mHal.drv = NULL; 582} 583 584Allocation * rsdScriptGetAllocationForPointer(const android::renderscript::Context *dc, 585 const android::renderscript::Script *sc, 586 const void *ptr) { 587 DrvScript *drv = (DrvScript *)sc->mHal.drv; 588 if (!ptr) { 589 return NULL; 590 } 591 592 for (uint32_t ct=0; ct < sc->mHal.info.exportedVariableCount; ct++) { 593 Allocation *a = drv->mBoundAllocs[ct]; 594 if (!a) continue; 595 DrvAllocation *adrv = (DrvAllocation *)a->mHal.drv; 596 if (adrv->lod[0].mallocPtr == ptr) { 597 return a; 598 } 599 } 600 ALOGE("rsGetAllocation, failed to find %p", ptr); 601 return NULL; 602} 603 604