// rsdBcc.cpp revision 0ab9f9f6b1fb31cda536ae4aeaed258f78ee1447
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcinfo/MetadataExtractor.h> 22 23#include "rsContext.h" 24#include "rsElement.h" 25#include "rsScriptC.h" 26 27#include "utils/Timers.h" 28#include "utils/StopWatch.h" 29 30using namespace android; 31using namespace android::renderscript; 32 33struct DrvScript { 34 int (*mRoot)(); 35 int (*mRootExpand)(); 36 void (*mInit)(); 37 void (*mFreeChildren)(); 38 39 BCCScriptRef mBccScript; 40 41 bcinfo::MetadataExtractor *ME; 42 43 InvokeFunc_t *mInvokeFunctions; 44 ForEachFunc_t *mForEachFunctions; 45 void ** mFieldAddress; 46 bool * mFieldIsObject; 47 const uint32_t *mExportForEachSignatureList; 48 49 const uint8_t * mScriptText; 50 uint32_t mScriptTextLength; 51}; 52 53typedef void (*outer_foreach_t)( 54 const android::renderscript::RsForEachStubParamStruct *, 55 uint32_t x1, uint32_t x2, 56 uint32_t instep, uint32_t outstep); 57 58static Script * setTLS(Script *sc) { 59 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 60 rsAssert(tls); 61 Script *old = tls->mScript; 62 tls->mScript = sc; 63 return old; 64} 65 66 67bool rsdScriptInit(const Context *rsc, 68 ScriptC *script, 69 char const *resName, 70 char const *cacheDir, 71 uint8_t const *bitcode, 72 size_t bitcodeSize, 73 uint32_t flags) { 74 //ALOGE("rsdScriptCreate %p 
%p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 75 //ALOGE("rsdScriptInit %p %p", rsc, script); 76 77 pthread_mutex_lock(&rsdgInitMutex); 78 79 size_t exportFuncCount = 0; 80 size_t exportVarCount = 0; 81 size_t objectSlotCount = 0; 82 size_t exportForEachSignatureCount = 0; 83 84 const char* coreLib = "/system/lib/libclcore.bc"; 85 bcinfo::RSFloatPrecision prec; 86 87 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 88 if (drv == NULL) { 89 goto error; 90 } 91 script->mHal.drv = drv; 92 93 drv->mBccScript = bccCreateScript(); 94 script->mHal.info.isThreadable = true; 95 drv->mScriptText = bitcode; 96 drv->mScriptTextLength = bitcodeSize; 97 98 99 drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText, 100 drv->mScriptTextLength); 101 if (!drv->ME->extract()) { 102 ALOGE("bcinfo: failed to read script metadata"); 103 goto error; 104 } 105 106 //ALOGE("mBccScript %p", script->mBccScript); 107 108 if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) { 109 ALOGE("bcc: FAILS to register symbol callback"); 110 goto error; 111 } 112 113 if (bccReadBC(drv->mBccScript, 114 resName, 115 (char const *)drv->mScriptText, 116 drv->mScriptTextLength, 0) != 0) { 117 ALOGE("bcc: FAILS to read bitcode"); 118 goto error; 119 } 120 121 // NEON-capable devices can use an accelerated math library for all 122 // reduced precision scripts. 
123#if defined(ARCH_ARM_HAVE_NEON) 124 prec = drv->ME->getRSFloatPrecision(); 125 if (prec != bcinfo::RS_FP_Full) { 126 coreLib = "/system/lib/libclcore_neon.bc"; 127 } 128#endif 129 130 if (bccLinkFile(drv->mBccScript, coreLib, 0) != 0) { 131 ALOGE("bcc: FAILS to link bitcode"); 132 goto error; 133 } 134 135 if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) { 136 ALOGE("bcc: FAILS to prepare executable"); 137 goto error; 138 } 139 140 drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root")); 141 drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand")); 142 drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init")); 143 drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor")); 144 145 exportFuncCount = drv->ME->getExportFuncCount(); 146 if (exportFuncCount > 0) { 147 drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount, 148 sizeof(InvokeFunc_t)); 149 bccGetExportFuncList(drv->mBccScript, exportFuncCount, 150 (void **) drv->mInvokeFunctions); 151 } else { 152 drv->mInvokeFunctions = NULL; 153 } 154 155 exportVarCount = drv->ME->getExportVarCount(); 156 if (exportVarCount > 0) { 157 drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*)); 158 drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool)); 159 bccGetExportVarList(drv->mBccScript, exportVarCount, 160 (void **) drv->mFieldAddress); 161 } else { 162 drv->mFieldAddress = NULL; 163 drv->mFieldIsObject = NULL; 164 } 165 166 objectSlotCount = drv->ME->getObjectSlotCount(); 167 if (objectSlotCount > 0) { 168 const uint32_t *objectSlotList = drv->ME->getObjectSlotList(); 169 for (uint32_t ct=0; ct < objectSlotCount; ct++) { 170 drv->mFieldIsObject[objectSlotList[ct]] = true; 171 } 172 } 173 174 exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount(); 175 drv->mExportForEachSignatureList = 
drv->ME->getExportForEachSignatureList(); 176 if (exportForEachSignatureCount > 0) { 177 drv->mForEachFunctions = 178 (ForEachFunc_t*) calloc(exportForEachSignatureCount, 179 sizeof(ForEachFunc_t)); 180 bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount, 181 (void **) drv->mForEachFunctions); 182 } else { 183 drv->mForEachFunctions = NULL; 184 } 185 186 // Copy info over to runtime 187 script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount(); 188 script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount(); 189 script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount(); 190 script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList(); 191 script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList(); 192 193 if (drv->mRootExpand) { 194 script->mHal.info.root = drv->mRootExpand; 195 } else { 196 script->mHal.info.root = drv->mRoot; 197 } 198 199 pthread_mutex_unlock(&rsdgInitMutex); 200 return true; 201 202error: 203 204 pthread_mutex_unlock(&rsdgInitMutex); 205 if (drv->ME) { 206 delete drv->ME; 207 drv->ME = NULL; 208 } 209 free(drv); 210 return false; 211 212} 213 214typedef struct { 215 Context *rsc; 216 Script *script; 217 ForEachFunc_t kernel; 218 uint32_t sig; 219 const Allocation * ain; 220 Allocation * aout; 221 const void * usr; 222 size_t usrLen; 223 224 uint32_t mSliceSize; 225 volatile int mSliceNum; 226 227 const uint8_t *ptrIn; 228 uint32_t eStrideIn; 229 uint8_t *ptrOut; 230 uint32_t eStrideOut; 231 232 uint32_t yStrideIn; 233 uint32_t yStrideOut; 234 235 uint32_t xStart; 236 uint32_t xEnd; 237 uint32_t yStart; 238 uint32_t yEnd; 239 uint32_t zStart; 240 uint32_t zEnd; 241 uint32_t arrayStart; 242 uint32_t arrayEnd; 243 244 uint32_t dimX; 245 uint32_t dimY; 246 uint32_t dimZ; 247 uint32_t dimArray; 248} MTLaunchStruct; 249typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 250 251static void wc_xy(void *usr, uint32_t idx) { 252 
MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 253 RsForEachStubParamStruct p; 254 memset(&p, 0, sizeof(p)); 255 p.usr = mtls->usr; 256 p.usr_len = mtls->usrLen; 257 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 258 uint32_t sig = mtls->sig; 259 260 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 261 while (1) { 262 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 263 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 264 uint32_t yEnd = yStart + mtls->mSliceSize; 265 yEnd = rsMin(yEnd, mtls->yEnd); 266 if (yEnd <= yStart) { 267 return; 268 } 269 270 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 271 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 272 for (p.y = yStart; p.y < yEnd; p.y++) { 273 p.out = mtls->ptrOut + (mtls->yStrideOut * p.y); 274 p.in = mtls->ptrIn + (mtls->yStrideIn * p.y); 275 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 276 } 277 } 278} 279 280static void wc_x(void *usr, uint32_t idx) { 281 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 282 RsForEachStubParamStruct p; 283 memset(&p, 0, sizeof(p)); 284 p.usr = mtls->usr; 285 p.usr_len = mtls->usrLen; 286 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 287 uint32_t sig = mtls->sig; 288 289 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 290 while (1) { 291 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 292 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 293 uint32_t xEnd = xStart + mtls->mSliceSize; 294 xEnd = rsMin(xEnd, mtls->xEnd); 295 if (xEnd <= xStart) { 296 return; 297 } 298 299 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 300 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 301 302 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 303 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 304 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 305 } 306} 307 308void rsdScriptInvokeForEach(const Context *rsc, 309 Script *s, 
310 uint32_t slot, 311 const Allocation * ain, 312 Allocation * aout, 313 const void * usr, 314 uint32_t usrLen, 315 const RsScriptCall *sc) { 316 317 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 318 319 MTLaunchStruct mtls; 320 memset(&mtls, 0, sizeof(mtls)); 321 322 //ALOGE("for each script %p in %p out %p", s, ain, aout); 323 324 DrvScript *drv = (DrvScript *)s->mHal.drv; 325 mtls.kernel = drv->mForEachFunctions[slot]; 326 rsAssert(mtls.kernel != NULL); 327 mtls.sig = 0x1f; // temp fix for old apps, full table in slang_rs_export_foreach.cpp 328 if (drv->mExportForEachSignatureList) { 329 mtls.sig = drv->mExportForEachSignatureList[slot]; 330 } 331 if (ain) { 332 mtls.dimX = ain->getType()->getDimX(); 333 mtls.dimY = ain->getType()->getDimY(); 334 mtls.dimZ = ain->getType()->getDimZ(); 335 //mtls.dimArray = ain->getType()->getDimArray(); 336 } else if (aout) { 337 mtls.dimX = aout->getType()->getDimX(); 338 mtls.dimY = aout->getType()->getDimY(); 339 mtls.dimZ = aout->getType()->getDimZ(); 340 //mtls.dimArray = aout->getType()->getDimArray(); 341 } else { 342 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 343 return; 344 } 345 346 if (!sc || (sc->xEnd == 0)) { 347 mtls.xEnd = mtls.dimX; 348 } else { 349 rsAssert(sc->xStart < mtls.dimX); 350 rsAssert(sc->xEnd <= mtls.dimX); 351 rsAssert(sc->xStart < sc->xEnd); 352 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 353 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 354 if (mtls.xStart >= mtls.xEnd) return; 355 } 356 357 if (!sc || (sc->yEnd == 0)) { 358 mtls.yEnd = mtls.dimY; 359 } else { 360 rsAssert(sc->yStart < mtls.dimY); 361 rsAssert(sc->yEnd <= mtls.dimY); 362 rsAssert(sc->yStart < sc->yEnd); 363 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 364 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 365 if (mtls.yStart >= mtls.yEnd) return; 366 } 367 368 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 369 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 370 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 371 mtls.arrayEnd = 
rsMax((uint32_t)1, mtls.arrayEnd); 372 373 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 374 375 Context *mrsc = (Context *)rsc; 376 Script * oldTLS = setTLS(s); 377 378 mtls.rsc = mrsc; 379 mtls.ain = ain; 380 mtls.aout = aout; 381 mtls.script = s; 382 mtls.usr = usr; 383 mtls.usrLen = usrLen; 384 mtls.mSliceSize = 10; 385 mtls.mSliceNum = 0; 386 387 mtls.ptrIn = NULL; 388 mtls.eStrideIn = 0; 389 if (ain) { 390 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 391 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 392 mtls.yStrideIn = ain->mHal.drvState.stride; 393 } 394 395 mtls.ptrOut = NULL; 396 mtls.eStrideOut = 0; 397 if (aout) { 398 mtls.ptrOut = (uint8_t *)aout->getPtr(); 399 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 400 mtls.yStrideOut = aout->mHal.drvState.stride; 401 } 402 403 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) { 404 dc->mInForEach = true; 405 if (mtls.dimY > 1) { 406 mtls.mSliceSize = mtls.dimY / (dc->mWorkers.mCount * 4); 407 if(mtls.mSliceSize < 1) { 408 mtls.mSliceSize = 1; 409 } 410 411 rsdLaunchThreads(mrsc, wc_xy, &mtls); 412 } else { 413 mtls.mSliceSize = mtls.dimX / (dc->mWorkers.mCount * 4); 414 if(mtls.mSliceSize < 1) { 415 mtls.mSliceSize = 1; 416 } 417 418 rsdLaunchThreads(mrsc, wc_x, &mtls); 419 } 420 dc->mInForEach = false; 421 422 //ALOGE("launch 1"); 423 } else { 424 RsForEachStubParamStruct p; 425 memset(&p, 0, sizeof(p)); 426 p.usr = mtls.usr; 427 p.usr_len = mtls.usrLen; 428 uint32_t sig = mtls.sig; 429 430 //ALOGE("launch 3"); 431 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 432 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 433 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 434 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 435 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 436 mtls.dimX * mtls.dimY * p.z + 437 mtls.dimX * p.y; 438 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 439 p.in = mtls.ptrIn + 
(mtls.eStrideIn * offset); 440 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 441 mtls.eStrideOut); 442 } 443 } 444 } 445 } 446 447 setTLS(oldTLS); 448} 449 450 451int rsdScriptInvokeRoot(const Context *dc, Script *script) { 452 DrvScript *drv = (DrvScript *)script->mHal.drv; 453 454 Script * oldTLS = setTLS(script); 455 int ret = drv->mRoot(); 456 setTLS(oldTLS); 457 458 return ret; 459} 460 461void rsdScriptInvokeInit(const Context *dc, Script *script) { 462 DrvScript *drv = (DrvScript *)script->mHal.drv; 463 464 if (drv->mInit) { 465 drv->mInit(); 466 } 467} 468 469void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 470 DrvScript *drv = (DrvScript *)script->mHal.drv; 471 472 if (drv->mFreeChildren) { 473 drv->mFreeChildren(); 474 } 475} 476 477void rsdScriptInvokeFunction(const Context *dc, Script *script, 478 uint32_t slot, 479 const void *params, 480 size_t paramLength) { 481 DrvScript *drv = (DrvScript *)script->mHal.drv; 482 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 483 484 Script * oldTLS = setTLS(script); 485 ((void (*)(const void *, uint32_t)) 486 drv->mInvokeFunctions[slot])(params, paramLength); 487 setTLS(oldTLS); 488} 489 490void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 491 uint32_t slot, void *data, size_t dataLength) { 492 DrvScript *drv = (DrvScript *)script->mHal.drv; 493 //rsAssert(!script->mFieldIsObject[slot]); 494 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 495 496 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 497 if (!destPtr) { 498 //ALOGV("Calling setVar on slot = %i which is null", slot); 499 return; 500 } 501 502 memcpy(destPtr, data, dataLength); 503} 504 505void rsdScriptSetGlobalVarWithElemDims( 506 const android::renderscript::Context *dc, 507 const android::renderscript::Script *script, 508 uint32_t slot, void *data, size_t dataLength, 509 const android::renderscript::Element *elem, 510 const size_t *dims, size_t 
dimLength) { 511 DrvScript *drv = (DrvScript *)script->mHal.drv; 512 513 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 514 if (!destPtr) { 515 //ALOGV("Calling setVar on slot = %i which is null", slot); 516 return; 517 } 518 519 // We want to look at dimension in terms of integer components, 520 // but dimLength is given in terms of bytes. 521 dimLength /= sizeof(int); 522 523 // Only a single dimension is currently supported. 524 rsAssert(dimLength == 1); 525 if (dimLength == 1) { 526 // First do the increment loop. 527 size_t stride = elem->getSizeBytes(); 528 char *cVal = reinterpret_cast<char *>(data); 529 for (size_t i = 0; i < dims[0]; i++) { 530 elem->incRefs(cVal); 531 cVal += stride; 532 } 533 534 // Decrement loop comes after (to prevent race conditions). 535 char *oldVal = reinterpret_cast<char *>(destPtr); 536 for (size_t i = 0; i < dims[0]; i++) { 537 elem->decRefs(oldVal); 538 oldVal += stride; 539 } 540 } 541 542 memcpy(destPtr, data, dataLength); 543} 544 545void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 546 DrvScript *drv = (DrvScript *)script->mHal.drv; 547 //rsAssert(!script->mFieldIsObject[slot]); 548 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 549 550 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 551 if (!destPtr) { 552 //ALOGV("Calling setVar on slot = %i which is null", slot); 553 return; 554 } 555 556 memcpy(destPtr, &data, sizeof(void *)); 557} 558 559void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 560 DrvScript *drv = (DrvScript *)script->mHal.drv; 561 //rsAssert(script->mFieldIsObject[slot]); 562 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 563 564 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 565 if (!destPtr) { 566 //ALOGV("Calling setVar on slot = %i which is null", slot); 567 return; 568 } 569 570 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 571} 
572 573void rsdScriptDestroy(const Context *dc, Script *script) { 574 DrvScript *drv = (DrvScript *)script->mHal.drv; 575 576 if (drv->mFieldAddress) { 577 size_t exportVarCount = drv->ME->getExportVarCount(); 578 for (size_t ct = 0; ct < exportVarCount; ct++) { 579 if (drv->mFieldIsObject[ct]) { 580 // The field address can be NULL if the script-side has 581 // optimized the corresponding global variable away. 582 if (drv->mFieldAddress[ct]) { 583 rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]); 584 } 585 } 586 } 587 free(drv->mFieldAddress); 588 drv->mFieldAddress = NULL; 589 free(drv->mFieldIsObject); 590 drv->mFieldIsObject = NULL; } 591 592 if (drv->mInvokeFunctions) { 593 free(drv->mInvokeFunctions); 594 drv->mInvokeFunctions = NULL; 595 } 596 597 if (drv->mForEachFunctions) { 598 free(drv->mForEachFunctions); 599 drv->mForEachFunctions = NULL; 600 } 601 602 delete drv->ME; 603 drv->ME = NULL; 604 605 free(drv); 606 script->mHal.drv = NULL; 607 608} 609 610 611