rsdBcc.cpp revision ebee9480507562109314cca00753a09002a91e7d
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcinfo/MetadataExtractor.h> 22 23#include "rsContext.h" 24#include "rsElement.h" 25#include "rsScriptC.h" 26 27#include "utils/Timers.h" 28#include "utils/StopWatch.h" 29 30using namespace android; 31using namespace android::renderscript; 32 33struct DrvScript { 34 int (*mRoot)(); 35 int (*mRootExpand)(); 36 void (*mInit)(); 37 void (*mFreeChildren)(); 38 39 BCCScriptRef mBccScript; 40 41 bcinfo::MetadataExtractor *ME; 42 43 InvokeFunc_t *mInvokeFunctions; 44 ForEachFunc_t *mForEachFunctions; 45 void ** mFieldAddress; 46 bool * mFieldIsObject; 47 const uint32_t *mExportForEachSignatureList; 48 49 const uint8_t * mScriptText; 50 uint32_t mScriptTextLength; 51}; 52 53typedef void (*outer_foreach_t)( 54 const android::renderscript::RsForEachStubParamStruct *, 55 uint32_t x1, uint32_t x2, 56 uint32_t instep, uint32_t outstep); 57 58static Script * setTLS(Script *sc) { 59 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 60 rsAssert(tls); 61 Script *old = tls->mScript; 62 tls->mScript = sc; 63 return old; 64} 65 66 67bool rsdScriptInit(const Context *rsc, 68 ScriptC *script, 69 char const *resName, 70 char const *cacheDir, 71 uint8_t const *bitcode, 72 size_t bitcodeSize, 73 uint32_t flags) { 74 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 75 76 pthread_mutex_lock(&rsdgInitMutex); 77 78 size_t exportFuncCount = 0; 79 size_t exportVarCount = 0; 80 size_t objectSlotCount = 0; 81 size_t exportForEachSignatureCount = 0; 82 83 const char* coreLib = "/system/lib/libclcore.bc"; 84 bcinfo::RSFloatPrecision prec; 85 86 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 87 if (drv == NULL) { 88 goto error; 89 } 90 script->mHal.drv = drv; 91 92 drv->mBccScript = bccCreateScript(); 93 script->mHal.info.isThreadable = true; 94 drv->mScriptText = bitcode; 95 drv->mScriptTextLength = bitcodeSize; 96 97 98 drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText, 99 drv->mScriptTextLength); 100 if (!drv->ME->extract()) { 101 ALOGE("bcinfo: failed to read script metadata"); 102 goto error; 103 } 104 105 //ALOGE("mBccScript %p", script->mBccScript); 106 107 if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) { 108 ALOGE("bcc: FAILS to register symbol callback"); 109 goto error; 110 } 111 112 if (bccReadBC(drv->mBccScript, 113 resName, 114 (char const *)drv->mScriptText, 115 drv->mScriptTextLength, 0) != 0) { 116 ALOGE("bcc: FAILS to read bitcode"); 117 goto error; 118 } 119 120 // NEON-capable devices can use an accelerated math library for all 121 // reduced precision scripts. 122#if defined(ARCH_ARM_HAVE_NEON) 123 prec = drv->ME->getRSFloatPrecision(); 124 if (prec != bcinfo::RS_FP_Full) { 125 coreLib = "/system/lib/libclcore_neon.bc"; 126 } 127#endif 128 129 if (bccLinkFile(drv->mBccScript, coreLib, 0) != 0) { 130 ALOGE("bcc: FAILS to link bitcode"); 131 goto error; 132 } 133 134 if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) { 135 ALOGE("bcc: FAILS to prepare executable"); 136 goto error; 137 } 138 139 drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root")); 140 drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand")); 141 drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init")); 142 drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor")); 143 144 exportFuncCount = drv->ME->getExportFuncCount(); 145 if (exportFuncCount > 0) { 146 drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount, 147 sizeof(InvokeFunc_t)); 148 bccGetExportFuncList(drv->mBccScript, exportFuncCount, 149 (void **) drv->mInvokeFunctions); 150 } else { 151 drv->mInvokeFunctions = NULL; 152 } 153 154 exportVarCount = drv->ME->getExportVarCount(); 155 if (exportVarCount > 0) { 156 drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*)); 157 drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool)); 158 bccGetExportVarList(drv->mBccScript, exportVarCount, 159 (void **) drv->mFieldAddress); 160 } else { 161 drv->mFieldAddress = NULL; 162 drv->mFieldIsObject = NULL; 163 } 164 165 objectSlotCount = drv->ME->getObjectSlotCount(); 166 if (objectSlotCount > 0) { 167 const uint32_t *objectSlotList = drv->ME->getObjectSlotList(); 168 for (uint32_t ct=0; ct < objectSlotCount; ct++) { 169 drv->mFieldIsObject[objectSlotList[ct]] = true; 170 } 171 } 172 173 exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount(); 174 drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList(); 175 if (exportForEachSignatureCount > 0) { 176 drv->mForEachFunctions = 177 (ForEachFunc_t*) calloc(exportForEachSignatureCount, 178 sizeof(ForEachFunc_t)); 179 bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount, 180 (void **) drv->mForEachFunctions); 181 } else { 182 drv->mForEachFunctions = NULL; 183 } 184 185 // Copy info over to runtime 186 script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount(); 187 script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount(); 188 script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount(); 189 script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList(); 190 script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList(); 191 192 if (drv->mRootExpand) { 193 script->mHal.info.root = drv->mRootExpand; 194 } else { 195 script->mHal.info.root = drv->mRoot; 196 } 197 198 pthread_mutex_unlock(&rsdgInitMutex); 199 return true; 200 201error: 202 203 pthread_mutex_unlock(&rsdgInitMutex); 204 if (drv->ME) { 205 delete drv->ME; 206 drv->ME = NULL; 207 } 208 free(drv); 209 return false; 210 211} 212 213typedef struct { 214 Context *rsc; 215 Script *script; 216 ForEachFunc_t kernel; 217 uint32_t sig; 218 const Allocation * ain; 219 Allocation * aout; 220 const void * usr; 221 size_t usrLen; 222 223 uint32_t mSliceSize; 224 volatile int mSliceNum; 225 226 const uint8_t *ptrIn; 227 uint32_t eStrideIn; 228 uint8_t *ptrOut; 229 uint32_t eStrideOut; 230 231 uint32_t yStrideIn; 232 uint32_t yStrideOut; 233 234 uint32_t xStart; 235 uint32_t xEnd; 236 uint32_t yStart; 237 uint32_t yEnd; 238 uint32_t zStart; 239 uint32_t zEnd; 240 uint32_t arrayStart; 241 uint32_t arrayEnd; 242 243 uint32_t dimX; 244 uint32_t dimY; 245 uint32_t dimZ; 246 uint32_t dimArray; 247} MTLaunchStruct; 248typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 249 250static void wc_xy(void *usr, uint32_t idx) { 251 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 252 RsForEachStubParamStruct p; 253 memset(&p, 0, sizeof(p)); 254 p.usr = mtls->usr; 255 p.usr_len = mtls->usrLen; 256 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 257 uint32_t sig = mtls->sig; 258 259 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 260 while (1) { 261 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 262 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 263 uint32_t yEnd = yStart + mtls->mSliceSize; 264 yEnd = rsMin(yEnd, mtls->yEnd); 265 if (yEnd <= yStart) { 266 return; 267 } 268 269 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 270 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 271 for (p.y = yStart; p.y < yEnd; p.y++) { 272 p.out = mtls->ptrOut + (mtls->yStrideOut * p.y); 273 p.in = mtls->ptrIn + (mtls->yStrideIn * p.y); 274 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 275 } 276 } 277} 278 279static void wc_x(void *usr, uint32_t idx) { 280 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 281 RsForEachStubParamStruct p; 282 memset(&p, 0, sizeof(p)); 283 p.usr = mtls->usr; 284 p.usr_len = mtls->usrLen; 285 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 286 uint32_t sig = mtls->sig; 287 288 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 289 while (1) { 290 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 291 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 292 uint32_t xEnd = xStart + mtls->mSliceSize; 293 xEnd = rsMin(xEnd, mtls->xEnd); 294 if (xEnd <= xStart) { 295 return; 296 } 297 298 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 299 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 300 301 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 302 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 303 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 304 } 305} 306 307void rsdScriptInvokeForEach(const Context *rsc, 308 Script *s, 309 uint32_t slot, 310 const Allocation * ain, 311 Allocation * aout, 312 const void * usr, 313 uint32_t usrLen, 314 const RsScriptCall *sc) { 315 316 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 317 318 MTLaunchStruct mtls; 319 memset(&mtls, 0, sizeof(mtls)); 320 321 DrvScript *drv = (DrvScript *)s->mHal.drv; 322 mtls.kernel = drv->mForEachFunctions[slot]; 323 rsAssert(mtls.kernel != NULL); 324 mtls.sig = 0x1f; // temp fix for old apps, full table in slang_rs_export_foreach.cpp 325 if (drv->mExportForEachSignatureList) { 326 mtls.sig = drv->mExportForEachSignatureList[slot]; 327 } 328 if (ain) { 329 mtls.dimX = ain->getType()->getDimX(); 330 mtls.dimY = ain->getType()->getDimY(); 331 mtls.dimZ = ain->getType()->getDimZ(); 332 //mtls.dimArray = ain->getType()->getDimArray(); 333 } else if (aout) { 334 mtls.dimX = aout->getType()->getDimX(); 335 mtls.dimY = aout->getType()->getDimY(); 336 mtls.dimZ = aout->getType()->getDimZ(); 337 //mtls.dimArray = aout->getType()->getDimArray(); 338 } else { 339 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 340 return; 341 } 342 343 if (!sc || (sc->xEnd == 0)) { 344 mtls.xEnd = mtls.dimX; 345 } else { 346 rsAssert(sc->xStart < mtls.dimX); 347 rsAssert(sc->xEnd <= mtls.dimX); 348 rsAssert(sc->xStart < sc->xEnd); 349 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 350 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 351 if (mtls.xStart >= mtls.xEnd) return; 352 } 353 354 if (!sc || (sc->yEnd == 0)) { 355 mtls.yEnd = mtls.dimY; 356 } else { 357 rsAssert(sc->yStart < mtls.dimY); 358 rsAssert(sc->yEnd <= mtls.dimY); 359 rsAssert(sc->yStart < sc->yEnd); 360 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 361 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 362 if (mtls.yStart >= mtls.yEnd) return; 363 } 364 365 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 366 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 367 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 368 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 369 370 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 371 372 Context *mrsc = (Context *)rsc; 373 Script * oldTLS = setTLS(s); 374 375 mtls.rsc = mrsc; 376 mtls.ain = ain; 377 mtls.aout = aout; 378 mtls.script = s; 379 mtls.usr = usr; 380 mtls.usrLen = usrLen; 381 mtls.mSliceSize = 10; 382 mtls.mSliceNum = 0; 383 384 mtls.ptrIn = NULL; 385 mtls.eStrideIn = 0; 386 if (ain) { 387 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 388 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 389 mtls.yStrideIn = ain->mHal.drvState.stride; 390 } 391 392 mtls.ptrOut = NULL; 393 mtls.eStrideOut = 0; 394 if (aout) { 395 mtls.ptrOut = (uint8_t *)aout->getPtr(); 396 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 397 mtls.yStrideOut = aout->mHal.drvState.stride; 398 } 399 400 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 401 if (mtls.dimY > 1) { 402 rsdLaunchThreads(mrsc, wc_xy, &mtls); 403 } else { 404 rsdLaunchThreads(mrsc, wc_x, &mtls); 405 } 406 407 //ALOGE("launch 1"); 408 } else { 409 RsForEachStubParamStruct p; 410 memset(&p, 0, sizeof(p)); 411 p.usr = mtls.usr; 412 p.usr_len = mtls.usrLen; 413 uint32_t sig = mtls.sig; 414 415 //ALOGE("launch 3"); 416 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 417 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 418 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 419 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 420 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 421 mtls.dimX * mtls.dimY * p.z + 422 mtls.dimX * p.y; 423 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 424 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 425 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 426 mtls.eStrideOut); 427 } 428 } 429 } 430 } 431 432 setTLS(oldTLS); 433} 434 435 436int rsdScriptInvokeRoot(const Context *dc, Script *script) { 437 DrvScript *drv = (DrvScript *)script->mHal.drv; 438 439 Script * oldTLS = setTLS(script); 440 int ret = drv->mRoot(); 441 setTLS(oldTLS); 442 443 return ret; 444} 445 446void rsdScriptInvokeInit(const Context *dc, Script *script) { 447 DrvScript *drv = (DrvScript *)script->mHal.drv; 448 449 if (drv->mInit) { 450 drv->mInit(); 451 } 452} 453 454void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 455 DrvScript *drv = (DrvScript *)script->mHal.drv; 456 457 if (drv->mFreeChildren) { 458 drv->mFreeChildren(); 459 } 460} 461 462void rsdScriptInvokeFunction(const Context *dc, Script *script, 463 uint32_t slot, 464 const void *params, 465 size_t paramLength) { 466 DrvScript *drv = (DrvScript *)script->mHal.drv; 467 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 468 469 Script * oldTLS = setTLS(script); 470 ((void (*)(const void *, uint32_t)) 471 drv->mInvokeFunctions[slot])(params, paramLength); 472 setTLS(oldTLS); 473} 474 475void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 476 uint32_t slot, void *data, size_t dataLength) { 477 DrvScript *drv = (DrvScript *)script->mHal.drv; 478 //rsAssert(!script->mFieldIsObject[slot]); 479 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 480 481 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 482 if (!destPtr) { 483 //ALOGV("Calling setVar on slot = %i which is null", slot); 484 return; 485 } 486 487 memcpy(destPtr, data, dataLength); 488} 489 490void rsdScriptSetGlobalVarWithElemDims( 491 const android::renderscript::Context *dc, 492 const android::renderscript::Script *script, 493 uint32_t slot, void *data, size_t dataLength, 494 const android::renderscript::Element *elem, 495 const size_t *dims, size_t dimLength) { 496 DrvScript *drv = (DrvScript *)script->mHal.drv; 497 498 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 499 if (!destPtr) { 500 //ALOGV("Calling setVar on slot = %i which is null", slot); 501 return; 502 } 503 504 // We want to look at dimension in terms of integer components, 505 // but dimLength is given in terms of bytes. 506 dimLength /= sizeof(int); 507 508 // Only a single dimension is currently supported. 509 rsAssert(dimLength == 1); 510 if (dimLength == 1) { 511 // First do the increment loop. 512 size_t stride = elem->getSizeBytes(); 513 char *cVal = reinterpret_cast<char *>(data); 514 for (size_t i = 0; i < dims[0]; i++) { 515 elem->incRefs(cVal); 516 cVal += stride; 517 } 518 519 // Decrement loop comes after (to prevent race conditions). 520 char *oldVal = reinterpret_cast<char *>(destPtr); 521 for (size_t i = 0; i < dims[0]; i++) { 522 elem->decRefs(oldVal); 523 oldVal += stride; 524 } 525 } 526 527 memcpy(destPtr, data, dataLength); 528} 529 530void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 531 DrvScript *drv = (DrvScript *)script->mHal.drv; 532 //rsAssert(!script->mFieldIsObject[slot]); 533 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 534 535 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 536 if (!destPtr) { 537 //ALOGV("Calling setVar on slot = %i which is null", slot); 538 return; 539 } 540 541 memcpy(destPtr, &data, sizeof(void *)); 542} 543 544void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 545 DrvScript *drv = (DrvScript *)script->mHal.drv; 546 //rsAssert(script->mFieldIsObject[slot]); 547 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 548 549 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 550 if (!destPtr) { 551 //ALOGV("Calling setVar on slot = %i which is null", slot); 552 return; 553 } 554 555 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 556} 557 558void rsdScriptDestroy(const Context *dc, Script *script) { 559 DrvScript *drv = (DrvScript *)script->mHal.drv; 560 561 if (drv->mFieldAddress) { 562 size_t exportVarCount = drv->ME->getExportVarCount(); 563 for (size_t ct = 0; ct < exportVarCount; ct++) { 564 if (drv->mFieldIsObject[ct]) { 565 // The field address can be NULL if the script-side has 566 // optimized the corresponding global variable away. 567 if (drv->mFieldAddress[ct]) { 568 rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]); 569 } 570 } 571 } 572 free(drv->mFieldAddress); 573 drv->mFieldAddress = NULL; 574 free(drv->mFieldIsObject); 575 drv->mFieldIsObject = NULL; } 576 577 if (drv->mInvokeFunctions) { 578 free(drv->mInvokeFunctions); 579 drv->mInvokeFunctions = NULL; 580 } 581 582 if (drv->mForEachFunctions) { 583 free(drv->mForEachFunctions); 584 drv->mForEachFunctions = NULL; 585 } 586 587 delete drv->ME; 588 drv->ME = NULL; 589 590 free(drv); 591 script->mHal.drv = NULL; 592 593} 594 595 596