rsdBcc.cpp revision 298691a87d1b30d1221e303a6788d5272c223971
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcinfo/MetadataExtractor.h> 22 23#include "rsContext.h" 24#include "rsElement.h" 25#include "rsScriptC.h" 26 27#include "utils/Timers.h" 28#include "utils/StopWatch.h" 29 30using namespace android; 31using namespace android::renderscript; 32 33struct DrvScript { 34 int (*mRoot)(); 35 int (*mRootExpand)(); 36 void (*mInit)(); 37 void (*mFreeChildren)(); 38 39 BCCScriptRef mBccScript; 40 41 bcinfo::MetadataExtractor *ME; 42 43 InvokeFunc_t *mInvokeFunctions; 44 ForEachFunc_t *mForEachFunctions; 45 void ** mFieldAddress; 46 bool * mFieldIsObject; 47 const uint32_t *mExportForEachSignatureList; 48 49 const uint8_t * mScriptText; 50 uint32_t mScriptTextLength; 51}; 52 53typedef void (*outer_foreach_t)( 54 const android::renderscript::RsForEachStubParamStruct *, 55 uint32_t x1, uint32_t x2, 56 uint32_t instep, uint32_t outstep); 57 58static Script * setTLS(Script *sc) { 59 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 60 rsAssert(tls); 61 Script *old = tls->mScript; 62 tls->mScript = sc; 63 return old; 64} 65 66 67bool rsdScriptInit(const Context *rsc, 68 ScriptC *script, 69 char const *resName, 70 char const *cacheDir, 71 uint8_t const *bitcode, 72 size_t bitcodeSize, 73 uint32_t flags) { 74 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 75 76 pthread_mutex_lock(&rsdgInitMutex); 77 78 size_t exportFuncCount = 0; 79 size_t exportVarCount = 0; 80 size_t objectSlotCount = 0; 81 size_t exportForEachSignatureCount = 0; 82 83 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 84 if (drv == NULL) { 85 goto error; 86 } 87 script->mHal.drv = drv; 88 89 drv->mBccScript = bccCreateScript(); 90 script->mHal.info.isThreadable = true; 91 drv->mScriptText = bitcode; 92 drv->mScriptTextLength = bitcodeSize; 93 94 95 drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText, 96 drv->mScriptTextLength); 97 if (!drv->ME->extract()) { 98 ALOGE("bcinfo: failed to read script metadata"); 99 goto error; 100 } 101 102 //ALOGE("mBccScript %p", script->mBccScript); 103 104 if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) { 105 ALOGE("bcc: FAILS to register symbol callback"); 106 goto error; 107 } 108 109 if (bccReadBC(drv->mBccScript, 110 resName, 111 (char const *)drv->mScriptText, 112 drv->mScriptTextLength, 0) != 0) { 113 ALOGE("bcc: FAILS to read bitcode"); 114 goto error; 115 } 116 117 if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) { 118 ALOGE("bcc: FAILS to link bitcode"); 119 goto error; 120 } 121 122 if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) { 123 ALOGE("bcc: FAILS to prepare executable"); 124 goto error; 125 } 126 127 drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root")); 128 drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand")); 129 drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init")); 130 drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor")); 131 132 exportFuncCount = drv->ME->getExportFuncCount(); 133 if (exportFuncCount > 0) { 134 drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount, 135 sizeof(InvokeFunc_t)); 136 bccGetExportFuncList(drv->mBccScript, exportFuncCount, 137 (void **) drv->mInvokeFunctions); 138 } else { 139 drv->mInvokeFunctions = NULL; 140 } 141 142 exportVarCount = drv->ME->getExportVarCount(); 143 if (exportVarCount > 0) { 144 drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*)); 145 drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool)); 146 bccGetExportVarList(drv->mBccScript, exportVarCount, 147 (void **) drv->mFieldAddress); 148 } else { 149 drv->mFieldAddress = NULL; 150 drv->mFieldIsObject = NULL; 151 } 152 153 objectSlotCount = drv->ME->getObjectSlotCount(); 154 if (objectSlotCount > 0) { 155 const uint32_t *objectSlotList = drv->ME->getObjectSlotList(); 156 for (uint32_t ct=0; ct < objectSlotCount; ct++) { 157 drv->mFieldIsObject[objectSlotList[ct]] = true; 158 } 159 } 160 161 exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount(); 162 drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList(); 163 if (exportForEachSignatureCount > 0) { 164 drv->mForEachFunctions = 165 (ForEachFunc_t*) calloc(exportForEachSignatureCount, 166 sizeof(ForEachFunc_t)); 167 bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount, 168 (void **) drv->mForEachFunctions); 169 } else { 170 drv->mForEachFunctions = NULL; 171 } 172 173 // Copy info over to runtime 174 script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount(); 175 script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount(); 176 script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount(); 177 script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList(); 178 script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList(); 179 180 if (drv->mRootExpand) { 181 script->mHal.info.root = drv->mRootExpand; 182 } else { 183 script->mHal.info.root = drv->mRoot; 184 } 185 186 pthread_mutex_unlock(&rsdgInitMutex); 187 return true; 188 189error: 190 191 pthread_mutex_unlock(&rsdgInitMutex); 192 if (drv->ME) { 193 delete drv->ME; 194 drv->ME = NULL; 195 } 196 free(drv); 197 return false; 198 199} 200 201typedef struct { 202 Context *rsc; 203 Script *script; 204 ForEachFunc_t kernel; 205 uint32_t sig; 206 const Allocation * ain; 207 Allocation * aout; 208 const void * usr; 209 size_t usrLen; 210 211 uint32_t mSliceSize; 212 volatile int mSliceNum; 213 214 const uint8_t *ptrIn; 215 uint32_t eStrideIn; 216 uint8_t *ptrOut; 217 uint32_t eStrideOut; 218 219 uint32_t yStrideIn; 220 uint32_t yStrideOut; 221 222 uint32_t xStart; 223 uint32_t xEnd; 224 uint32_t yStart; 225 uint32_t yEnd; 226 uint32_t zStart; 227 uint32_t zEnd; 228 uint32_t arrayStart; 229 uint32_t arrayEnd; 230 231 uint32_t dimX; 232 uint32_t dimY; 233 uint32_t dimZ; 234 uint32_t dimArray; 235} MTLaunchStruct; 236typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 237 238static void wc_xy(void *usr, uint32_t idx) { 239 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 240 RsForEachStubParamStruct p; 241 memset(&p, 0, sizeof(p)); 242 p.usr = mtls->usr; 243 p.usr_len = mtls->usrLen; 244 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 245 uint32_t sig = mtls->sig; 246 247 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 248 while (1) { 249 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 250 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 251 uint32_t yEnd = yStart + mtls->mSliceSize; 252 yEnd = rsMin(yEnd, mtls->yEnd); 253 if (yEnd <= yStart) { 254 return; 255 } 256 257 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 258 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 259 for (p.y = yStart; p.y < yEnd; p.y++) { 260 p.out = mtls->ptrOut + (mtls->yStrideOut * p.y); 261 p.in = mtls->ptrIn + (mtls->yStrideIn * p.y); 262 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 263 } 264 } 265} 266 267static void wc_x(void *usr, uint32_t idx) { 268 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 269 RsForEachStubParamStruct p; 270 memset(&p, 0, sizeof(p)); 271 p.usr = mtls->usr; 272 p.usr_len = mtls->usrLen; 273 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 274 uint32_t sig = mtls->sig; 275 276 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 277 while (1) { 278 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 279 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 280 uint32_t xEnd = xStart + mtls->mSliceSize; 281 xEnd = rsMin(xEnd, mtls->xEnd); 282 if (xEnd <= xStart) { 283 return; 284 } 285 286 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 287 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 288 289 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 290 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 291 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 292 } 293} 294 295void rsdScriptInvokeForEach(const Context *rsc, 296 Script *s, 297 uint32_t slot, 298 const Allocation * ain, 299 Allocation * aout, 300 const void * usr, 301 uint32_t usrLen, 302 const RsScriptCall *sc) { 303 304 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 305 306 MTLaunchStruct mtls; 307 memset(&mtls, 0, sizeof(mtls)); 308 309 DrvScript *drv = (DrvScript *)s->mHal.drv; 310 mtls.kernel = drv->mForEachFunctions[slot]; 311 rsAssert(mtls.kernel != NULL); 312 mtls.sig = 0x1f; // temp fix for old apps, full table in slang_rs_export_foreach.cpp 313 if (drv->mExportForEachSignatureList) { 314 mtls.sig = drv->mExportForEachSignatureList[slot]; 315 } 316 if (ain) { 317 mtls.dimX = ain->getType()->getDimX(); 318 mtls.dimY = ain->getType()->getDimY(); 319 mtls.dimZ = ain->getType()->getDimZ(); 320 //mtls.dimArray = ain->getType()->getDimArray(); 321 } else if (aout) { 322 mtls.dimX = aout->getType()->getDimX(); 323 mtls.dimY = aout->getType()->getDimY(); 324 mtls.dimZ = aout->getType()->getDimZ(); 325 //mtls.dimArray = aout->getType()->getDimArray(); 326 } else { 327 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 328 return; 329 } 330 331 if (!sc || (sc->xEnd == 0)) { 332 mtls.xEnd = mtls.dimX; 333 } else { 334 rsAssert(sc->xStart < mtls.dimX); 335 rsAssert(sc->xEnd <= mtls.dimX); 336 rsAssert(sc->xStart < sc->xEnd); 337 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 338 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 339 if (mtls.xStart >= mtls.xEnd) return; 340 } 341 342 if (!sc || (sc->yEnd == 0)) { 343 mtls.yEnd = mtls.dimY; 344 } else { 345 rsAssert(sc->yStart < mtls.dimY); 346 rsAssert(sc->yEnd <= mtls.dimY); 347 rsAssert(sc->yStart < sc->yEnd); 348 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 349 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 350 if (mtls.yStart >= mtls.yEnd) return; 351 } 352 353 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 354 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 355 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 356 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 357 358 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 359 360 Context *mrsc = (Context *)rsc; 361 Script * oldTLS = setTLS(s); 362 363 mtls.rsc = mrsc; 364 mtls.ain = ain; 365 mtls.aout = aout; 366 mtls.script = s; 367 mtls.usr = usr; 368 mtls.usrLen = usrLen; 369 mtls.mSliceSize = 10; 370 mtls.mSliceNum = 0; 371 372 mtls.ptrIn = NULL; 373 mtls.eStrideIn = 0; 374 if (ain) { 375 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 376 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 377 mtls.yStrideIn = ain->mHal.drvState.stride; 378 } 379 380 mtls.ptrOut = NULL; 381 mtls.eStrideOut = 0; 382 if (aout) { 383 mtls.ptrOut = (uint8_t *)aout->getPtr(); 384 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 385 mtls.yStrideOut = aout->mHal.drvState.stride; 386 } 387 388 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 389 if (mtls.dimY > 1) { 390 rsdLaunchThreads(mrsc, wc_xy, &mtls); 391 } else { 392 rsdLaunchThreads(mrsc, wc_x, &mtls); 393 } 394 395 //ALOGE("launch 1"); 396 } else { 397 RsForEachStubParamStruct p; 398 memset(&p, 0, sizeof(p)); 399 p.usr = mtls.usr; 400 p.usr_len = mtls.usrLen; 401 uint32_t sig = mtls.sig; 402 403 //ALOGE("launch 3"); 404 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 405 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 406 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 407 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 408 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 409 mtls.dimX * mtls.dimY * p.z + 410 mtls.dimX * p.y; 411 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 412 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 413 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 414 mtls.eStrideOut); 415 } 416 } 417 } 418 } 419 420 setTLS(oldTLS); 421} 422 423 424int rsdScriptInvokeRoot(const Context *dc, Script *script) { 425 DrvScript *drv = (DrvScript *)script->mHal.drv; 426 427 Script * oldTLS = setTLS(script); 428 int ret = drv->mRoot(); 429 setTLS(oldTLS); 430 431 return ret; 432} 433 434void rsdScriptInvokeInit(const Context *dc, Script *script) { 435 DrvScript *drv = (DrvScript *)script->mHal.drv; 436 437 if (drv->mInit) { 438 drv->mInit(); 439 } 440} 441 442void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 443 DrvScript *drv = (DrvScript *)script->mHal.drv; 444 445 if (drv->mFreeChildren) { 446 drv->mFreeChildren(); 447 } 448} 449 450void rsdScriptInvokeFunction(const Context *dc, Script *script, 451 uint32_t slot, 452 const void *params, 453 size_t paramLength) { 454 DrvScript *drv = (DrvScript *)script->mHal.drv; 455 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 456 457 Script * oldTLS = setTLS(script); 458 ((void (*)(const void *, uint32_t)) 459 drv->mInvokeFunctions[slot])(params, paramLength); 460 setTLS(oldTLS); 461} 462 463void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 464 uint32_t slot, void *data, size_t dataLength) { 465 DrvScript *drv = (DrvScript *)script->mHal.drv; 466 //rsAssert(!script->mFieldIsObject[slot]); 467 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 468 469 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 470 if (!destPtr) { 471 //ALOGV("Calling setVar on slot = %i which is null", slot); 472 return; 473 } 474 475 memcpy(destPtr, data, dataLength); 476} 477 478void rsdScriptSetGlobalVarWithElemDims( 479 const android::renderscript::Context *dc, 480 const android::renderscript::Script *script, 481 uint32_t slot, void *data, size_t dataLength, 482 const android::renderscript::Element *elem, 483 const size_t *dims, size_t dimLength) { 484 DrvScript *drv = (DrvScript *)script->mHal.drv; 485 486 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 487 if (!destPtr) { 488 //ALOGV("Calling setVar on slot = %i which is null", slot); 489 return; 490 } 491 492 // We want to look at dimension in terms of integer components, 493 // but dimLength is given in terms of bytes. 494 dimLength /= sizeof(int); 495 496 // Only a single dimension is currently supported. 497 rsAssert(dimLength == 1); 498 if (dimLength == 1) { 499 // First do the increment loop. 500 size_t stride = elem->getSizeBytes(); 501 char *cVal = reinterpret_cast<char *>(data); 502 for (size_t i = 0; i < dims[0]; i++) { 503 elem->incRefs(cVal); 504 cVal += stride; 505 } 506 507 // Decrement loop comes after (to prevent race conditions). 508 char *oldVal = reinterpret_cast<char *>(destPtr); 509 for (size_t i = 0; i < dims[0]; i++) { 510 elem->decRefs(oldVal); 511 oldVal += stride; 512 } 513 } 514 515 memcpy(destPtr, data, dataLength); 516} 517 518void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 519 DrvScript *drv = (DrvScript *)script->mHal.drv; 520 //rsAssert(!script->mFieldIsObject[slot]); 521 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 522 523 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 524 if (!destPtr) { 525 //ALOGV("Calling setVar on slot = %i which is null", slot); 526 return; 527 } 528 529 memcpy(destPtr, &data, sizeof(void *)); 530} 531 532void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 533 DrvScript *drv = (DrvScript *)script->mHal.drv; 534 //rsAssert(script->mFieldIsObject[slot]); 535 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 536 537 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 538 if (!destPtr) { 539 //ALOGV("Calling setVar on slot = %i which is null", slot); 540 return; 541 } 542 543 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 544} 545 546void rsdScriptDestroy(const Context *dc, Script *script) { 547 DrvScript *drv = (DrvScript *)script->mHal.drv; 548 549 if (drv->mFieldAddress) { 550 size_t exportVarCount = drv->ME->getExportVarCount(); 551 for (size_t ct = 0; ct < exportVarCount; ct++) { 552 if (drv->mFieldIsObject[ct]) { 553 // The field address can be NULL if the script-side has 554 // optimized the corresponding global variable away. 555 if (drv->mFieldAddress[ct]) { 556 rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]); 557 } 558 } 559 } 560 free(drv->mFieldAddress); 561 drv->mFieldAddress = NULL; 562 free(drv->mFieldIsObject); 563 drv->mFieldIsObject = NULL; } 564 565 if (drv->mInvokeFunctions) { 566 free(drv->mInvokeFunctions); 567 drv->mInvokeFunctions = NULL; 568 } 569 570 if (drv->mForEachFunctions) { 571 free(drv->mForEachFunctions); 572 drv->mForEachFunctions = NULL; 573 } 574 575 delete drv->ME; 576 drv->ME = NULL; 577 578 free(drv); 579 script->mHal.drv = NULL; 580 581} 582 583 584