rsdBcc.cpp revision 586e2a41144dd520390b04897936b694647a1330
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcc/BCCContext.h> 22#include <bcc/RenderScript/RSCompilerDriver.h> 23#include <bcc/RenderScript/RSExecutable.h> 24#include <bcc/RenderScript/RSInfo.h> 25 26#include "rsContext.h" 27#include "rsElement.h" 28#include "rsScriptC.h" 29 30#include "utils/Vector.h" 31#include "utils/Timers.h" 32#include "utils/StopWatch.h" 33 34using namespace android; 35using namespace android::renderscript; 36 37struct DrvScript { 38 int (*mRoot)(); 39 int (*mRootExpand)(); 40 void (*mInit)(); 41 void (*mFreeChildren)(); 42 43 bcc::BCCContext *mCompilerContext; 44 bcc::RSCompilerDriver *mCompilerDriver; 45 bcc::RSExecutable *mExecutable; 46}; 47 48typedef void (*outer_foreach_t)( 49 const android::renderscript::RsForEachStubParamStruct *, 50 uint32_t x1, uint32_t x2, 51 uint32_t instep, uint32_t outstep); 52 53static Script * setTLS(Script *sc) { 54 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 55 rsAssert(tls); 56 Script *old = tls->mScript; 57 tls->mScript = sc; 58 return old; 59} 60 61 62bool rsdScriptInit(const Context *rsc, 63 ScriptC *script, 64 char const *resName, 65 char const *cacheDir, 66 uint8_t const *bitcode, 67 size_t bitcodeSize, 68 uint32_t flags) { 69 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 70 //ALOGE("rsdScriptInit %p %p", rsc, script); 71 72 pthread_mutex_lock(&rsdgInitMutex); 73 74 const char* coreLib = "/system/lib/libclcore.bc"; 75 bcc::RSInfo::FloatPrecision prec; 76 bcc::RSExecutable *exec; 77 const bcc::RSInfo *info; 78 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 79 if (drv == NULL) { 80 goto error; 81 } 82 script->mHal.drv = drv; 83 84 drv->mCompilerContext = NULL; 85 drv->mCompilerDriver = NULL; 86 drv->mExecutable = NULL; 87 88 drv->mCompilerContext = new bcc::BCCContext(); 89 if (drv->mCompilerContext == NULL) { 90 ALOGE("bcc: FAILS to create compiler context (out of memory)"); 91 goto error; 92 } 93 94 drv->mCompilerDriver = new bcc::RSCompilerDriver(); 95 if (drv->mCompilerDriver == NULL) { 96 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 97 goto error; 98 } 99 100 script->mHal.info.isThreadable = true; 101 102 drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub); 103 drv->mCompilerDriver->setRSRuntimeLookupContext(script); 104 105 exec = drv->mCompilerDriver->build(*drv->mCompilerContext, 106 cacheDir, resName, 107 (const char *)bitcode, bitcodeSize); 108 109 if (exec == NULL) { 110 ALOGE("bcc: FAILS to prepare executable for '%s'", resName); 111 goto error; 112 } 113 114 drv->mExecutable = exec; 115 116 exec->setThreadable(script->mHal.info.isThreadable); 117 if (!exec->syncInfo()) { 118 ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); 119 } 120 121 drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root")); 122 drv->mRootExpand = 123 reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand")); 124 drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init")); 125 drv->mFreeChildren = 126 reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor")); 127 128 info = &drv->mExecutable->getInfo(); 129 // Copy info over to runtime 130 script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size(); 131 script->mHal.info.exportedVariableCount = info->getExportVarNames().size(); 132 script->mHal.info.exportedPragmaCount = info->getPragmas().size(); 133 script->mHal.info.exportedPragmaKeyList = 134 const_cast<const char**>(exec->getPragmaKeys().array()); 135 script->mHal.info.exportedPragmaValueList = 136 const_cast<const char**>(exec->getPragmaValues().array()); 137 138 if (drv->mRootExpand) { 139 script->mHal.info.root = drv->mRootExpand; 140 } else { 141 script->mHal.info.root = drv->mRoot; 142 } 143 144 pthread_mutex_unlock(&rsdgInitMutex); 145 return true; 146 147error: 148 149 pthread_mutex_unlock(&rsdgInitMutex); 150 if (drv) { 151 delete drv->mCompilerContext; 152 delete drv->mCompilerDriver; 153 delete drv->mExecutable; 154 free(drv); 155 } 156 script->mHal.drv = NULL; 157 return false; 158 159} 160 161typedef struct { 162 Context *rsc; 163 Script *script; 164 ForEachFunc_t kernel; 165 uint32_t sig; 166 const Allocation * ain; 167 Allocation * aout; 168 const void * usr; 169 size_t usrLen; 170 171 uint32_t mSliceSize; 172 volatile int mSliceNum; 173 174 const uint8_t *ptrIn; 175 uint32_t eStrideIn; 176 uint8_t *ptrOut; 177 uint32_t eStrideOut; 178 179 uint32_t yStrideIn; 180 uint32_t yStrideOut; 181 182 uint32_t xStart; 183 uint32_t xEnd; 184 uint32_t yStart; 185 uint32_t yEnd; 186 uint32_t zStart; 187 uint32_t zEnd; 188 uint32_t arrayStart; 189 uint32_t arrayEnd; 190 191 uint32_t dimX; 192 uint32_t dimY; 193 uint32_t dimZ; 194 uint32_t dimArray; 195} MTLaunchStruct; 196typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 197 198static void wc_xy(void *usr, uint32_t idx) { 199 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 200 RsForEachStubParamStruct p; 201 memset(&p, 0, sizeof(p)); 202 p.usr = mtls->usr; 203 p.usr_len = mtls->usrLen; 204 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 205 uint32_t sig = mtls->sig; 206 207 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 208 while (1) { 209 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 210 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 211 uint32_t yEnd = yStart + mtls->mSliceSize; 212 yEnd = rsMin(yEnd, mtls->yEnd); 213 if (yEnd <= yStart) { 214 return; 215 } 216 217 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 218 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 219 for (p.y = yStart; p.y < yEnd; p.y++) { 220 p.out = mtls->ptrOut + (mtls->yStrideOut * p.y); 221 p.in = mtls->ptrIn + (mtls->yStrideIn * p.y); 222 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 223 } 224 } 225} 226 227static void wc_x(void *usr, uint32_t idx) { 228 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 229 RsForEachStubParamStruct p; 230 memset(&p, 0, sizeof(p)); 231 p.usr = mtls->usr; 232 p.usr_len = mtls->usrLen; 233 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 234 uint32_t sig = mtls->sig; 235 236 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 237 while (1) { 238 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 239 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 240 uint32_t xEnd = xStart + mtls->mSliceSize; 241 xEnd = rsMin(xEnd, mtls->xEnd); 242 if (xEnd <= xStart) { 243 return; 244 } 245 246 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 247 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 248 249 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 250 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 251 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 252 } 253} 254 255void rsdScriptInvokeForEach(const Context *rsc, 256 Script *s, 257 uint32_t slot, 258 const Allocation * ain, 259 Allocation * aout, 260 const void * usr, 261 uint32_t usrLen, 262 const RsScriptCall *sc) { 263 264 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 265 266 MTLaunchStruct mtls; 267 memset(&mtls, 0, sizeof(mtls)); 268 269 //ALOGE("for each script %p in %p out %p", s, ain, aout); 270 271 DrvScript *drv = (DrvScript *)s->mHal.drv; 272 rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size()); 273 mtls.kernel = reinterpret_cast<ForEachFunc_t>( 274 drv->mExecutable->getExportForeachFuncAddrs()[slot]); 275 rsAssert(mtls.kernel != NULL); 276 mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second; 277 278 if (ain) { 279 mtls.dimX = ain->getType()->getDimX(); 280 mtls.dimY = ain->getType()->getDimY(); 281 mtls.dimZ = ain->getType()->getDimZ(); 282 //mtls.dimArray = ain->getType()->getDimArray(); 283 } else if (aout) { 284 mtls.dimX = aout->getType()->getDimX(); 285 mtls.dimY = aout->getType()->getDimY(); 286 mtls.dimZ = aout->getType()->getDimZ(); 287 //mtls.dimArray = aout->getType()->getDimArray(); 288 } else { 289 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 290 return; 291 } 292 293 if (!sc || (sc->xEnd == 0)) { 294 mtls.xEnd = mtls.dimX; 295 } else { 296 rsAssert(sc->xStart < mtls.dimX); 297 rsAssert(sc->xEnd <= mtls.dimX); 298 rsAssert(sc->xStart < sc->xEnd); 299 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 300 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 301 if (mtls.xStart >= mtls.xEnd) return; 302 } 303 304 if (!sc || (sc->yEnd == 0)) { 305 mtls.yEnd = mtls.dimY; 306 } else { 307 rsAssert(sc->yStart < mtls.dimY); 308 rsAssert(sc->yEnd <= mtls.dimY); 309 rsAssert(sc->yStart < sc->yEnd); 310 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 311 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 312 if (mtls.yStart >= mtls.yEnd) return; 313 } 314 315 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 316 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 317 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 318 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 319 320 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 321 322 Context *mrsc = (Context *)rsc; 323 Script * oldTLS = setTLS(s); 324 325 mtls.rsc = mrsc; 326 mtls.ain = ain; 327 mtls.aout = aout; 328 mtls.script = s; 329 mtls.usr = usr; 330 mtls.usrLen = usrLen; 331 mtls.mSliceSize = 10; 332 mtls.mSliceNum = 0; 333 334 mtls.ptrIn = NULL; 335 mtls.eStrideIn = 0; 336 if (ain) { 337 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 338 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 339 mtls.yStrideIn = ain->mHal.drvState.stride; 340 } 341 342 mtls.ptrOut = NULL; 343 mtls.eStrideOut = 0; 344 if (aout) { 345 mtls.ptrOut = (uint8_t *)aout->getPtr(); 346 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 347 mtls.yStrideOut = aout->mHal.drvState.stride; 348 } 349 350 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) { 351 dc->mInForEach = true; 352 if (mtls.dimY > 1) { 353 mtls.mSliceSize = mtls.dimY / (dc->mWorkers.mCount * 4); 354 if(mtls.mSliceSize < 1) { 355 mtls.mSliceSize = 1; 356 } 357 358 rsdLaunchThreads(mrsc, wc_xy, &mtls); 359 } else { 360 mtls.mSliceSize = mtls.dimX / (dc->mWorkers.mCount * 4); 361 if(mtls.mSliceSize < 1) { 362 mtls.mSliceSize = 1; 363 } 364 365 rsdLaunchThreads(mrsc, wc_x, &mtls); 366 } 367 dc->mInForEach = false; 368 369 //ALOGE("launch 1"); 370 } else { 371 RsForEachStubParamStruct p; 372 memset(&p, 0, sizeof(p)); 373 p.usr = mtls.usr; 374 p.usr_len = mtls.usrLen; 375 uint32_t sig = mtls.sig; 376 377 //ALOGE("launch 3"); 378 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 379 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 380 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 381 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 382 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 383 mtls.dimX * mtls.dimY * p.z + 384 mtls.dimX * p.y; 385 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 386 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 387 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 388 mtls.eStrideOut); 389 } 390 } 391 } 392 } 393 394 setTLS(oldTLS); 395} 396 397 398int rsdScriptInvokeRoot(const Context *dc, Script *script) { 399 DrvScript *drv = (DrvScript *)script->mHal.drv; 400 401 Script * oldTLS = setTLS(script); 402 int ret = drv->mRoot(); 403 setTLS(oldTLS); 404 405 return ret; 406} 407 408void rsdScriptInvokeInit(const Context *dc, Script *script) { 409 DrvScript *drv = (DrvScript *)script->mHal.drv; 410 411 if (drv->mInit) { 412 drv->mInit(); 413 } 414} 415 416void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 417 DrvScript *drv = (DrvScript *)script->mHal.drv; 418 419 if (drv->mFreeChildren) { 420 drv->mFreeChildren(); 421 } 422} 423 424void rsdScriptInvokeFunction(const Context *dc, Script *script, 425 uint32_t slot, 426 const void *params, 427 size_t paramLength) { 428 DrvScript *drv = (DrvScript *)script->mHal.drv; 429 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 430 431 Script * oldTLS = setTLS(script); 432 reinterpret_cast<void (*)(const void *, uint32_t)>( 433 drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength); 434 setTLS(oldTLS); 435} 436 437void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 438 uint32_t slot, void *data, size_t dataLength) { 439 DrvScript *drv = (DrvScript *)script->mHal.drv; 440 //rsAssert(!script->mFieldIsObject[slot]); 441 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 442 443 int32_t *destPtr = reinterpret_cast<int32_t *>( 444 drv->mExecutable->getExportVarAddrs()[slot]); 445 if (!destPtr) { 446 //ALOGV("Calling setVar on slot = %i which is null", slot); 447 return; 448 } 449 450 memcpy(destPtr, data, dataLength); 451} 452 453void rsdScriptSetGlobalVarWithElemDims( 454 const android::renderscript::Context *dc, 455 const android::renderscript::Script *script, 456 uint32_t slot, void *data, size_t dataLength, 457 const android::renderscript::Element *elem, 458 const size_t *dims, size_t dimLength) { 459 DrvScript *drv = (DrvScript *)script->mHal.drv; 460 461 int32_t *destPtr = reinterpret_cast<int32_t *>( 462 drv->mExecutable->getExportVarAddrs()[slot]); 463 if (!destPtr) { 464 //ALOGV("Calling setVar on slot = %i which is null", slot); 465 return; 466 } 467 468 // We want to look at dimension in terms of integer components, 469 // but dimLength is given in terms of bytes. 470 dimLength /= sizeof(int); 471 472 // Only a single dimension is currently supported. 473 rsAssert(dimLength == 1); 474 if (dimLength == 1) { 475 // First do the increment loop. 476 size_t stride = elem->getSizeBytes(); 477 char *cVal = reinterpret_cast<char *>(data); 478 for (size_t i = 0; i < dims[0]; i++) { 479 elem->incRefs(cVal); 480 cVal += stride; 481 } 482 483 // Decrement loop comes after (to prevent race conditions). 484 char *oldVal = reinterpret_cast<char *>(destPtr); 485 for (size_t i = 0; i < dims[0]; i++) { 486 elem->decRefs(oldVal); 487 oldVal += stride; 488 } 489 } 490 491 memcpy(destPtr, data, dataLength); 492} 493 494void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 495 DrvScript *drv = (DrvScript *)script->mHal.drv; 496 //rsAssert(!script->mFieldIsObject[slot]); 497 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 498 499 int32_t *destPtr = reinterpret_cast<int32_t *>( 500 drv->mExecutable->getExportVarAddrs()[slot]); 501 if (!destPtr) { 502 //ALOGV("Calling setVar on slot = %i which is null", slot); 503 return; 504 } 505 506 memcpy(destPtr, &data, sizeof(void *)); 507} 508 509void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 510 DrvScript *drv = (DrvScript *)script->mHal.drv; 511 //rsAssert(script->mFieldIsObject[slot]); 512 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 513 514 int32_t *destPtr = reinterpret_cast<int32_t *>( 515 drv->mExecutable->getExportVarAddrs()[slot]); 516 if (!destPtr) { 517 //ALOGV("Calling setVar on slot = %i which is null", slot); 518 return; 519 } 520 521 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 522} 523 524void rsdScriptDestroy(const Context *dc, Script *script) { 525 DrvScript *drv = (DrvScript *)script->mHal.drv; 526 527 if (drv == NULL) { 528 return; 529 } 530 531 if (drv->mExecutable) { 532 Vector<void *>::const_iterator var_addr_iter = 533 drv->mExecutable->getExportVarAddrs().begin(); 534 Vector<void *>::const_iterator var_addr_end = 535 drv->mExecutable->getExportVarAddrs().end(); 536 537 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = 538 drv->mExecutable->getInfo().getObjectSlots().begin(); 539 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = 540 drv->mExecutable->getInfo().getObjectSlots().end(); 541 542 while ((var_addr_iter != var_addr_end) && 543 (is_object_iter != is_object_end)) { 544 // The field address can be NULL if the script-side has optimized 545 // the corresponding global variable away. 546 ObjectBase **obj_addr = 547 reinterpret_cast<ObjectBase **>(*var_addr_iter); 548 if (*is_object_iter) { 549 if (*var_addr_iter != NULL) { 550 rsrClearObject(dc, script, obj_addr); 551 } 552 } 553 var_addr_iter++; 554 is_object_iter++; 555 } 556 } 557 558 delete drv->mCompilerContext; 559 delete drv->mCompilerDriver; 560 delete drv->mExecutable; 561 562 free(drv); 563 script->mHal.drv = NULL; 564} 565