rsdBcc.cpp revision 2980f07d3dbbca727e8efe24ace7e7928a935648
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcc/BCCContext.h> 22#include <bcc/RenderScript/RSCompilerDriver.h> 23#include <bcc/RenderScript/RSExecutable.h> 24#include <bcc/RenderScript/RSInfo.h> 25 26#include "rsContext.h" 27#include "rsElement.h" 28#include "rsScriptC.h" 29 30#include "utils/Vector.h" 31#include "utils/Timers.h" 32#include "utils/StopWatch.h" 33 34using namespace android; 35using namespace android::renderscript; 36 37struct DrvScript { 38 int (*mRoot)(); 39 int (*mRootExpand)(); 40 void (*mInit)(); 41 void (*mFreeChildren)(); 42 43 bcc::BCCContext *mCompilerContext; 44 bcc::RSCompilerDriver *mCompilerDriver; 45 bcc::RSExecutable *mExecutable; 46}; 47 48typedef void (*outer_foreach_t)( 49 const android::renderscript::RsForEachStubParamStruct *, 50 uint32_t x1, uint32_t x2, 51 uint32_t instep, uint32_t outstep); 52 53static Script * setTLS(Script *sc) { 54 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 55 rsAssert(tls); 56 Script *old = tls->mScript; 57 tls->mScript = sc; 58 return old; 59} 60 61 62bool rsdScriptInit(const Context *rsc, 63 ScriptC *script, 64 char const *resName, 65 char const *cacheDir, 66 uint8_t const *bitcode, 67 size_t bitcodeSize, 68 uint32_t flags) { 69 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 70 71 pthread_mutex_lock(&rsdgInitMutex); 72 73 bcc::RSExecutable *exec; 74 const bcc::RSInfo *info; 75 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 76 if (drv == NULL) { 77 goto error; 78 } 79 script->mHal.drv = drv; 80 81 drv->mCompilerContext = NULL; 82 drv->mCompilerDriver = NULL; 83 drv->mExecutable = NULL; 84 85 drv->mCompilerContext = new bcc::BCCContext(); 86 if (drv->mCompilerContext == NULL) { 87 ALOGE("bcc: FAILS to create compiler context (out of memory)"); 88 goto error; 89 } 90 91 drv->mCompilerDriver = new bcc::RSCompilerDriver(); 92 if (drv->mCompilerDriver == NULL) { 93 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 94 goto error; 95 } 96 97 script->mHal.info.isThreadable = true; 98 99 drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub); 100 drv->mCompilerDriver->setRSRuntimeLookupContext(script); 101 102 exec = drv->mCompilerDriver->build(*drv->mCompilerContext, 103 cacheDir, resName, 104 (const char *)bitcode, bitcodeSize); 105 106 if (exec == NULL) { 107 ALOGE("bcc: FAILS to prepare executable for '%s'", resName); 108 goto error; 109 } 110 111 drv->mExecutable = exec; 112 113 exec->setThreadable(script->mHal.info.isThreadable); 114 if (!exec->syncInfo()) { 115 ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); 116 } 117 118 drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root")); 119 drv->mRootExpand = 120 reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand")); 121 drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init")); 122 drv->mFreeChildren = 123 reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor")); 124 125 info = &drv->mExecutable->getInfo(); 126 // Copy info over to runtime 127 script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size(); 128 script->mHal.info.exportedVariableCount = info->getExportVarNames().size(); 129 script->mHal.info.exportedPragmaCount = info->getPragmas().size(); 130 script->mHal.info.exportedPragmaKeyList = 131 const_cast<const char**>(exec->getPragmaKeys().array()); 132 script->mHal.info.exportedPragmaValueList = 133 const_cast<const char**>(exec->getPragmaValues().array()); 134 135 if (drv->mRootExpand) { 136 script->mHal.info.root = drv->mRootExpand; 137 } else { 138 script->mHal.info.root = drv->mRoot; 139 } 140 141 pthread_mutex_unlock(&rsdgInitMutex); 142 return true; 143 144error: 145 146 pthread_mutex_unlock(&rsdgInitMutex); 147 if (drv) { 148 delete drv->mCompilerContext; 149 delete drv->mCompilerDriver; 150 delete drv->mExecutable; 151 free(drv); 152 } 153 script->mHal.drv = NULL; 154 return false; 155 156} 157 158typedef struct { 159 Context *rsc; 160 Script *script; 161 ForEachFunc_t kernel; 162 uint32_t sig; 163 const Allocation * ain; 164 Allocation * aout; 165 const void * usr; 166 size_t usrLen; 167 168 uint32_t mSliceSize; 169 volatile int mSliceNum; 170 171 const uint8_t *ptrIn; 172 uint32_t eStrideIn; 173 uint8_t *ptrOut; 174 uint32_t eStrideOut; 175 176 uint32_t yStrideIn; 177 uint32_t yStrideOut; 178 179 uint32_t xStart; 180 uint32_t xEnd; 181 uint32_t yStart; 182 uint32_t yEnd; 183 uint32_t zStart; 184 uint32_t zEnd; 185 uint32_t arrayStart; 186 uint32_t arrayEnd; 187 188 uint32_t dimX; 189 uint32_t dimY; 190 uint32_t dimZ; 191 uint32_t dimArray; 192} MTLaunchStruct; 193typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 194 195static void wc_xy(void *usr, uint32_t idx) { 196 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 197 RsForEachStubParamStruct p; 198 memset(&p, 0, sizeof(p)); 199 p.usr = mtls->usr; 200 p.usr_len = mtls->usrLen; 201 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 202 uint32_t sig = mtls->sig; 203 204 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 205 while (1) { 206 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 207 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 208 uint32_t yEnd = yStart + mtls->mSliceSize; 209 yEnd = rsMin(yEnd, mtls->yEnd); 210 if (yEnd <= yStart) { 211 return; 212 } 213 214 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 215 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 216 for (p.y = yStart; p.y < yEnd; p.y++) { 217 p.out = mtls->ptrOut + (mtls->yStrideOut * p.y); 218 p.in = mtls->ptrIn + (mtls->yStrideIn * p.y); 219 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 220 } 221 } 222} 223 224static void wc_x(void *usr, uint32_t idx) { 225 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 226 RsForEachStubParamStruct p; 227 memset(&p, 0, sizeof(p)); 228 p.usr = mtls->usr; 229 p.usr_len = mtls->usrLen; 230 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 231 uint32_t sig = mtls->sig; 232 233 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 234 while (1) { 235 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 236 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 237 uint32_t xEnd = xStart + mtls->mSliceSize; 238 xEnd = rsMin(xEnd, mtls->xEnd); 239 if (xEnd <= xStart) { 240 return; 241 } 242 243 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 244 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 245 246 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 247 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 248 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 249 } 250} 251 252void rsdScriptInvokeForEach(const Context *rsc, 253 Script *s, 254 uint32_t slot, 255 const Allocation * ain, 256 Allocation * aout, 257 const void * usr, 258 uint32_t usrLen, 259 const RsScriptCall *sc) { 260 261 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 262 263 MTLaunchStruct mtls; 264 memset(&mtls, 0, sizeof(mtls)); 265 266 DrvScript *drv = (DrvScript *)s->mHal.drv; 267 rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size()); 268 mtls.kernel = reinterpret_cast<ForEachFunc_t>( 269 drv->mExecutable->getExportForeachFuncAddrs()[slot]); 270 rsAssert(mtls.kernel != NULL); 271 mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second; 272 273 if (ain) { 274 mtls.dimX = ain->getType()->getDimX(); 275 mtls.dimY = ain->getType()->getDimY(); 276 mtls.dimZ = ain->getType()->getDimZ(); 277 //mtls.dimArray = ain->getType()->getDimArray(); 278 } else if (aout) { 279 mtls.dimX = aout->getType()->getDimX(); 280 mtls.dimY = aout->getType()->getDimY(); 281 mtls.dimZ = aout->getType()->getDimZ(); 282 //mtls.dimArray = aout->getType()->getDimArray(); 283 } else { 284 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 285 return; 286 } 287 288 if (!sc || (sc->xEnd == 0)) { 289 mtls.xEnd = mtls.dimX; 290 } else { 291 rsAssert(sc->xStart < mtls.dimX); 292 rsAssert(sc->xEnd <= mtls.dimX); 293 rsAssert(sc->xStart < sc->xEnd); 294 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 295 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 296 if (mtls.xStart >= mtls.xEnd) return; 297 } 298 299 if (!sc || (sc->yEnd == 0)) { 300 mtls.yEnd = mtls.dimY; 301 } else { 302 rsAssert(sc->yStart < mtls.dimY); 303 rsAssert(sc->yEnd <= mtls.dimY); 304 rsAssert(sc->yStart < sc->yEnd); 305 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 306 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 307 if (mtls.yStart >= mtls.yEnd) return; 308 } 309 310 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 311 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 312 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 313 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 314 315 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 316 317 Context *mrsc = (Context *)rsc; 318 Script * oldTLS = setTLS(s); 319 320 mtls.rsc = mrsc; 321 mtls.ain = ain; 322 mtls.aout = aout; 323 mtls.script = s; 324 mtls.usr = usr; 325 mtls.usrLen = usrLen; 326 mtls.mSliceSize = 10; 327 mtls.mSliceNum = 0; 328 329 mtls.ptrIn = NULL; 330 mtls.eStrideIn = 0; 331 if (ain) { 332 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 333 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 334 mtls.yStrideIn = ain->mHal.drvState.stride; 335 } 336 337 mtls.ptrOut = NULL; 338 mtls.eStrideOut = 0; 339 if (aout) { 340 mtls.ptrOut = (uint8_t *)aout->getPtr(); 341 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 342 mtls.yStrideOut = aout->mHal.drvState.stride; 343 } 344 345 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 346 if (mtls.dimY > 1) { 347 rsdLaunchThreads(mrsc, wc_xy, &mtls); 348 } else { 349 rsdLaunchThreads(mrsc, wc_x, &mtls); 350 } 351 352 //ALOGE("launch 1"); 353 } else { 354 RsForEachStubParamStruct p; 355 memset(&p, 0, sizeof(p)); 356 p.usr = mtls.usr; 357 p.usr_len = mtls.usrLen; 358 uint32_t sig = mtls.sig; 359 360 //ALOGE("launch 3"); 361 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 362 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 363 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 364 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 365 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 366 mtls.dimX * mtls.dimY * p.z + 367 mtls.dimX * p.y; 368 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 369 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 370 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 371 mtls.eStrideOut); 372 } 373 } 374 } 375 } 376 377 setTLS(oldTLS); 378} 379 380 381int rsdScriptInvokeRoot(const Context *dc, Script *script) { 382 DrvScript *drv = (DrvScript *)script->mHal.drv; 383 384 Script * oldTLS = setTLS(script); 385 int ret = drv->mRoot(); 386 setTLS(oldTLS); 387 388 return ret; 389} 390 391void rsdScriptInvokeInit(const Context *dc, Script *script) { 392 DrvScript *drv = (DrvScript *)script->mHal.drv; 393 394 if (drv->mInit) { 395 drv->mInit(); 396 } 397} 398 399void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 400 DrvScript *drv = (DrvScript *)script->mHal.drv; 401 402 if (drv->mFreeChildren) { 403 drv->mFreeChildren(); 404 } 405} 406 407void rsdScriptInvokeFunction(const Context *dc, Script *script, 408 uint32_t slot, 409 const void *params, 410 size_t paramLength) { 411 DrvScript *drv = (DrvScript *)script->mHal.drv; 412 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 413 414 Script * oldTLS = setTLS(script); 415 reinterpret_cast<void (*)(const void *, uint32_t)>( 416 drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength); 417 setTLS(oldTLS); 418} 419 420void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 421 uint32_t slot, void *data, size_t dataLength) { 422 DrvScript *drv = (DrvScript *)script->mHal.drv; 423 //rsAssert(!script->mFieldIsObject[slot]); 424 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 425 426 int32_t *destPtr = reinterpret_cast<int32_t *>( 427 drv->mExecutable->getExportVarAddrs()[slot]); 428 if (!destPtr) { 429 //ALOGV("Calling setVar on slot = %i which is null", slot); 430 return; 431 } 432 433 memcpy(destPtr, data, dataLength); 434} 435 436void rsdScriptSetGlobalVarWithElemDims( 437 const android::renderscript::Context *dc, 438 const android::renderscript::Script *script, 439 uint32_t slot, void *data, size_t dataLength, 440 const android::renderscript::Element *elem, 441 const size_t *dims, size_t dimLength) { 442 DrvScript *drv = (DrvScript *)script->mHal.drv; 443 444 int32_t *destPtr = reinterpret_cast<int32_t *>( 445 drv->mExecutable->getExportVarAddrs()[slot]); 446 if (!destPtr) { 447 //ALOGV("Calling setVar on slot = %i which is null", slot); 448 return; 449 } 450 451 // We want to look at dimension in terms of integer components, 452 // but dimLength is given in terms of bytes. 453 dimLength /= sizeof(int); 454 455 // Only a single dimension is currently supported. 456 rsAssert(dimLength == 1); 457 if (dimLength == 1) { 458 // First do the increment loop. 459 size_t stride = elem->getSizeBytes(); 460 char *cVal = reinterpret_cast<char *>(data); 461 for (size_t i = 0; i < dims[0]; i++) { 462 elem->incRefs(cVal); 463 cVal += stride; 464 } 465 466 // Decrement loop comes after (to prevent race conditions). 467 char *oldVal = reinterpret_cast<char *>(destPtr); 468 for (size_t i = 0; i < dims[0]; i++) { 469 elem->decRefs(oldVal); 470 oldVal += stride; 471 } 472 } 473 474 memcpy(destPtr, data, dataLength); 475} 476 477void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 478 DrvScript *drv = (DrvScript *)script->mHal.drv; 479 //rsAssert(!script->mFieldIsObject[slot]); 480 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 481 482 int32_t *destPtr = reinterpret_cast<int32_t *>( 483 drv->mExecutable->getExportVarAddrs()[slot]); 484 if (!destPtr) { 485 //ALOGV("Calling setVar on slot = %i which is null", slot); 486 return; 487 } 488 489 memcpy(destPtr, &data, sizeof(void *)); 490} 491 492void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 493 DrvScript *drv = (DrvScript *)script->mHal.drv; 494 //rsAssert(script->mFieldIsObject[slot]); 495 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 496 497 int32_t *destPtr = reinterpret_cast<int32_t *>( 498 drv->mExecutable->getExportVarAddrs()[slot]); 499 if (!destPtr) { 500 //ALOGV("Calling setVar on slot = %i which is null", slot); 501 return; 502 } 503 504 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 505} 506 507void rsdScriptDestroy(const Context *dc, Script *script) { 508 DrvScript *drv = (DrvScript *)script->mHal.drv; 509 510 if (drv == NULL) { 511 return; 512 } 513 514 if (drv->mExecutable) { 515 Vector<void *>::const_iterator var_addr_iter = 516 drv->mExecutable->getExportVarAddrs().begin(); 517 Vector<void *>::const_iterator var_addr_end = 518 drv->mExecutable->getExportVarAddrs().end(); 519 520 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = 521 drv->mExecutable->getInfo().getObjectSlots().begin(); 522 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = 523 drv->mExecutable->getInfo().getObjectSlots().end(); 524 525 while ((var_addr_iter != var_addr_end) && 526 (is_object_iter != is_object_end)) { 527 // The field address can be NULL if the script-side has optimized 528 // the corresponding global variable away. 529 ObjectBase **obj_addr = 530 reinterpret_cast<ObjectBase **>(*var_addr_iter); 531 if (*is_object_iter) { 532 if (*var_addr_iter != NULL) { 533 rsrClearObject(dc, script, obj_addr); 534 } 535 } 536 var_addr_iter++; 537 is_object_iter++; 538 } 539 } 540 541 delete drv->mCompilerContext; 542 delete drv->mCompilerDriver; 543 delete drv->mExecutable; 544 545 free(drv); 546 script->mHal.drv = NULL; 547} 548 549 550