// rsdBcc.cpp revision f22c8ace148b69847aaf5ad1829e9ec95a44df6c
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcinfo/MetadataExtractor.h> 22 23#include "rsContext.h" 24#include "rsScriptC.h" 25 26#include "utils/Timers.h" 27#include "utils/StopWatch.h" 28 29using namespace android; 30using namespace android::renderscript; 31 32struct DrvScript { 33 int (*mRoot)(); 34 int (*mRootExpand)(); 35 void (*mInit)(); 36 void (*mFreeChildren)(); 37 38 BCCScriptRef mBccScript; 39 40 bcinfo::MetadataExtractor *ME; 41 42 InvokeFunc_t *mInvokeFunctions; 43 ForEachFunc_t *mForEachFunctions; 44 void ** mFieldAddress; 45 bool * mFieldIsObject; 46 const uint32_t *mExportForEachSignatureList; 47 48 const uint8_t * mScriptText; 49 uint32_t mScriptTextLength; 50}; 51 52typedef void (*outer_foreach_t)( 53 const android::renderscript::RsForEachStubParamStruct *, 54 uint32_t x1, uint32_t x2, 55 uint32_t instep, uint32_t outstep); 56 57static Script * setTLS(Script *sc) { 58 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 59 rsAssert(tls); 60 Script *old = tls->mScript; 61 tls->mScript = sc; 62 return old; 63} 64 65 66bool rsdScriptInit(const Context *rsc, 67 ScriptC *script, 68 char const *resName, 69 char const *cacheDir, 70 uint8_t const *bitcode, 71 size_t bitcodeSize, 72 uint32_t flags) { 73 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, 
resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 74 75 pthread_mutex_lock(&rsdgInitMutex); 76 77 size_t exportFuncCount = 0; 78 size_t exportVarCount = 0; 79 size_t objectSlotCount = 0; 80 size_t exportForEachSignatureCount = 0; 81 82 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 83 if (drv == NULL) { 84 goto error; 85 } 86 script->mHal.drv = drv; 87 88 drv->mBccScript = bccCreateScript(); 89 script->mHal.info.isThreadable = true; 90 drv->mScriptText = bitcode; 91 drv->mScriptTextLength = bitcodeSize; 92 93 94 drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText, 95 drv->mScriptTextLength); 96 if (!drv->ME->extract()) { 97 ALOGE("bcinfo: failed to read script metadata"); 98 goto error; 99 } 100 101 //ALOGE("mBccScript %p", script->mBccScript); 102 103 if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) { 104 ALOGE("bcc: FAILS to register symbol callback"); 105 goto error; 106 } 107 108 if (bccReadBC(drv->mBccScript, 109 resName, 110 (char const *)drv->mScriptText, 111 drv->mScriptTextLength, 0) != 0) { 112 ALOGE("bcc: FAILS to read bitcode"); 113 goto error; 114 } 115 116 if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) { 117 ALOGE("bcc: FAILS to link bitcode"); 118 goto error; 119 } 120 121 if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) { 122 ALOGE("bcc: FAILS to prepare executable"); 123 goto error; 124 } 125 126 drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root")); 127 drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand")); 128 drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init")); 129 drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor")); 130 131 exportFuncCount = drv->ME->getExportFuncCount(); 132 if (exportFuncCount > 0) { 133 drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount, 134 
sizeof(InvokeFunc_t)); 135 bccGetExportFuncList(drv->mBccScript, exportFuncCount, 136 (void **) drv->mInvokeFunctions); 137 } else { 138 drv->mInvokeFunctions = NULL; 139 } 140 141 exportVarCount = drv->ME->getExportVarCount(); 142 if (exportVarCount > 0) { 143 drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*)); 144 drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool)); 145 bccGetExportVarList(drv->mBccScript, exportVarCount, 146 (void **) drv->mFieldAddress); 147 } else { 148 drv->mFieldAddress = NULL; 149 drv->mFieldIsObject = NULL; 150 } 151 152 objectSlotCount = drv->ME->getObjectSlotCount(); 153 if (objectSlotCount > 0) { 154 const uint32_t *objectSlotList = drv->ME->getObjectSlotList(); 155 for (uint32_t ct=0; ct < objectSlotCount; ct++) { 156 drv->mFieldIsObject[objectSlotList[ct]] = true; 157 } 158 } 159 160 exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount(); 161 drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList(); 162 if (exportForEachSignatureCount > 0) { 163 drv->mForEachFunctions = 164 (ForEachFunc_t*) calloc(exportForEachSignatureCount, 165 sizeof(ForEachFunc_t)); 166 bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount, 167 (void **) drv->mForEachFunctions); 168 } else { 169 drv->mForEachFunctions = NULL; 170 } 171 172 // Copy info over to runtime 173 script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount(); 174 script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount(); 175 script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount(); 176 script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList(); 177 script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList(); 178 179 if (drv->mRootExpand) { 180 script->mHal.info.root = drv->mRootExpand; 181 } else { 182 script->mHal.info.root = drv->mRoot; 183 } 184 185 pthread_mutex_unlock(&rsdgInitMutex); 186 return true; 187 188error: 189 190 
pthread_mutex_unlock(&rsdgInitMutex); 191 if (drv->ME) { 192 delete drv->ME; 193 drv->ME = NULL; 194 } 195 free(drv); 196 return false; 197 198} 199 200typedef struct { 201 Context *rsc; 202 Script *script; 203 ForEachFunc_t kernel; 204 uint32_t sig; 205 const Allocation * ain; 206 Allocation * aout; 207 const void * usr; 208 size_t usrLen; 209 210 uint32_t mSliceSize; 211 volatile int mSliceNum; 212 213 const uint8_t *ptrIn; 214 uint32_t eStrideIn; 215 uint8_t *ptrOut; 216 uint32_t eStrideOut; 217 218 uint32_t yStrideIn; 219 uint32_t yStrideOut; 220 221 uint32_t xStart; 222 uint32_t xEnd; 223 uint32_t yStart; 224 uint32_t yEnd; 225 uint32_t zStart; 226 uint32_t zEnd; 227 uint32_t arrayStart; 228 uint32_t arrayEnd; 229 230 uint32_t dimX; 231 uint32_t dimY; 232 uint32_t dimZ; 233 uint32_t dimArray; 234} MTLaunchStruct; 235typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 236 237static void wc_xy(void *usr, uint32_t idx) { 238 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 239 RsForEachStubParamStruct p; 240 memset(&p, 0, sizeof(p)); 241 p.usr = mtls->usr; 242 p.usr_len = mtls->usrLen; 243 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 244 uint32_t sig = mtls->sig; 245 246 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 247 while (1) { 248 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 249 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 250 uint32_t yEnd = yStart + mtls->mSliceSize; 251 yEnd = rsMin(yEnd, mtls->yEnd); 252 if (yEnd <= yStart) { 253 return; 254 } 255 256 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 257 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 258 for (p.y = yStart; p.y < yEnd; p.y++) { 259 p.out = mtls->ptrOut + (mtls->yStrideOut * p.y); 260 p.in = mtls->ptrIn + (mtls->yStrideIn * p.y); 261 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 262 } 263 } 264} 265 266static void wc_x(void *usr, 
uint32_t idx) { 267 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 268 RsForEachStubParamStruct p; 269 memset(&p, 0, sizeof(p)); 270 p.usr = mtls->usr; 271 p.usr_len = mtls->usrLen; 272 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 273 uint32_t sig = mtls->sig; 274 275 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 276 while (1) { 277 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 278 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 279 uint32_t xEnd = xStart + mtls->mSliceSize; 280 xEnd = rsMin(xEnd, mtls->xEnd); 281 if (xEnd <= xStart) { 282 return; 283 } 284 285 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 286 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 287 288 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 289 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 290 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 291 } 292} 293 294void rsdScriptInvokeForEach(const Context *rsc, 295 Script *s, 296 uint32_t slot, 297 const Allocation * ain, 298 Allocation * aout, 299 const void * usr, 300 uint32_t usrLen, 301 const RsScriptCall *sc) { 302 303 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 304 305 MTLaunchStruct mtls; 306 memset(&mtls, 0, sizeof(mtls)); 307 308 DrvScript *drv = (DrvScript *)s->mHal.drv; 309 mtls.kernel = drv->mForEachFunctions[slot]; 310 rsAssert(mtls.kernel != NULL); 311 mtls.sig = 0x1f; // temp fix for old apps, full table in slang_rs_export_foreach.cpp 312 if (drv->mExportForEachSignatureList) { 313 mtls.sig = drv->mExportForEachSignatureList[slot]; 314 } 315 if (ain) { 316 mtls.dimX = ain->getType()->getDimX(); 317 mtls.dimY = ain->getType()->getDimY(); 318 mtls.dimZ = ain->getType()->getDimZ(); 319 //mtls.dimArray = ain->getType()->getDimArray(); 320 } else if (aout) { 321 mtls.dimX = aout->getType()->getDimX(); 322 mtls.dimY = aout->getType()->getDimY(); 323 mtls.dimZ = aout->getType()->getDimZ(); 324 //mtls.dimArray = aout->getType()->getDimArray(); 325 } else { 326 
rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 327 return; 328 } 329 330 if (!sc || (sc->xEnd == 0)) { 331 mtls.xEnd = mtls.dimX; 332 } else { 333 rsAssert(sc->xStart < mtls.dimX); 334 rsAssert(sc->xEnd <= mtls.dimX); 335 rsAssert(sc->xStart < sc->xEnd); 336 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 337 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 338 if (mtls.xStart >= mtls.xEnd) return; 339 } 340 341 if (!sc || (sc->yEnd == 0)) { 342 mtls.yEnd = mtls.dimY; 343 } else { 344 rsAssert(sc->yStart < mtls.dimY); 345 rsAssert(sc->yEnd <= mtls.dimY); 346 rsAssert(sc->yStart < sc->yEnd); 347 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 348 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 349 if (mtls.yStart >= mtls.yEnd) return; 350 } 351 352 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 353 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 354 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 355 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 356 357 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 358 359 Context *mrsc = (Context *)rsc; 360 Script * oldTLS = setTLS(s); 361 362 mtls.rsc = mrsc; 363 mtls.ain = ain; 364 mtls.aout = aout; 365 mtls.script = s; 366 mtls.usr = usr; 367 mtls.usrLen = usrLen; 368 mtls.mSliceSize = 10; 369 mtls.mSliceNum = 0; 370 371 mtls.ptrIn = NULL; 372 mtls.eStrideIn = 0; 373 if (ain) { 374 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 375 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 376 mtls.yStrideIn = ain->mHal.drvState.stride; 377 } 378 379 mtls.ptrOut = NULL; 380 mtls.eStrideOut = 0; 381 if (aout) { 382 mtls.ptrOut = (uint8_t *)aout->getPtr(); 383 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 384 mtls.yStrideOut = aout->mHal.drvState.stride; 385 } 386 387 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 388 if (mtls.dimY > 1) { 389 rsdLaunchThreads(mrsc, wc_xy, &mtls); 390 } else { 391 rsdLaunchThreads(mrsc, wc_x, &mtls); 392 } 393 394 //ALOGE("launch 1"); 395 } else { 396 
RsForEachStubParamStruct p; 397 memset(&p, 0, sizeof(p)); 398 p.usr = mtls.usr; 399 p.usr_len = mtls.usrLen; 400 uint32_t sig = mtls.sig; 401 402 //ALOGE("launch 3"); 403 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 404 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 405 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 406 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 407 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 408 mtls.dimX * mtls.dimY * p.z + 409 mtls.dimX * p.y; 410 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 411 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 412 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 413 mtls.eStrideOut); 414 } 415 } 416 } 417 } 418 419 setTLS(oldTLS); 420} 421 422 423int rsdScriptInvokeRoot(const Context *dc, Script *script) { 424 DrvScript *drv = (DrvScript *)script->mHal.drv; 425 426 Script * oldTLS = setTLS(script); 427 int ret = drv->mRoot(); 428 setTLS(oldTLS); 429 430 return ret; 431} 432 433void rsdScriptInvokeInit(const Context *dc, Script *script) { 434 DrvScript *drv = (DrvScript *)script->mHal.drv; 435 436 if (drv->mInit) { 437 drv->mInit(); 438 } 439} 440 441void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 442 DrvScript *drv = (DrvScript *)script->mHal.drv; 443 444 if (drv->mFreeChildren) { 445 drv->mFreeChildren(); 446 } 447} 448 449void rsdScriptInvokeFunction(const Context *dc, Script *script, 450 uint32_t slot, 451 const void *params, 452 size_t paramLength) { 453 DrvScript *drv = (DrvScript *)script->mHal.drv; 454 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 455 456 Script * oldTLS = setTLS(script); 457 ((void (*)(const void *, uint32_t)) 458 drv->mInvokeFunctions[slot])(params, paramLength); 459 setTLS(oldTLS); 460} 461 462void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 463 uint32_t slot, void *data, size_t dataLength) { 464 DrvScript *drv = (DrvScript *)script->mHal.drv; 465 
//rsAssert(!script->mFieldIsObject[slot]); 466 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 467 468 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 469 if (!destPtr) { 470 //ALOGV("Calling setVar on slot = %i which is null", slot); 471 return; 472 } 473 474 memcpy(destPtr, data, dataLength); 475} 476 477void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 478 DrvScript *drv = (DrvScript *)script->mHal.drv; 479 //rsAssert(!script->mFieldIsObject[slot]); 480 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 481 482 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 483 if (!destPtr) { 484 //ALOGV("Calling setVar on slot = %i which is null", slot); 485 return; 486 } 487 488 memcpy(destPtr, &data, sizeof(void *)); 489} 490 491void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 492 DrvScript *drv = (DrvScript *)script->mHal.drv; 493 //rsAssert(script->mFieldIsObject[slot]); 494 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 495 496 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 497 if (!destPtr) { 498 //ALOGV("Calling setVar on slot = %i which is null", slot); 499 return; 500 } 501 502 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 503} 504 505void rsdScriptDestroy(const Context *dc, Script *script) { 506 DrvScript *drv = (DrvScript *)script->mHal.drv; 507 508 if (drv->mFieldAddress) { 509 size_t exportVarCount = drv->ME->getExportVarCount(); 510 for (size_t ct = 0; ct < exportVarCount; ct++) { 511 if (drv->mFieldIsObject[ct]) { 512 // The field address can be NULL if the script-side has 513 // optimized the corresponding global variable away. 
514 if (drv->mFieldAddress[ct]) { 515 rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]); 516 } 517 } 518 } 519 free(drv->mFieldAddress); 520 drv->mFieldAddress = NULL; 521 free(drv->mFieldIsObject); 522 drv->mFieldIsObject = NULL; } 523 524 if (drv->mInvokeFunctions) { 525 free(drv->mInvokeFunctions); 526 drv->mInvokeFunctions = NULL; 527 } 528 529 if (drv->mForEachFunctions) { 530 free(drv->mForEachFunctions); 531 drv->mForEachFunctions = NULL; 532 } 533 534 delete drv->ME; 535 drv->ME = NULL; 536 537 free(drv); 538 script->mHal.drv = NULL; 539 540} 541 542 543