// rsdBcc.cpp revision a6ab26a035017143a24efa11859c9d8815a27f41
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcinfo/MetadataExtractor.h> 22 23#include "rsContext.h" 24#include "rsScriptC.h" 25 26#include "utils/Timers.h" 27#include "utils/StopWatch.h" 28 29using namespace android; 30using namespace android::renderscript; 31 32struct DrvScript { 33 int (*mRoot)(); 34 int (*mRootExpand)(); 35 void (*mInit)(); 36 void (*mFreeChildren)(); 37 38 BCCScriptRef mBccScript; 39 40 bcinfo::MetadataExtractor *ME; 41 42 InvokeFunc_t *mInvokeFunctions; 43 ForEachFunc_t *mForEachFunctions; 44 void ** mFieldAddress; 45 bool * mFieldIsObject; 46 const uint32_t *mExportForEachSignatureList; 47 48 const uint8_t * mScriptText; 49 uint32_t mScriptTextLength; 50}; 51 52typedef void (*outer_foreach_t)( 53 const android::renderscript::RsForEachStubParamStruct *, 54 uint32_t x1, uint32_t x2, 55 uint32_t instep, uint32_t outstep); 56 57static Script * setTLS(Script *sc) { 58 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 59 rsAssert(tls); 60 Script *old = tls->mScript; 61 tls->mScript = sc; 62 return old; 63} 64 65 66bool rsdScriptInit(const Context *rsc, 67 ScriptC *script, 68 char const *resName, 69 char const *cacheDir, 70 uint8_t const *bitcode, 71 size_t bitcodeSize, 72 uint32_t flags) { 73 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, 
resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 74 75 pthread_mutex_lock(&rsdgInitMutex); 76 77 size_t exportFuncCount = 0; 78 size_t exportVarCount = 0; 79 size_t objectSlotCount = 0; 80 size_t exportForEachSignatureCount = 0; 81 82 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 83 if (drv == NULL) { 84 goto error; 85 } 86 script->mHal.drv = drv; 87 88 drv->mBccScript = bccCreateScript(); 89 script->mHal.info.isThreadable = true; 90 drv->mScriptText = bitcode; 91 drv->mScriptTextLength = bitcodeSize; 92 93 94 drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText, 95 drv->mScriptTextLength); 96 if (!drv->ME->extract()) { 97 ALOGE("bcinfo: failed to read script metadata"); 98 goto error; 99 } 100 101 //ALOGE("mBccScript %p", script->mBccScript); 102 103 if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) { 104 ALOGE("bcc: FAILS to register symbol callback"); 105 goto error; 106 } 107 108 if (bccReadBC(drv->mBccScript, 109 resName, 110 (char const *)drv->mScriptText, 111 drv->mScriptTextLength, 0) != 0) { 112 ALOGE("bcc: FAILS to read bitcode"); 113 goto error; 114 } 115 116 if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) { 117 ALOGE("bcc: FAILS to link bitcode"); 118 goto error; 119 } 120 121 if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) { 122 ALOGE("bcc: FAILS to prepare executable"); 123 goto error; 124 } 125 126 drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root")); 127 drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand")); 128 drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init")); 129 drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor")); 130 131 exportFuncCount = drv->ME->getExportFuncCount(); 132 if (exportFuncCount > 0) { 133 drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount, 134 
sizeof(InvokeFunc_t)); 135 bccGetExportFuncList(drv->mBccScript, exportFuncCount, 136 (void **) drv->mInvokeFunctions); 137 } else { 138 drv->mInvokeFunctions = NULL; 139 } 140 141 exportVarCount = drv->ME->getExportVarCount(); 142 if (exportVarCount > 0) { 143 drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*)); 144 drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool)); 145 bccGetExportVarList(drv->mBccScript, exportVarCount, 146 (void **) drv->mFieldAddress); 147 } else { 148 drv->mFieldAddress = NULL; 149 drv->mFieldIsObject = NULL; 150 } 151 152 objectSlotCount = drv->ME->getObjectSlotCount(); 153 if (objectSlotCount > 0) { 154 const uint32_t *objectSlotList = drv->ME->getObjectSlotList(); 155 for (uint32_t ct=0; ct < objectSlotCount; ct++) { 156 drv->mFieldIsObject[objectSlotList[ct]] = true; 157 } 158 } 159 160 exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount(); 161 drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList(); 162 if (exportForEachSignatureCount > 0) { 163 drv->mForEachFunctions = 164 (ForEachFunc_t*) calloc(exportForEachSignatureCount, 165 sizeof(ForEachFunc_t)); 166 bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount, 167 (void **) drv->mForEachFunctions); 168 } else { 169 drv->mForEachFunctions = NULL; 170 } 171 172 // Copy info over to runtime 173 script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount(); 174 script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount(); 175 script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount(); 176 script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList(); 177 script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList(); 178 179 if (drv->mRootExpand) { 180 script->mHal.info.root = drv->mRootExpand; 181 } else { 182 script->mHal.info.root = drv->mRoot; 183 } 184 185 pthread_mutex_unlock(&rsdgInitMutex); 186 return true; 187 188error: 189 190 
pthread_mutex_unlock(&rsdgInitMutex); 191 if (drv->ME) { 192 delete drv->ME; 193 drv->ME = NULL; 194 } 195 free(drv); 196 return false; 197 198} 199 200typedef struct { 201 Context *rsc; 202 Script *script; 203 ForEachFunc_t kernel; 204 uint32_t sig; 205 const Allocation * ain; 206 Allocation * aout; 207 const void * usr; 208 size_t usrLen; 209 210 uint32_t mSliceSize; 211 volatile int mSliceNum; 212 213 const uint8_t *ptrIn; 214 uint32_t eStrideIn; 215 uint8_t *ptrOut; 216 uint32_t eStrideOut; 217 218 uint32_t xStart; 219 uint32_t xEnd; 220 uint32_t yStart; 221 uint32_t yEnd; 222 uint32_t zStart; 223 uint32_t zEnd; 224 uint32_t arrayStart; 225 uint32_t arrayEnd; 226 227 uint32_t dimX; 228 uint32_t dimY; 229 uint32_t dimZ; 230 uint32_t dimArray; 231} MTLaunchStruct; 232typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 233 234static void wc_xy(void *usr, uint32_t idx) { 235 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 236 RsForEachStubParamStruct p; 237 memset(&p, 0, sizeof(p)); 238 p.usr = mtls->usr; 239 p.usr_len = mtls->usrLen; 240 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 241 uint32_t sig = mtls->sig; 242 243 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 244 while (1) { 245 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 246 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 247 uint32_t yEnd = yStart + mtls->mSliceSize; 248 yEnd = rsMin(yEnd, mtls->yEnd); 249 if (yEnd <= yStart) { 250 return; 251 } 252 253 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 254 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 255 for (p.y = yStart; p.y < yEnd; p.y++) { 256 uint32_t offset = mtls->dimX * p.y; 257 p.out = mtls->ptrOut + (mtls->eStrideOut * offset); 258 p.in = mtls->ptrIn + (mtls->eStrideIn * offset); 259 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 260 } 261 } 262} 263 264static void wc_x(void *usr, uint32_t idx) 
{ 265 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 266 RsForEachStubParamStruct p; 267 memset(&p, 0, sizeof(p)); 268 p.usr = mtls->usr; 269 p.usr_len = mtls->usrLen; 270 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 271 uint32_t sig = mtls->sig; 272 273 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 274 while (1) { 275 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 276 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 277 uint32_t xEnd = xStart + mtls->mSliceSize; 278 xEnd = rsMin(xEnd, mtls->xEnd); 279 if (xEnd <= xStart) { 280 return; 281 } 282 283 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 284 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 285 286 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 287 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 288 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 289 } 290} 291 292void rsdScriptInvokeForEach(const Context *rsc, 293 Script *s, 294 uint32_t slot, 295 const Allocation * ain, 296 Allocation * aout, 297 const void * usr, 298 uint32_t usrLen, 299 const RsScriptCall *sc) { 300 301 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 302 303 MTLaunchStruct mtls; 304 memset(&mtls, 0, sizeof(mtls)); 305 306 DrvScript *drv = (DrvScript *)s->mHal.drv; 307 mtls.kernel = drv->mForEachFunctions[slot]; 308 rsAssert(mtls.kernel != NULL); 309 mtls.sig = 0x1f; // temp fix for old apps, full table in slang_rs_export_foreach.cpp 310 if (drv->mExportForEachSignatureList) { 311 mtls.sig = drv->mExportForEachSignatureList[slot]; 312 } 313 if (ain) { 314 mtls.dimX = ain->getType()->getDimX(); 315 mtls.dimY = ain->getType()->getDimY(); 316 mtls.dimZ = ain->getType()->getDimZ(); 317 //mtls.dimArray = ain->getType()->getDimArray(); 318 } else if (aout) { 319 mtls.dimX = aout->getType()->getDimX(); 320 mtls.dimY = aout->getType()->getDimY(); 321 mtls.dimZ = aout->getType()->getDimZ(); 322 //mtls.dimArray = aout->getType()->getDimArray(); 323 } else { 324 
rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 325 return; 326 } 327 328 if (!sc || (sc->xEnd == 0)) { 329 mtls.xEnd = mtls.dimX; 330 } else { 331 rsAssert(sc->xStart < mtls.dimX); 332 rsAssert(sc->xEnd <= mtls.dimX); 333 rsAssert(sc->xStart < sc->xEnd); 334 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 335 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 336 if (mtls.xStart >= mtls.xEnd) return; 337 } 338 339 if (!sc || (sc->yEnd == 0)) { 340 mtls.yEnd = mtls.dimY; 341 } else { 342 rsAssert(sc->yStart < mtls.dimY); 343 rsAssert(sc->yEnd <= mtls.dimY); 344 rsAssert(sc->yStart < sc->yEnd); 345 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 346 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 347 if (mtls.yStart >= mtls.yEnd) return; 348 } 349 350 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 351 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 352 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 353 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 354 355 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 356 357 Context *mrsc = (Context *)rsc; 358 Script * oldTLS = setTLS(s); 359 360 mtls.rsc = mrsc; 361 mtls.ain = ain; 362 mtls.aout = aout; 363 mtls.script = s; 364 mtls.usr = usr; 365 mtls.usrLen = usrLen; 366 mtls.mSliceSize = 10; 367 mtls.mSliceNum = 0; 368 369 mtls.ptrIn = NULL; 370 mtls.eStrideIn = 0; 371 if (ain) { 372 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 373 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 374 } 375 376 mtls.ptrOut = NULL; 377 mtls.eStrideOut = 0; 378 if (aout) { 379 mtls.ptrOut = (uint8_t *)aout->getPtr(); 380 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 381 } 382 383 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 384 if (mtls.dimY > 1) { 385 rsdLaunchThreads(mrsc, wc_xy, &mtls); 386 } else { 387 rsdLaunchThreads(mrsc, wc_x, &mtls); 388 } 389 390 //ALOGE("launch 1"); 391 } else { 392 RsForEachStubParamStruct p; 393 memset(&p, 0, sizeof(p)); 394 p.usr = mtls.usr; 395 p.usr_len = mtls.usrLen; 396 
uint32_t sig = mtls.sig; 397 398 //ALOGE("launch 3"); 399 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 400 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 401 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 402 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 403 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 404 mtls.dimX * mtls.dimY * p.z + 405 mtls.dimX * p.y; 406 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 407 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 408 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 409 mtls.eStrideOut); 410 } 411 } 412 } 413 } 414 415 setTLS(oldTLS); 416} 417 418 419int rsdScriptInvokeRoot(const Context *dc, Script *script) { 420 DrvScript *drv = (DrvScript *)script->mHal.drv; 421 422 Script * oldTLS = setTLS(script); 423 int ret = drv->mRoot(); 424 setTLS(oldTLS); 425 426 return ret; 427} 428 429void rsdScriptInvokeInit(const Context *dc, Script *script) { 430 DrvScript *drv = (DrvScript *)script->mHal.drv; 431 432 if (drv->mInit) { 433 drv->mInit(); 434 } 435} 436 437void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 438 DrvScript *drv = (DrvScript *)script->mHal.drv; 439 440 if (drv->mFreeChildren) { 441 drv->mFreeChildren(); 442 } 443} 444 445void rsdScriptInvokeFunction(const Context *dc, Script *script, 446 uint32_t slot, 447 const void *params, 448 size_t paramLength) { 449 DrvScript *drv = (DrvScript *)script->mHal.drv; 450 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 451 452 Script * oldTLS = setTLS(script); 453 ((void (*)(const void *, uint32_t)) 454 drv->mInvokeFunctions[slot])(params, paramLength); 455 setTLS(oldTLS); 456} 457 458void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 459 uint32_t slot, void *data, size_t dataLength) { 460 DrvScript *drv = (DrvScript *)script->mHal.drv; 461 //rsAssert(!script->mFieldIsObject[slot]); 462 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 
463 464 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 465 if (!destPtr) { 466 //ALOGV("Calling setVar on slot = %i which is null", slot); 467 return; 468 } 469 470 memcpy(destPtr, data, dataLength); 471} 472 473void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 474 DrvScript *drv = (DrvScript *)script->mHal.drv; 475 //rsAssert(!script->mFieldIsObject[slot]); 476 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 477 478 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 479 if (!destPtr) { 480 //ALOGV("Calling setVar on slot = %i which is null", slot); 481 return; 482 } 483 484 memcpy(destPtr, &data, sizeof(void *)); 485} 486 487void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 488 DrvScript *drv = (DrvScript *)script->mHal.drv; 489 //rsAssert(script->mFieldIsObject[slot]); 490 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 491 492 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 493 if (!destPtr) { 494 //ALOGV("Calling setVar on slot = %i which is null", slot); 495 return; 496 } 497 498 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 499} 500 501void rsdScriptDestroy(const Context *dc, Script *script) { 502 DrvScript *drv = (DrvScript *)script->mHal.drv; 503 504 if (drv->mFieldAddress) { 505 size_t exportVarCount = drv->ME->getExportVarCount(); 506 for (size_t ct = 0; ct < exportVarCount; ct++) { 507 if (drv->mFieldIsObject[ct]) { 508 // The field address can be NULL if the script-side has 509 // optimized the corresponding global variable away. 
510 if (drv->mFieldAddress[ct]) { 511 rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]); 512 } 513 } 514 } 515 free(drv->mFieldAddress); 516 drv->mFieldAddress = NULL; 517 free(drv->mFieldIsObject); 518 drv->mFieldIsObject = NULL; } 519 520 if (drv->mInvokeFunctions) { 521 free(drv->mInvokeFunctions); 522 drv->mInvokeFunctions = NULL; 523 } 524 525 if (drv->mForEachFunctions) { 526 free(drv->mForEachFunctions); 527 drv->mForEachFunctions = NULL; 528 } 529 530 delete drv->ME; 531 drv->ME = NULL; 532 533 free(drv); 534 script->mHal.drv = NULL; 535 536} 537 538 539