rsdBcc.cpp revision 4419977d78018a9933c7f455fe001f644f2d638b
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcinfo/MetadataExtractor.h> 22 23#include "rsContext.h" 24#include "rsScriptC.h" 25 26#include "utils/Timers.h" 27#include "utils/StopWatch.h" 28extern "C" { 29#include "libdex/ZipArchive.h" 30} 31 32using namespace android; 33using namespace android::renderscript; 34 35struct DrvScript { 36 int (*mRoot)(); 37 int (*mRootExpand)(); 38 void (*mInit)(); 39 void (*mFreeChildren)(); 40 41 BCCScriptRef mBccScript; 42 43 bcinfo::MetadataExtractor *ME; 44 45 InvokeFunc_t *mInvokeFunctions; 46 ForEachFunc_t *mForEachFunctions; 47 void ** mFieldAddress; 48 bool * mFieldIsObject; 49 const uint32_t *mExportForEachSignatureList; 50 51 const uint8_t * mScriptText; 52 uint32_t mScriptTextLength; 53}; 54 55typedef void (*outer_foreach_t)( 56 const android::renderscript::RsForEachStubParamStruct *, 57 uint32_t x1, uint32_t x2, 58 uint32_t instep, uint32_t outstep); 59 60static Script * setTLS(Script *sc) { 61 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 62 rsAssert(tls); 63 Script *old = tls->mScript; 64 tls->mScript = sc; 65 return old; 66} 67 68 69bool rsdScriptInit(const Context *rsc, 70 ScriptC *script, 71 char const *resName, 72 char const *cacheDir, 73 uint8_t const *bitcode, 74 size_t bitcodeSize, 75 uint32_t flags) { 76 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 77 78 pthread_mutex_lock(&rsdgInitMutex); 79 80 size_t exportFuncCount = 0; 81 size_t exportVarCount = 0; 82 size_t objectSlotCount = 0; 83 size_t exportForEachSignatureCount = 0; 84 85 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 86 if (drv == NULL) { 87 goto error; 88 } 89 script->mHal.drv = drv; 90 91 drv->mBccScript = bccCreateScript(); 92 script->mHal.info.isThreadable = true; 93 drv->mScriptText = bitcode; 94 drv->mScriptTextLength = bitcodeSize; 95 96 97 drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText, 98 drv->mScriptTextLength); 99 if (!drv->ME->extract()) { 100 ALOGE("bcinfo: failed to read script metadata"); 101 goto error; 102 } 103 104 //ALOGE("mBccScript %p", script->mBccScript); 105 106 if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) { 107 ALOGE("bcc: FAILS to register symbol callback"); 108 goto error; 109 } 110 111 if (bccReadBC(drv->mBccScript, 112 resName, 113 (char const *)drv->mScriptText, 114 drv->mScriptTextLength, 0) != 0) { 115 ALOGE("bcc: FAILS to read bitcode"); 116 goto error; 117 } 118 119 if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) { 120 ALOGE("bcc: FAILS to link bitcode"); 121 goto error; 122 } 123 124 if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) { 125 ALOGE("bcc: FAILS to prepare executable"); 126 goto error; 127 } 128 129 drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root")); 130 drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand")); 131 drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init")); 132 drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor")); 133 134 exportFuncCount = drv->ME->getExportFuncCount(); 135 if (exportFuncCount > 0) { 136 drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount, 137 sizeof(InvokeFunc_t)); 138 bccGetExportFuncList(drv->mBccScript, exportFuncCount, 139 (void **) drv->mInvokeFunctions); 140 } else { 141 drv->mInvokeFunctions = NULL; 142 } 143 144 exportVarCount = drv->ME->getExportVarCount(); 145 if (exportVarCount > 0) { 146 drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*)); 147 drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool)); 148 bccGetExportVarList(drv->mBccScript, exportVarCount, 149 (void **) drv->mFieldAddress); 150 } else { 151 drv->mFieldAddress = NULL; 152 drv->mFieldIsObject = NULL; 153 } 154 155 objectSlotCount = drv->ME->getObjectSlotCount(); 156 if (objectSlotCount > 0) { 157 const uint32_t *objectSlotList = drv->ME->getObjectSlotList(); 158 for (uint32_t ct=0; ct < objectSlotCount; ct++) { 159 drv->mFieldIsObject[objectSlotList[ct]] = true; 160 } 161 } 162 163 exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount(); 164 drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList(); 165 if (exportForEachSignatureCount > 0) { 166 drv->mForEachFunctions = 167 (ForEachFunc_t*) calloc(exportForEachSignatureCount, 168 sizeof(ForEachFunc_t)); 169 bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount, 170 (void **) drv->mForEachFunctions); 171 } else { 172 drv->mForEachFunctions = NULL; 173 } 174 175 // Copy info over to runtime 176 script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount(); 177 script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount(); 178 script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount(); 179 script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList(); 180 script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList(); 181 182 if (drv->mRootExpand) { 183 script->mHal.info.root = drv->mRootExpand; 184 } else { 185 script->mHal.info.root = drv->mRoot; 186 } 187 188 pthread_mutex_unlock(&rsdgInitMutex); 189 return true; 190 191error: 192 193 pthread_mutex_unlock(&rsdgInitMutex); 194 if (drv->ME) { 195 delete drv->ME; 196 drv->ME = NULL; 197 } 198 free(drv); 199 return false; 200 201} 202 203typedef struct { 204 Context *rsc; 205 Script *script; 206 ForEachFunc_t kernel; 207 uint32_t sig; 208 const Allocation * ain; 209 Allocation * aout; 210 const void * usr; 211 size_t usrLen; 212 213 uint32_t mSliceSize; 214 volatile int mSliceNum; 215 216 const uint8_t *ptrIn; 217 uint32_t eStrideIn; 218 uint8_t *ptrOut; 219 uint32_t eStrideOut; 220 221 uint32_t xStart; 222 uint32_t xEnd; 223 uint32_t yStart; 224 uint32_t yEnd; 225 uint32_t zStart; 226 uint32_t zEnd; 227 uint32_t arrayStart; 228 uint32_t arrayEnd; 229 230 uint32_t dimX; 231 uint32_t dimY; 232 uint32_t dimZ; 233 uint32_t dimArray; 234} MTLaunchStruct; 235typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 236 237static void wc_xy(void *usr, uint32_t idx) { 238 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 239 RsForEachStubParamStruct p; 240 memset(&p, 0, sizeof(p)); 241 p.usr = mtls->usr; 242 p.usr_len = mtls->usrLen; 243 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 244 uint32_t sig = mtls->sig; 245 246 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 247 while (1) { 248 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 249 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 250 uint32_t yEnd = yStart + mtls->mSliceSize; 251 yEnd = rsMin(yEnd, mtls->yEnd); 252 if (yEnd <= yStart) { 253 return; 254 } 255 256 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 257 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 258 for (p.y = yStart; p.y < yEnd; p.y++) { 259 uint32_t offset = mtls->dimX * p.y; 260 p.out = mtls->ptrOut + (mtls->eStrideOut * offset); 261 p.in = mtls->ptrIn + (mtls->eStrideIn * offset); 262 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 263 } 264 } 265} 266 267static void wc_x(void *usr, uint32_t idx) { 268 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 269 RsForEachStubParamStruct p; 270 memset(&p, 0, sizeof(p)); 271 p.usr = mtls->usr; 272 p.usr_len = mtls->usrLen; 273 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 274 uint32_t sig = mtls->sig; 275 276 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 277 while (1) { 278 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 279 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 280 uint32_t xEnd = xStart + mtls->mSliceSize; 281 xEnd = rsMin(xEnd, mtls->xEnd); 282 if (xEnd <= xStart) { 283 return; 284 } 285 286 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 287 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 288 289 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 290 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 291 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 292 } 293} 294 295void rsdScriptInvokeForEach(const Context *rsc, 296 Script *s, 297 uint32_t slot, 298 const Allocation * ain, 299 Allocation * aout, 300 const void * usr, 301 uint32_t usrLen, 302 const RsScriptCall *sc) { 303 304 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 305 306 MTLaunchStruct mtls; 307 memset(&mtls, 0, sizeof(mtls)); 308 309 DrvScript *drv = (DrvScript *)s->mHal.drv; 310 mtls.kernel = drv->mForEachFunctions[slot]; 311 rsAssert(mtls.kernel != NULL); 312 mtls.sig = 0x1f; // temp fix for old apps, full table in slang_rs_export_foreach.cpp 313 if (drv->mExportForEachSignatureList) { 314 mtls.sig = drv->mExportForEachSignatureList[slot]; 315 } 316 if (ain) { 317 mtls.dimX = ain->getType()->getDimX(); 318 mtls.dimY = ain->getType()->getDimY(); 319 mtls.dimZ = ain->getType()->getDimZ(); 320 //mtls.dimArray = ain->getType()->getDimArray(); 321 } else if (aout) { 322 mtls.dimX = aout->getType()->getDimX(); 323 mtls.dimY = aout->getType()->getDimY(); 324 mtls.dimZ = aout->getType()->getDimZ(); 325 //mtls.dimArray = aout->getType()->getDimArray(); 326 } else { 327 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 328 return; 329 } 330 331 if (!sc || (sc->xEnd == 0)) { 332 mtls.xEnd = mtls.dimX; 333 } else { 334 rsAssert(sc->xStart < mtls.dimX); 335 rsAssert(sc->xEnd <= mtls.dimX); 336 rsAssert(sc->xStart < sc->xEnd); 337 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 338 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 339 if (mtls.xStart >= mtls.xEnd) return; 340 } 341 342 if (!sc || (sc->yEnd == 0)) { 343 mtls.yEnd = mtls.dimY; 344 } else { 345 rsAssert(sc->yStart < mtls.dimY); 346 rsAssert(sc->yEnd <= mtls.dimY); 347 rsAssert(sc->yStart < sc->yEnd); 348 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 349 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 350 if (mtls.yStart >= mtls.yEnd) return; 351 } 352 353 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 354 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 355 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 356 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 357 358 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 359 360 Context *mrsc = (Context *)rsc; 361 Script * oldTLS = setTLS(s); 362 363 mtls.rsc = mrsc; 364 mtls.ain = ain; 365 mtls.aout = aout; 366 mtls.script = s; 367 mtls.usr = usr; 368 mtls.usrLen = usrLen; 369 mtls.mSliceSize = 10; 370 mtls.mSliceNum = 0; 371 372 mtls.ptrIn = NULL; 373 mtls.eStrideIn = 0; 374 if (ain) { 375 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 376 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 377 } 378 379 mtls.ptrOut = NULL; 380 mtls.eStrideOut = 0; 381 if (aout) { 382 mtls.ptrOut = (uint8_t *)aout->getPtr(); 383 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 384 } 385 386 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 387 if (mtls.dimY > 1) { 388 rsdLaunchThreads(mrsc, wc_xy, &mtls); 389 } else { 390 rsdLaunchThreads(mrsc, wc_x, &mtls); 391 } 392 393 //ALOGE("launch 1"); 394 } else { 395 RsForEachStubParamStruct p; 396 memset(&p, 0, sizeof(p)); 397 p.usr = mtls.usr; 398 p.usr_len = mtls.usrLen; 399 uint32_t sig = mtls.sig; 400 401 //ALOGE("launch 3"); 402 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 403 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 404 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 405 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 406 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 407 mtls.dimX * mtls.dimY * p.z + 408 mtls.dimX * p.y; 409 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 410 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 411 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 412 mtls.eStrideOut); 413 } 414 } 415 } 416 } 417 418 setTLS(oldTLS); 419} 420 421 422int rsdScriptInvokeRoot(const Context *dc, Script *script) { 423 DrvScript *drv = (DrvScript *)script->mHal.drv; 424 425 Script * oldTLS = setTLS(script); 426 int ret = drv->mRoot(); 427 setTLS(oldTLS); 428 429 return ret; 430} 431 432void rsdScriptInvokeInit(const Context *dc, Script *script) { 433 DrvScript *drv = (DrvScript *)script->mHal.drv; 434 435 if (drv->mInit) { 436 drv->mInit(); 437 } 438} 439 440void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 441 DrvScript *drv = (DrvScript *)script->mHal.drv; 442 443 if (drv->mFreeChildren) { 444 drv->mFreeChildren(); 445 } 446} 447 448void rsdScriptInvokeFunction(const Context *dc, Script *script, 449 uint32_t slot, 450 const void *params, 451 size_t paramLength) { 452 DrvScript *drv = (DrvScript *)script->mHal.drv; 453 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 454 455 Script * oldTLS = setTLS(script); 456 ((void (*)(const void *, uint32_t)) 457 drv->mInvokeFunctions[slot])(params, paramLength); 458 setTLS(oldTLS); 459} 460 461void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 462 uint32_t slot, void *data, size_t dataLength) { 463 DrvScript *drv = (DrvScript *)script->mHal.drv; 464 //rsAssert(!script->mFieldIsObject[slot]); 465 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 466 467 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 468 if (!destPtr) { 469 //ALOGV("Calling setVar on slot = %i which is null", slot); 470 return; 471 } 472 473 memcpy(destPtr, data, dataLength); 474} 475 476void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 477 DrvScript *drv = (DrvScript *)script->mHal.drv; 478 //rsAssert(!script->mFieldIsObject[slot]); 479 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 480 481 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 482 if (!destPtr) { 483 //ALOGV("Calling setVar on slot = %i which is null", slot); 484 return; 485 } 486 487 memcpy(destPtr, &data, sizeof(void *)); 488} 489 490void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 491 DrvScript *drv = (DrvScript *)script->mHal.drv; 492 //rsAssert(script->mFieldIsObject[slot]); 493 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 494 495 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 496 if (!destPtr) { 497 //ALOGV("Calling setVar on slot = %i which is null", slot); 498 return; 499 } 500 501 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 502} 503 504void rsdScriptDestroy(const Context *dc, Script *script) { 505 DrvScript *drv = (DrvScript *)script->mHal.drv; 506 507 if (drv->mFieldAddress) { 508 size_t exportVarCount = drv->ME->getExportVarCount(); 509 for (size_t ct = 0; ct < exportVarCount; ct++) { 510 if (drv->mFieldIsObject[ct]) { 511 // The field address can be NULL if the script-side has 512 // optimized the corresponding global variable away. 513 if (drv->mFieldAddress[ct]) { 514 rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]); 515 } 516 } 517 } 518 free(drv->mFieldAddress); 519 drv->mFieldAddress = NULL; 520 free(drv->mFieldIsObject); 521 drv->mFieldIsObject = NULL; } 522 523 if (drv->mInvokeFunctions) { 524 free(drv->mInvokeFunctions); 525 drv->mInvokeFunctions = NULL; 526 } 527 528 if (drv->mForEachFunctions) { 529 free(drv->mForEachFunctions); 530 drv->mForEachFunctions = NULL; 531 } 532 533 delete drv->ME; 534 drv->ME = NULL; 535 536 free(drv); 537 script->mHal.drv = NULL; 538 539} 540 541 542