rsdBcc.cpp revision 3815badf95a7dca8aa278e3e12f07a3924a82319
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include "rsdCore.h" 19#include "rsdBcc.h" 20#include "rsdRuntime.h" 21 22#include <bcinfo/MetadataExtractor.h> 23 24#include "rsContext.h" 25#include "rsScriptC.h" 26 27#include "utils/Timers.h" 28#include "utils/StopWatch.h" 29extern "C" { 30#include "libdex/ZipArchive.h" 31} 32 33 34using namespace android; 35using namespace android::renderscript; 36 37struct DrvScript { 38 int (*mRoot)(); 39 void (*mInit)(); 40 41 BCCScriptRef mBccScript; 42 43 bcinfo::MetadataExtractor *ME; 44 45 InvokeFunc_t *mInvokeFunctions; 46 void ** mFieldAddress; 47 bool * mFieldIsObject; 48 const uint32_t *mExportForEachSignatureList; 49 50 const uint8_t * mScriptText; 51 uint32_t mScriptTextLength; 52}; 53 54 55static Script * setTLS(Script *sc) { 56 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 57 rsAssert(tls); 58 Script *old = tls->mScript; 59 tls->mScript = sc; 60 return old; 61} 62 63 64bool rsdScriptInit(const Context *rsc, 65 ScriptC *script, 66 char const *resName, 67 char const *cacheDir, 68 uint8_t const *bitcode, 69 size_t bitcodeSize, 70 uint32_t flags) { 71 //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 72 73 pthread_mutex_lock(&rsdgInitMutex); 74 char *cachePath = NULL; 75 size_t exportFuncCount = 0; 76 size_t exportVarCount = 0; 77 size_t objectSlotCount = 0; 78 size_t exportForEachSignatureCount = 0; 79 80 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 81 if (drv == NULL) { 82 goto error; 83 } 84 script->mHal.drv = drv; 85 86 drv->mBccScript = bccCreateScript(); 87 script->mHal.info.isThreadable = true; 88 drv->mScriptText = bitcode; 89 drv->mScriptTextLength = bitcodeSize; 90 91 92 drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText, 93 drv->mScriptTextLength); 94 if (!drv->ME->extract()) { 95 LOGE("bcinfo: failed to read script metadata"); 96 goto error; 97 } 98 99 //LOGE("mBccScript %p", script->mBccScript); 100 101 if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) { 102 LOGE("bcc: FAILS to register symbol callback"); 103 goto error; 104 } 105 106 if (bccReadBC(drv->mBccScript, 107 resName, 108 (char const *)drv->mScriptText, 109 drv->mScriptTextLength, 0) != 0) { 110 LOGE("bcc: FAILS to read bitcode"); 111 goto error; 112 } 113 114 if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) { 115 LOGE("bcc: FAILS to link bitcode"); 116 goto error; 117 } 118 119 if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) { 120 LOGE("bcc: FAILS to prepare executable"); 121 goto error; 122 } 123 124 free(cachePath); 125 126 drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root")); 127 drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init")); 128 129 exportFuncCount = drv->ME->getExportFuncCount(); 130 if (exportFuncCount > 0) { 131 drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount, 132 sizeof(InvokeFunc_t)); 133 bccGetExportFuncList(drv->mBccScript, exportFuncCount, 134 (void **) drv->mInvokeFunctions); 135 } else { 136 drv->mInvokeFunctions = NULL; 137 } 138 139 exportVarCount = drv->ME->getExportVarCount(); 140 if (exportVarCount > 0) { 141 drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*)); 142 drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool)); 143 bccGetExportVarList(drv->mBccScript, exportVarCount, 144 (void **) drv->mFieldAddress); 145 } else { 146 drv->mFieldAddress = NULL; 147 drv->mFieldIsObject = NULL; 148 } 149 150 objectSlotCount = drv->ME->getObjectSlotCount(); 151 if (objectSlotCount > 0) { 152 const uint32_t *objectSlotList = drv->ME->getObjectSlotList(); 153 for (uint32_t ct=0; ct < objectSlotCount; ct++) { 154 drv->mFieldIsObject[objectSlotList[ct]] = true; 155 } 156 } 157 158 exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount(); 159 rsAssert(exportForEachSignatureCount <= 1); 160 drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList(); 161 162 // Copy info over to runtime 163 script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount(); 164 script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount(); 165 script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount(); 166 script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList(); 167 script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList(); 168 script->mHal.info.root = drv->mRoot; 169 170 pthread_mutex_unlock(&rsdgInitMutex); 171 return true; 172 173error: 174 175 pthread_mutex_unlock(&rsdgInitMutex); 176 if (drv->ME) { 177 delete drv->ME; 178 drv->ME = NULL; 179 } 180 free(drv); 181 return false; 182 183} 184 185typedef struct { 186 Context *rsc; 187 Script *script; 188 uint32_t sig; 189 const Allocation * ain; 190 Allocation * aout; 191 const void * usr; 192 size_t usrLen; 193 194 uint32_t mSliceSize; 195 volatile int mSliceNum; 196 197 const uint8_t *ptrIn; 198 uint32_t eStrideIn; 199 uint8_t *ptrOut; 200 uint32_t eStrideOut; 201 202 uint32_t xStart; 203 uint32_t xEnd; 204 uint32_t yStart; 205 uint32_t yEnd; 206 uint32_t zStart; 207 uint32_t zEnd; 208 uint32_t arrayStart; 209 uint32_t arrayEnd; 210 211 uint32_t dimX; 212 uint32_t dimY; 213 uint32_t dimZ; 214 uint32_t dimArray; 215} MTLaunchStruct; 216typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 217 218static void wc_xy(void *usr, uint32_t idx) { 219 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 220 RsForEachStubParamStruct p; 221 memset(&p, 0, sizeof(p)); 222 p.usr = mtls->usr; 223 p.usr_len = mtls->usrLen; 224 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 225 uint32_t sig = mtls->sig; 226 227 while (1) { 228 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 229 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 230 uint32_t yEnd = yStart + mtls->mSliceSize; 231 yEnd = rsMin(yEnd, mtls->yEnd); 232 if (yEnd <= yStart) { 233 return; 234 } 235 236 //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 237 //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 238 for (p.y = yStart; p.y < yEnd; p.y++) { 239 uint32_t offset = mtls->dimX * p.y; 240 uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset); 241 const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset); 242 243 for (p.x = mtls->xStart; p.x < mtls->xEnd; p.x++) { 244 p.in = xPtrIn; 245 p.out = xPtrOut; 246 dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p); 247 xPtrIn += mtls->eStrideIn; 248 xPtrOut += mtls->eStrideOut; 249 } 250 } 251 } 252} 253 254static void wc_x(void *usr, uint32_t idx) { 255 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 256 RsForEachStubParamStruct p; 257 memset(&p, 0, sizeof(p)); 258 p.usr = mtls->usr; 259 p.usr_len = mtls->usrLen; 260 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 261 uint32_t sig = mtls->sig; 262 263 while (1) { 264 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 265 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 266 uint32_t xEnd = xStart + mtls->mSliceSize; 267 xEnd = rsMin(xEnd, mtls->xEnd); 268 if (xEnd <= xStart) { 269 return; 270 } 271 272 //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 273 //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 274 uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart); 275 const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart); 276 for (p.x = xStart; p.x < xEnd; p.x++) { 277 p.in = xPtrIn; 278 p.out = xPtrOut; 279 dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p); 280 xPtrIn += mtls->eStrideIn; 281 xPtrOut += mtls->eStrideOut; 282 } 283 } 284} 285 286void rsdScriptInvokeForEach(const Context *rsc, 287 Script *s, 288 uint32_t slot, 289 const Allocation * ain, 290 Allocation * aout, 291 const void * usr, 292 uint32_t usrLen, 293 const RsScriptCall *sc) { 294 295 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 296 297 MTLaunchStruct mtls; 298 memset(&mtls, 0, sizeof(mtls)); 299 300 DrvScript *drv = (DrvScript *)s->mHal.drv; 301 // We only support slot 0 (root) at this point in time. 302 rsAssert(slot == 0); 303 mtls.sig = drv->mExportForEachSignatureList[slot]; 304 if (ain) { 305 mtls.dimX = ain->getType()->getDimX(); 306 mtls.dimY = ain->getType()->getDimY(); 307 mtls.dimZ = ain->getType()->getDimZ(); 308 //mtls.dimArray = ain->getType()->getDimArray(); 309 } else if (aout) { 310 mtls.dimX = aout->getType()->getDimX(); 311 mtls.dimY = aout->getType()->getDimY(); 312 mtls.dimZ = aout->getType()->getDimZ(); 313 //mtls.dimArray = aout->getType()->getDimArray(); 314 } else { 315 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 316 return; 317 } 318 319 if (!sc || (sc->xEnd == 0)) { 320 mtls.xEnd = mtls.dimX; 321 } else { 322 rsAssert(sc->xStart < mtls.dimX); 323 rsAssert(sc->xEnd <= mtls.dimX); 324 rsAssert(sc->xStart < sc->xEnd); 325 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 326 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 327 if (mtls.xStart >= mtls.xEnd) return; 328 } 329 330 if (!sc || (sc->yEnd == 0)) { 331 mtls.yEnd = mtls.dimY; 332 } else { 333 rsAssert(sc->yStart < mtls.dimY); 334 rsAssert(sc->yEnd <= mtls.dimY); 335 rsAssert(sc->yStart < sc->yEnd); 336 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 337 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 338 if (mtls.yStart >= mtls.yEnd) return; 339 } 340 341 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 342 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 343 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 344 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 345 346 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 347 348 Context *mrsc = (Context *)rsc; 349 Script * oldTLS = setTLS(s); 350 351 mtls.rsc = mrsc; 352 mtls.ain = ain; 353 mtls.aout = aout; 354 mtls.script = s; 355 mtls.usr = usr; 356 mtls.usrLen = usrLen; 357 mtls.mSliceSize = 10; 358 mtls.mSliceNum = 0; 359 360 mtls.ptrIn = NULL; 361 mtls.eStrideIn = 0; 362 if (ain) { 363 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 364 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 365 } 366 367 mtls.ptrOut = NULL; 368 mtls.eStrideOut = 0; 369 if (aout) { 370 mtls.ptrOut = (uint8_t *)aout->getPtr(); 371 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 372 } 373 374 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 375 if (mtls.dimY > 1) { 376 rsdLaunchThreads(mrsc, wc_xy, &mtls); 377 } else { 378 rsdLaunchThreads(mrsc, wc_x, &mtls); 379 } 380 381 //LOGE("launch 1"); 382 } else { 383 RsForEachStubParamStruct p; 384 memset(&p, 0, sizeof(p)); 385 p.usr = mtls.usr; 386 p.usr_len = mtls.usrLen; 387 uint32_t sig = mtls.sig; 388 389 //LOGE("launch 3"); 390 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 391 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 392 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 393 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 394 mtls.dimX * mtls.dimY * p.z + 395 mtls.dimX * p.y; 396 uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset); 397 const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset); 398 399 for (p.x = mtls.xStart; p.x < mtls.xEnd; p.x++) { 400 p.in = xPtrIn; 401 p.out = xPtrOut; 402 dc->mForEachLaunch[sig](&s->mHal.info.root, &p); 403 xPtrIn += mtls.eStrideIn; 404 xPtrOut += mtls.eStrideOut; 405 } 406 } 407 } 408 } 409 } 410 411 setTLS(oldTLS); 412} 413 414 415int rsdScriptInvokeRoot(const Context *dc, Script *script) { 416 DrvScript *drv = (DrvScript *)script->mHal.drv; 417 418 Script * oldTLS = setTLS(script); 419 int ret = drv->mRoot(); 420 setTLS(oldTLS); 421 422 return ret; 423} 424 425void rsdScriptInvokeInit(const Context *dc, Script *script) { 426 DrvScript *drv = (DrvScript *)script->mHal.drv; 427 428 if (drv->mInit) { 429 drv->mInit(); 430 } 431} 432 433 434void rsdScriptInvokeFunction(const Context *dc, Script *script, 435 uint32_t slot, 436 const void *params, 437 size_t paramLength) { 438 DrvScript *drv = (DrvScript *)script->mHal.drv; 439 //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 440 441 Script * oldTLS = setTLS(script); 442 ((void (*)(const void *, uint32_t)) 443 drv->mInvokeFunctions[slot])(params, paramLength); 444 setTLS(oldTLS); 445} 446 447void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 448 uint32_t slot, void *data, size_t dataLength) { 449 DrvScript *drv = (DrvScript *)script->mHal.drv; 450 //rsAssert(!script->mFieldIsObject[slot]); 451 //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 452 453 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 454 if (!destPtr) { 455 //LOGV("Calling setVar on slot = %i which is null", slot); 456 return; 457 } 458 459 memcpy(destPtr, data, dataLength); 460} 461 462void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 463 DrvScript *drv = (DrvScript *)script->mHal.drv; 464 //rsAssert(!script->mFieldIsObject[slot]); 465 //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 466 467 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 468 if (!destPtr) { 469 //LOGV("Calling setVar on slot = %i which is null", slot); 470 return; 471 } 472 473 memcpy(destPtr, &data, sizeof(void *)); 474} 475 476void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 477 DrvScript *drv = (DrvScript *)script->mHal.drv; 478 //rsAssert(script->mFieldIsObject[slot]); 479 //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 480 481 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 482 if (!destPtr) { 483 //LOGV("Calling setVar on slot = %i which is null", slot); 484 return; 485 } 486 487 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 488} 489 490void rsdScriptDestroy(const Context *dc, Script *script) { 491 DrvScript *drv = (DrvScript *)script->mHal.drv; 492 493 if (drv->mFieldAddress) { 494 size_t exportVarCount = drv->ME->getExportVarCount(); 495 for (size_t ct = 0; ct < exportVarCount; ct++) { 496 if (drv->mFieldIsObject[ct]) { 497 // The field address can be NULL if the script-side has 498 // optimized the corresponding global variable away. 499 if (drv->mFieldAddress[ct]) { 500 rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]); 501 } 502 } 503 } 504 free(drv->mFieldAddress); 505 drv->mFieldAddress = NULL; 506 free(drv->mFieldIsObject); 507 drv->mFieldIsObject = NULL; } 508 509 if (drv->mInvokeFunctions) { 510 free(drv->mInvokeFunctions); 511 drv->mInvokeFunctions = NULL; 512 } 513 514 delete drv->ME; 515 drv->ME = NULL; 516 517 free(drv); 518 script->mHal.drv = NULL; 519 520} 521 522 523