rsdBcc.cpp revision cdfdb8f2cdf4668c476cac842212892b2505ff3f
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include "rsdCore.h" 19#include "rsdBcc.h" 20 21#include "rsContext.h" 22#include "rsScriptC.h" 23 24#include "utils/Timers.h" 25#include "utils/StopWatch.h" 26extern "C" { 27#include "libdex/ZipArchive.h" 28} 29 30 31using namespace android; 32using namespace android::renderscript; 33 34struct DrvScript { 35 int (*mRoot)(); 36 void (*mInit)(); 37 38 BCCScriptRef mBccScript; 39 40 uint32_t mInvokeFunctionCount; 41 InvokeFunc_t *mInvokeFunctions; 42 uint32_t mFieldCount; 43 void ** mFieldAddress; 44 bool * mFieldIsObject; 45 46 const uint8_t * mScriptText; 47 uint32_t mScriptTextLength; 48 49 //uint32_t * mObjectSlots; 50 //uint32_t mObjectSlotCount; 51 52 uint32_t mPragmaCount; 53 const char ** mPragmaKeys; 54 const char ** mPragmaValues; 55 56}; 57 58static Script * setTLS(Script *sc) { 59 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); 60 rsAssert(tls); 61 Script *old = tls->mScript; 62 tls->mScript = sc; 63 return old; 64} 65 66 67// Input: cacheDir 68// Input: resName 69// Input: extName 70// 71// Note: cacheFile = resName + extName 72// 73// Output: Returns cachePath == cacheDir + cacheFile 74static char *genCacheFileName(const char *cacheDir, 75 const char *resName, 76 const char *extName) { 77 char cachePath[512]; 78 char cacheFile[sizeof(cachePath)]; 79 const size_t kBufLen = sizeof(cachePath) - 1; 80 81 cacheFile[0] = '\0'; 82 // Note: resName today is usually something like 83 // "/com.android.fountain:raw/fountain" 84 if (resName[0] != '/') { 85 // Get the absolute path of the raw/***.bc file. 86 87 // Generate the absolute path. This doesn't do everything it 88 // should, e.g. if resName is "./out/whatever" it doesn't crunch 89 // the leading "./" out because this if-block is not triggered, 90 // but it'll make do. 91 // 92 if (getcwd(cacheFile, kBufLen) == NULL) { 93 LOGE("Can't get CWD while opening raw/***.bc file\n"); 94 return NULL; 95 } 96 // Append "/" at the end of cacheFile so far. 97 strncat(cacheFile, "/", kBufLen); 98 } 99 100 // cacheFile = resName + extName 101 // 102 strncat(cacheFile, resName, kBufLen); 103 if (extName != NULL) { 104 // TODO(srhines): strncat() is a bit dangerous 105 strncat(cacheFile, extName, kBufLen); 106 } 107 108 // Turn the path into a flat filename by replacing 109 // any slashes after the first one with '@' characters. 110 char *cp = cacheFile + 1; 111 while (*cp != '\0') { 112 if (*cp == '/') { 113 *cp = '@'; 114 } 115 cp++; 116 } 117 118 // Tack on the file name for the actual cache file path. 119 strncpy(cachePath, cacheDir, kBufLen); 120 strncat(cachePath, cacheFile, kBufLen); 121 122 LOGV("Cache file for '%s' '%s' is '%s'\n", resName, extName, cachePath); 123 return strdup(cachePath); 124} 125 126bool rsdScriptInit(const Context *rsc, 127 ScriptC *script, 128 char const *resName, 129 char const *cacheDir, 130 uint8_t const *bitcode, 131 size_t bitcodeSize, 132 uint32_t flags, 133 RsHalSymbolLookupFunc lookupFunc) { 134 //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 135 136 char *cachePath = NULL; 137 uint32_t objectSlotCount = 0; 138 139 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 140 if (drv == NULL) { 141 return false; 142 } 143 script->mHal.drv = drv; 144 145 drv->mBccScript = bccCreateScript(); 146 script->mHal.info.isThreadable = true; 147 drv->mScriptText = bitcode; 148 drv->mScriptTextLength = bitcodeSize; 149 150 //LOGE("mBccScript %p", script->mBccScript); 151 152 if (bccRegisterSymbolCallback(drv->mBccScript, lookupFunc, script) != 0) { 153 LOGE("bcc: FAILS to register symbol callback"); 154 goto error; 155 } 156 157 if (bccReadBC(drv->mBccScript, 158 resName, 159 (char const *)drv->mScriptText, 160 drv->mScriptTextLength, 0) != 0) { 161 LOGE("bcc: FAILS to read bitcode"); 162 return NULL; 163 } 164 165#if 1 166 if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) { 167 LOGE("bcc: FAILS to link bitcode"); 168 return NULL; 169 } 170#endif 171 cachePath = genCacheFileName(cacheDir, resName, ".oBCC"); 172 173 if (bccPrepareExecutable(drv->mBccScript, cachePath, 0) != 0) { 174 LOGE("bcc: FAILS to prepare executable"); 175 return NULL; 176 } 177 178 free(cachePath); 179 180 drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root")); 181 drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init")); 182 183 drv->mInvokeFunctionCount = bccGetExportFuncCount(drv->mBccScript); 184 if (drv->mInvokeFunctionCount <= 0) 185 drv->mInvokeFunctions = NULL; 186 else { 187 drv->mInvokeFunctions = (InvokeFunc_t*) calloc(drv->mInvokeFunctionCount, sizeof(InvokeFunc_t)); 188 bccGetExportFuncList(drv->mBccScript, drv->mInvokeFunctionCount, (void **) drv->mInvokeFunctions); 189 } 190 191 drv->mFieldCount = bccGetExportVarCount(drv->mBccScript); 192 if (drv->mFieldCount <= 0) { 193 drv->mFieldAddress = NULL; 194 drv->mFieldIsObject = NULL; 195 } else { 196 drv->mFieldAddress = (void **) calloc(drv->mFieldCount, sizeof(void *)); 197 drv->mFieldIsObject = (bool *) calloc(drv->mFieldCount, sizeof(bool)); 198 bccGetExportVarList(drv->mBccScript, drv->mFieldCount, (void **) drv->mFieldAddress); 199 } 200 201 objectSlotCount = bccGetObjectSlotCount(drv->mBccScript); 202 if (objectSlotCount) { 203 uint32_t * slots = new uint32_t[objectSlotCount]; 204 bccGetObjectSlotList(drv->mBccScript, objectSlotCount, slots); 205 for (uint32_t ct=0; ct < objectSlotCount; ct++) { 206 drv->mFieldIsObject[slots[ct]] = true; 207 } 208 delete [] slots; 209 } 210 211 uint32_t mPragmaCount; 212 const char ** mPragmaKeys; 213 const char ** mPragmaValues; 214 215 const static int pragmaMax = 16; 216 drv->mPragmaCount = bccGetPragmaCount(drv->mBccScript); 217 if (drv->mPragmaCount <= 0) { 218 drv->mPragmaKeys = NULL; 219 drv->mPragmaValues = NULL; 220 } else { 221 drv->mPragmaKeys = (const char **) calloc(drv->mFieldCount, sizeof(const char *)); 222 drv->mPragmaValues = (const char **) calloc(drv->mFieldCount, sizeof(const char *)); 223 bccGetPragmaList(drv->mBccScript, drv->mPragmaCount, drv->mPragmaKeys, drv->mPragmaValues); 224 } 225 226 227 228 // Copy info over to runtime 229 script->mHal.info.exportedFunctionCount = drv->mInvokeFunctionCount; 230 script->mHal.info.exportedVariableCount = drv->mFieldCount; 231 script->mHal.info.exportedPragmaCount = drv->mPragmaCount; 232 script->mHal.info.exportedPragmaKeyList = drv->mPragmaKeys; 233 script->mHal.info.exportedPragmaValueList = drv->mPragmaValues; 234 script->mHal.info.root = drv->mRoot; 235 236 237 return true; 238 239error: 240 241 free(drv); 242 return false; 243 244} 245 246typedef struct { 247 Context *rsc; 248 Script *script; 249 const Allocation * ain; 250 Allocation * aout; 251 const void * usr; 252 253 uint32_t mSliceSize; 254 volatile int mSliceNum; 255 256 const uint8_t *ptrIn; 257 uint32_t eStrideIn; 258 uint8_t *ptrOut; 259 uint32_t eStrideOut; 260 261 uint32_t xStart; 262 uint32_t xEnd; 263 uint32_t yStart; 264 uint32_t yEnd; 265 uint32_t zStart; 266 uint32_t zEnd; 267 uint32_t arrayStart; 268 uint32_t arrayEnd; 269 270 uint32_t dimX; 271 uint32_t dimY; 272 uint32_t dimZ; 273 uint32_t dimArray; 274} MTLaunchStruct; 275typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 276 277static void wc_xy(void *usr, uint32_t idx) { 278 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 279 280 while (1) { 281 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 282 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 283 uint32_t yEnd = yStart + mtls->mSliceSize; 284 yEnd = rsMin(yEnd, mtls->yEnd); 285 if (yEnd <= yStart) { 286 return; 287 } 288 289 //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 290 //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 291 for (uint32_t y = yStart; y < yEnd; y++) { 292 uint32_t offset = mtls->dimX * y; 293 uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset); 294 const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset); 295 296 for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) { 297 ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0); 298 xPtrIn += mtls->eStrideIn; 299 xPtrOut += mtls->eStrideOut; 300 } 301 } 302 } 303} 304 305static void wc_x(void *usr, uint32_t idx) { 306 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 307 308 while (1) { 309 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 310 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 311 uint32_t xEnd = xStart + mtls->mSliceSize; 312 xEnd = rsMin(xEnd, mtls->xEnd); 313 if (xEnd <= xStart) { 314 return; 315 } 316 317 //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 318 //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 319 uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart); 320 const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart); 321 for (uint32_t x = xStart; x < xEnd; x++) { 322 ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0); 323 xPtrIn += mtls->eStrideIn; 324 xPtrOut += mtls->eStrideOut; 325 } 326 } 327} 328 329void rsdScriptInvokeForEach(const Context *rsc, 330 Script *s, 331 const Allocation * ain, 332 Allocation * aout, 333 const void * usr, 334 uint32_t usrLen, 335 const RsScriptCall *sc) { 336 337 RsHal * dc = (RsHal *)rsc->mHal.drv; 338 339 MTLaunchStruct mtls; 340 memset(&mtls, 0, sizeof(mtls)); 341 342 if (ain) { 343 mtls.dimX = ain->getType()->getDimX(); 344 mtls.dimY = ain->getType()->getDimY(); 345 mtls.dimZ = ain->getType()->getDimZ(); 346 //mtls.dimArray = ain->getType()->getDimArray(); 347 } else if (aout) { 348 mtls.dimX = aout->getType()->getDimX(); 349 mtls.dimY = aout->getType()->getDimY(); 350 mtls.dimZ = aout->getType()->getDimZ(); 351 //mtls.dimArray = aout->getType()->getDimArray(); 352 } else { 353 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 354 return; 355 } 356 357 if (!sc || (sc->xEnd == 0)) { 358 mtls.xEnd = mtls.dimX; 359 } else { 360 rsAssert(sc->xStart < mtls.dimX); 361 rsAssert(sc->xEnd <= mtls.dimX); 362 rsAssert(sc->xStart < sc->xEnd); 363 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 364 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 365 if (mtls.xStart >= mtls.xEnd) return; 366 } 367 368 if (!sc || (sc->yEnd == 0)) { 369 mtls.yEnd = mtls.dimY; 370 } else { 371 rsAssert(sc->yStart < mtls.dimY); 372 rsAssert(sc->yEnd <= mtls.dimY); 373 rsAssert(sc->yStart < sc->yEnd); 374 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 375 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 376 if (mtls.yStart >= mtls.yEnd) return; 377 } 378 379 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 380 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 381 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 382 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 383 384 rsAssert(ain->getType()->getDimZ() == 0); 385 386 Context *mrsc = (Context *)rsc; 387 Script * oldTLS = setTLS(s); 388 389 mtls.rsc = mrsc; 390 mtls.ain = ain; 391 mtls.aout = aout; 392 mtls.script = s; 393 mtls.usr = usr; 394 mtls.mSliceSize = 10; 395 mtls.mSliceNum = 0; 396 397 mtls.ptrIn = NULL; 398 mtls.eStrideIn = 0; 399 if (ain) { 400 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 401 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 402 } 403 404 mtls.ptrOut = NULL; 405 mtls.eStrideOut = 0; 406 if (aout) { 407 mtls.ptrOut = (uint8_t *)aout->getPtr(); 408 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 409 } 410 411 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 412 if (mtls.dimY > 1) { 413 rsdLaunchThreads(mrsc, wc_xy, &mtls); 414 } else { 415 rsdLaunchThreads(mrsc, wc_x, &mtls); 416 } 417 418 //LOGE("launch 1"); 419 } else { 420 //LOGE("launch 3"); 421 for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) { 422 for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) { 423 for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) { 424 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar + 425 mtls.dimX * mtls.dimY * z + 426 mtls.dimX * y; 427 uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset); 428 const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset); 429 430 for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) { 431 ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar); 432 xPtrIn += mtls.eStrideIn; 433 xPtrOut += mtls.eStrideOut; 434 } 435 } 436 } 437 } 438 } 439 440 setTLS(oldTLS); 441} 442 443 444int rsdScriptInvokeRoot(const Context *dc, Script *script) { 445 DrvScript *drv = (DrvScript *)script->mHal.drv; 446 447 Script * oldTLS = setTLS(script); 448 int ret = drv->mRoot(); 449 setTLS(oldTLS); 450 451 return ret; 452} 453 454void rsdScriptInvokeInit(const Context *dc, Script *script) { 455 DrvScript *drv = (DrvScript *)script->mHal.drv; 456 457 if (drv->mInit) { 458 drv->mInit(); 459 } 460} 461 462 463void rsdScriptInvokeFunction(const Context *dc, Script *script, 464 uint32_t slot, 465 const void *params, 466 size_t paramLength) { 467 DrvScript *drv = (DrvScript *)script->mHal.drv; 468 //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 469 470 Script * oldTLS = setTLS(script); 471 ((void (*)(const void *, uint32_t)) 472 drv->mInvokeFunctions[slot])(params, paramLength); 473 setTLS(oldTLS); 474} 475 476void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 477 uint32_t slot, void *data, size_t dataLength) { 478 DrvScript *drv = (DrvScript *)script->mHal.drv; 479 //rsAssert(!script->mFieldIsObject[slot]); 480 //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 481 482 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 483 if (!destPtr) { 484 //LOGV("Calling setVar on slot = %i which is null", slot); 485 return; 486 } 487 488 memcpy(destPtr, data, dataLength); 489} 490 491void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 492 DrvScript *drv = (DrvScript *)script->mHal.drv; 493 //rsAssert(!script->mFieldIsObject[slot]); 494 //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 495 496 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 497 if (!destPtr) { 498 //LOGV("Calling setVar on slot = %i which is null", slot); 499 return; 500 } 501 502 memcpy(destPtr, &data, sizeof(void *)); 503} 504 505void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 506 DrvScript *drv = (DrvScript *)script->mHal.drv; 507 //rsAssert(script->mFieldIsObject[slot]); 508 //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 509 510 int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot]; 511 if (!destPtr) { 512 //LOGV("Calling setVar on slot = %i which is null", slot); 513 return; 514 } 515 516 rsiSetObject((ObjectBase **)destPtr, data); 517} 518 519void rsdScriptDestroy(const Context *dc, Script *script) { 520 DrvScript *drv = (DrvScript *)script->mHal.drv; 521 522 if (drv->mFieldAddress) { 523 for (size_t ct=0; ct < drv->mFieldCount; ct++) { 524 if (drv->mFieldIsObject[ct]) { 525 rsiClearObject((ObjectBase **)&drv->mFieldAddress[ct]); 526 } 527 } 528 delete [] drv->mFieldAddress; 529 delete [] drv->mFieldIsObject; 530 drv->mFieldAddress = NULL; 531 drv->mFieldIsObject = NULL; 532 drv->mFieldCount = 0; 533 } 534 535 if (drv->mInvokeFunctions) { 536 delete [] drv->mInvokeFunctions; 537 drv->mInvokeFunctions = NULL; 538 drv->mInvokeFunctionCount = 0; 539 } 540 free(drv); 541 script->mHal.drv = NULL; 542 543} 544 545 546