rsdBcc.cpp revision ec3fc1163c01e18b1454057723c451f0d96868ad
1/* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsdCore.h" 18#include "rsdBcc.h" 19#include "rsdRuntime.h" 20 21#include <bcc/BCCContext.h> 22#include <bcc/RenderScript/RSCompilerDriver.h> 23#include <bcc/RenderScript/RSExecutable.h> 24#include <bcc/RenderScript/RSInfo.h> 25 26#include "rsContext.h" 27#include "rsScriptC.h" 28 29#include "utils/Vector.h" 30#include "utils/Timers.h" 31#include "utils/StopWatch.h" 32 33using namespace android; 34using namespace android::renderscript; 35 36struct DrvScript { 37 int (*mRoot)(); 38 int (*mRootExpand)(); 39 void (*mInit)(); 40 void (*mFreeChildren)(); 41 42 bcc::BCCContext *mCompilerContext; 43 bcc::RSCompilerDriver *mCompilerDriver; 44 bcc::RSExecutable *mExecutable; 45}; 46 47typedef void (*outer_foreach_t)( 48 const android::renderscript::RsForEachStubParamStruct *, 49 uint32_t x1, uint32_t x2, 50 uint32_t instep, uint32_t outstep); 51 52static Script * setTLS(Script *sc) { 53 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); 54 rsAssert(tls); 55 Script *old = tls->mScript; 56 tls->mScript = sc; 57 return old; 58} 59 60 61bool rsdScriptInit(const Context *rsc, 62 ScriptC *script, 63 char const *resName, 64 char const *cacheDir, 65 uint8_t const *bitcode, 66 size_t bitcodeSize, 67 uint32_t flags) { 68 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); 69 70 pthread_mutex_lock(&rsdgInitMutex); 71 72 bcc::RSExecutable *exec; 73 const bcc::RSInfo *info; 74 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); 75 if (drv == NULL) { 76 goto error; 77 } 78 script->mHal.drv = drv; 79 80 drv->mCompilerContext = NULL; 81 drv->mCompilerDriver = NULL; 82 drv->mExecutable = NULL; 83 84 drv->mCompilerContext = new bcc::BCCContext(); 85 if (drv->mCompilerContext == NULL) { 86 ALOGE("bcc: FAILS to create compiler context (out of memory)"); 87 goto error; 88 } 89 90 drv->mCompilerDriver = new bcc::RSCompilerDriver(); 91 if (drv->mCompilerDriver == NULL) { 92 ALOGE("bcc: FAILS to create compiler driver (out of memory)"); 93 goto error; 94 } 95 96 script->mHal.info.isThreadable = true; 97 98 drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub); 99 drv->mCompilerDriver->setRSRuntimeLookupContext(script); 100 101 exec = drv->mCompilerDriver->build(*drv->mCompilerContext, 102 cacheDir, resName, 103 (const char *)bitcode, bitcodeSize); 104 105 if (exec == NULL) { 106 ALOGE("bcc: FAILS to prepare executable for '%s'", resName); 107 goto error; 108 } 109 110 drv->mExecutable = exec; 111 112 exec->setThreadable(script->mHal.info.isThreadable); 113 if (!exec->syncInfo()) { 114 ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); 115 } 116 117 drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root")); 118 drv->mRootExpand = 119 reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand")); 120 drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init")); 121 drv->mFreeChildren = 122 reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor")); 123 124 info = &drv->mExecutable->getInfo(); 125 // Copy info over to runtime 126 script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size(); 127 script->mHal.info.exportedVariableCount = info->getExportVarNames().size(); 128 script->mHal.info.exportedPragmaCount = info->getPragmas().size(); 129 script->mHal.info.exportedPragmaKeyList = 130 const_cast<const char**>(exec->getPragmaKeys().array()); 131 script->mHal.info.exportedPragmaValueList = 132 const_cast<const char**>(exec->getPragmaValues().array()); 133 134 if (drv->mRootExpand) { 135 script->mHal.info.root = drv->mRootExpand; 136 } else { 137 script->mHal.info.root = drv->mRoot; 138 } 139 140 pthread_mutex_unlock(&rsdgInitMutex); 141 return true; 142 143error: 144 145 pthread_mutex_unlock(&rsdgInitMutex); 146 if (drv) { 147 delete drv->mCompilerContext; 148 delete drv->mCompilerDriver; 149 delete drv->mExecutable; 150 free(drv); 151 } 152 script->mHal.drv = NULL; 153 return false; 154 155} 156 157typedef struct { 158 Context *rsc; 159 Script *script; 160 ForEachFunc_t kernel; 161 uint32_t sig; 162 const Allocation * ain; 163 Allocation * aout; 164 const void * usr; 165 size_t usrLen; 166 167 uint32_t mSliceSize; 168 volatile int mSliceNum; 169 170 const uint8_t *ptrIn; 171 uint32_t eStrideIn; 172 uint8_t *ptrOut; 173 uint32_t eStrideOut; 174 175 uint32_t yStrideIn; 176 uint32_t yStrideOut; 177 178 uint32_t xStart; 179 uint32_t xEnd; 180 uint32_t yStart; 181 uint32_t yEnd; 182 uint32_t zStart; 183 uint32_t zEnd; 184 uint32_t arrayStart; 185 uint32_t arrayEnd; 186 187 uint32_t dimX; 188 uint32_t dimY; 189 uint32_t dimZ; 190 uint32_t dimArray; 191} MTLaunchStruct; 192typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); 193 194static void wc_xy(void *usr, uint32_t idx) { 195 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 196 RsForEachStubParamStruct p; 197 memset(&p, 0, sizeof(p)); 198 p.usr = mtls->usr; 199 p.usr_len = mtls->usrLen; 200 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 201 uint32_t sig = mtls->sig; 202 203 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 204 while (1) { 205 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 206 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; 207 uint32_t yEnd = yStart + mtls->mSliceSize; 208 yEnd = rsMin(yEnd, mtls->yEnd); 209 if (yEnd <= yStart) { 210 return; 211 } 212 213 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); 214 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 215 for (p.y = yStart; p.y < yEnd; p.y++) { 216 p.out = mtls->ptrOut + (mtls->yStrideOut * p.y); 217 p.in = mtls->ptrIn + (mtls->yStrideIn * p.y); 218 fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut); 219 } 220 } 221} 222 223static void wc_x(void *usr, uint32_t idx) { 224 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 225 RsForEachStubParamStruct p; 226 memset(&p, 0, sizeof(p)); 227 p.usr = mtls->usr; 228 p.usr_len = mtls->usrLen; 229 RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; 230 uint32_t sig = mtls->sig; 231 232 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 233 while (1) { 234 uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); 235 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; 236 uint32_t xEnd = xStart + mtls->mSliceSize; 237 xEnd = rsMin(xEnd, mtls->xEnd); 238 if (xEnd <= xStart) { 239 return; 240 } 241 242 //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); 243 //ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); 244 245 p.out = mtls->ptrOut + (mtls->eStrideOut * xStart); 246 p.in = mtls->ptrIn + (mtls->eStrideIn * xStart); 247 fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut); 248 } 249} 250 251void rsdScriptInvokeForEach(const Context *rsc, 252 Script *s, 253 uint32_t slot, 254 const Allocation * ain, 255 Allocation * aout, 256 const void * usr, 257 uint32_t usrLen, 258 const RsScriptCall *sc) { 259 260 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 261 262 MTLaunchStruct mtls; 263 memset(&mtls, 0, sizeof(mtls)); 264 265 DrvScript *drv = (DrvScript *)s->mHal.drv; 266 rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size()); 267 mtls.kernel = reinterpret_cast<ForEachFunc_t>( 268 drv->mExecutable->getExportForeachFuncAddrs()[slot]); 269 rsAssert(mtls.kernel != NULL); 270 mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second; 271 272 if (ain) { 273 mtls.dimX = ain->getType()->getDimX(); 274 mtls.dimY = ain->getType()->getDimY(); 275 mtls.dimZ = ain->getType()->getDimZ(); 276 //mtls.dimArray = ain->getType()->getDimArray(); 277 } else if (aout) { 278 mtls.dimX = aout->getType()->getDimX(); 279 mtls.dimY = aout->getType()->getDimY(); 280 mtls.dimZ = aout->getType()->getDimZ(); 281 //mtls.dimArray = aout->getType()->getDimArray(); 282 } else { 283 rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); 284 return; 285 } 286 287 if (!sc || (sc->xEnd == 0)) { 288 mtls.xEnd = mtls.dimX; 289 } else { 290 rsAssert(sc->xStart < mtls.dimX); 291 rsAssert(sc->xEnd <= mtls.dimX); 292 rsAssert(sc->xStart < sc->xEnd); 293 mtls.xStart = rsMin(mtls.dimX, sc->xStart); 294 mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); 295 if (mtls.xStart >= mtls.xEnd) return; 296 } 297 298 if (!sc || (sc->yEnd == 0)) { 299 mtls.yEnd = mtls.dimY; 300 } else { 301 rsAssert(sc->yStart < mtls.dimY); 302 rsAssert(sc->yEnd <= mtls.dimY); 303 rsAssert(sc->yStart < sc->yEnd); 304 mtls.yStart = rsMin(mtls.dimY, sc->yStart); 305 mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); 306 if (mtls.yStart >= mtls.yEnd) return; 307 } 308 309 mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); 310 mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); 311 mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); 312 mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); 313 314 rsAssert(!ain || (ain->getType()->getDimZ() == 0)); 315 316 Context *mrsc = (Context *)rsc; 317 Script * oldTLS = setTLS(s); 318 319 mtls.rsc = mrsc; 320 mtls.ain = ain; 321 mtls.aout = aout; 322 mtls.script = s; 323 mtls.usr = usr; 324 mtls.usrLen = usrLen; 325 mtls.mSliceSize = 10; 326 mtls.mSliceNum = 0; 327 328 mtls.ptrIn = NULL; 329 mtls.eStrideIn = 0; 330 if (ain) { 331 mtls.ptrIn = (const uint8_t *)ain->getPtr(); 332 mtls.eStrideIn = ain->getType()->getElementSizeBytes(); 333 mtls.yStrideIn = ain->mHal.drvState.stride; 334 } 335 336 mtls.ptrOut = NULL; 337 mtls.eStrideOut = 0; 338 if (aout) { 339 mtls.ptrOut = (uint8_t *)aout->getPtr(); 340 mtls.eStrideOut = aout->getType()->getElementSizeBytes(); 341 mtls.yStrideOut = aout->mHal.drvState.stride; 342 } 343 344 if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { 345 if (mtls.dimY > 1) { 346 rsdLaunchThreads(mrsc, wc_xy, &mtls); 347 } else { 348 rsdLaunchThreads(mrsc, wc_x, &mtls); 349 } 350 351 //ALOGE("launch 1"); 352 } else { 353 RsForEachStubParamStruct p; 354 memset(&p, 0, sizeof(p)); 355 p.usr = mtls.usr; 356 p.usr_len = mtls.usrLen; 357 uint32_t sig = mtls.sig; 358 359 //ALOGE("launch 3"); 360 outer_foreach_t fn = (outer_foreach_t) mtls.kernel; 361 for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) { 362 for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) { 363 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) { 364 uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] + 365 mtls.dimX * mtls.dimY * p.z + 366 mtls.dimX * p.y; 367 p.out = mtls.ptrOut + (mtls.eStrideOut * offset); 368 p.in = mtls.ptrIn + (mtls.eStrideIn * offset); 369 fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn, 370 mtls.eStrideOut); 371 } 372 } 373 } 374 } 375 376 setTLS(oldTLS); 377} 378 379 380int rsdScriptInvokeRoot(const Context *dc, Script *script) { 381 DrvScript *drv = (DrvScript *)script->mHal.drv; 382 383 Script * oldTLS = setTLS(script); 384 int ret = drv->mRoot(); 385 setTLS(oldTLS); 386 387 return ret; 388} 389 390void rsdScriptInvokeInit(const Context *dc, Script *script) { 391 DrvScript *drv = (DrvScript *)script->mHal.drv; 392 393 if (drv->mInit) { 394 drv->mInit(); 395 } 396} 397 398void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { 399 DrvScript *drv = (DrvScript *)script->mHal.drv; 400 401 if (drv->mFreeChildren) { 402 drv->mFreeChildren(); 403 } 404} 405 406void rsdScriptInvokeFunction(const Context *dc, Script *script, 407 uint32_t slot, 408 const void *params, 409 size_t paramLength) { 410 DrvScript *drv = (DrvScript *)script->mHal.drv; 411 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); 412 413 Script * oldTLS = setTLS(script); 414 reinterpret_cast<void (*)(const void *, uint32_t)>( 415 drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength); 416 setTLS(oldTLS); 417} 418 419void rsdScriptSetGlobalVar(const Context *dc, const Script *script, 420 uint32_t slot, void *data, size_t dataLength) { 421 DrvScript *drv = (DrvScript *)script->mHal.drv; 422 //rsAssert(!script->mFieldIsObject[slot]); 423 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); 424 425 int32_t *destPtr = reinterpret_cast<int32_t *>( 426 drv->mExecutable->getExportVarAddrs()[slot]); 427 if (!destPtr) { 428 //ALOGV("Calling setVar on slot = %i which is null", slot); 429 return; 430 } 431 432 memcpy(destPtr, data, dataLength); 433} 434 435void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) { 436 DrvScript *drv = (DrvScript *)script->mHal.drv; 437 //rsAssert(!script->mFieldIsObject[slot]); 438 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); 439 440 int32_t *destPtr = reinterpret_cast<int32_t *>( 441 drv->mExecutable->getExportVarAddrs()[slot]); 442 if (!destPtr) { 443 //ALOGV("Calling setVar on slot = %i which is null", slot); 444 return; 445 } 446 447 memcpy(destPtr, &data, sizeof(void *)); 448} 449 450void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { 451 DrvScript *drv = (DrvScript *)script->mHal.drv; 452 //rsAssert(script->mFieldIsObject[slot]); 453 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); 454 455 int32_t *destPtr = reinterpret_cast<int32_t *>( 456 drv->mExecutable->getExportVarAddrs()[slot]); 457 if (!destPtr) { 458 //ALOGV("Calling setVar on slot = %i which is null", slot); 459 return; 460 } 461 462 rsrSetObject(dc, script, (ObjectBase **)destPtr, data); 463} 464 465void rsdScriptDestroy(const Context *dc, Script *script) { 466 DrvScript *drv = (DrvScript *)script->mHal.drv; 467 468 if (drv == NULL) { 469 return; 470 } 471 472 if (drv->mExecutable) { 473 Vector<void *>::const_iterator var_addr_iter = 474 drv->mExecutable->getExportVarAddrs().begin(); 475 Vector<void *>::const_iterator var_addr_end = 476 drv->mExecutable->getExportVarAddrs().end(); 477 478 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = 479 drv->mExecutable->getInfo().getObjectSlots().begin(); 480 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = 481 drv->mExecutable->getInfo().getObjectSlots().end(); 482 483 while ((var_addr_iter != var_addr_end) && 484 (is_object_iter != is_object_end)) { 485 // The field address can be NULL if the script-side has optimized 486 // the corresponding global variable away. 487 ObjectBase **obj_addr = 488 reinterpret_cast<ObjectBase **>(*var_addr_iter); 489 if (*is_object_iter) { 490 if (*var_addr_iter != NULL) { 491 rsrClearObject(dc, script, obj_addr); 492 } 493 } 494 var_addr_iter++; 495 is_object_iter++; 496 } 497 } 498 499 delete drv->mCompilerContext; 500 delete drv->mCompilerDriver; 501 delete drv->mExecutable; 502 503 free(drv); 504 script->mHal.drv = NULL; 505} 506 507 508