1/* libs/pixelflinger/codeflinger/GGLAssembler.cpp 2** 3** Copyright 2006, The Android Open Source Project 4** 5** Licensed under the Apache License, Version 2.0 (the "License"); 6** you may not use this file except in compliance with the License. 7** You may obtain a copy of the License at 8** 9** http://www.apache.org/licenses/LICENSE-2.0 10** 11** Unless required by applicable law or agreed to in writing, software 12** distributed under the License is distributed on an "AS IS" BASIS, 13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14** See the License for the specific language governing permissions and 15** limitations under the License. 16*/ 17 18#define LOG_TAG "GGLAssembler" 19 20#include <assert.h> 21#include <stdint.h> 22#include <stdlib.h> 23#include <stdio.h> 24#include <sys/types.h> 25#include <cutils/log.h> 26 27#include "codeflinger/GGLAssembler.h" 28 29namespace android { 30 31// ---------------------------------------------------------------------------- 32 33GGLAssembler::GGLAssembler(ARMAssemblerInterface* target) 34 : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7) 35{ 36} 37 38GGLAssembler::~GGLAssembler() 39{ 40} 41 42void GGLAssembler::prolog() 43{ 44 ARMAssemblerProxy::prolog(); 45} 46 47void GGLAssembler::epilog(uint32_t touched) 48{ 49 ARMAssemblerProxy::epilog(touched); 50} 51 52void GGLAssembler::reset(int opt_level) 53{ 54 ARMAssemblerProxy::reset(); 55 RegisterAllocator::reset(); 56 mOptLevel = opt_level; 57} 58 59// --------------------------------------------------------------------------- 60 61int GGLAssembler::scanline(const needs_t& needs, context_t const* c) 62{ 63 int err = 0; 64 int opt_level = mOptLevel; 65 while (opt_level >= 0) { 66 reset(opt_level); 67 err = scanline_core(needs, c); 68 if (err == 0) 69 break; 70 opt_level--; 71 } 72 73 // XXX: in theory, pcForLabel is not valid before generate() 74 uint32_t* fragment_start_pc = pcForLabel("fragment_loop"); 75 uint32_t* fragment_end_pc = pcForLabel("epilog"); 76 const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc); 77 78 // build a name for our pipeline 79 char name[64]; 80 sprintf(name, 81 "scanline__%08X:%08X_%08X_%08X [%3d ipp]", 82 needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops); 83 84 if (err) { 85 LOGE("Error while generating ""%s""\n", name); 86 disassemble(name); 87 return -1; 88 } 89 90 return generate(name); 91} 92 93int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c) 94{ 95 int64_t duration = ggl_system_time(); 96 97 mBlendFactorCached = 0; 98 mBlending = 0; 99 mMasking = 0; 100 mAA = GGL_READ_NEEDS(P_AA, needs.p); 101 mDithering = GGL_READ_NEEDS(P_DITHER, needs.p); 102 mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER; 103 mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER; 104 mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0; 105 mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0; 106 mBuilderContext.needs = needs; 107 mBuilderContext.c = c; 108 mBuilderContext.Rctx = reserveReg(R0); // context always in R0 109 mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ]; 110 111 // ------------------------------------------------------------------------ 112 113 decodeLogicOpNeeds(needs); 114 115 decodeTMUNeeds(needs, c); 116 117 mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n)); 118 mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n)); 119 mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n)); 120 mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n)); 121 122 if (!mCbFormat.c[GGLFormat::ALPHA].h) { 123 if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) || 124 (mBlendSrc == GGL_DST_ALPHA)) { 125 mBlendSrc = GGL_ONE; 126 } 127 if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) || 128 (mBlendSrcA == GGL_DST_ALPHA)) { 129 mBlendSrcA = GGL_ONE; 130 } 131 if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) || 132 (mBlendDst == GGL_DST_ALPHA)) { 133 mBlendDst = GGL_ONE; 134 } 135 if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) || 136 (mBlendDstA == GGL_DST_ALPHA)) { 137 mBlendDstA = GGL_ONE; 138 } 139 } 140 141 // if we need the framebuffer, read it now 142 const int blending = blending_codes(mBlendSrc, mBlendDst) | 143 blending_codes(mBlendSrcA, mBlendDstA); 144 145 // XXX: handle special cases, destination not modified... 146 if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) && 147 (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) { 148 // Destination unmodified (beware of logic ops) 149 } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) && 150 (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) { 151 // Destination is zero (beware of logic ops) 152 } 153 154 int fbComponents = 0; 155 const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n); 156 for (int i=0 ; i<4 ; i++) { 157 const int mask = 1<<i; 158 component_info_t& info = mInfo[i]; 159 int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; 160 int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; 161 if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA) 162 fs = GGL_ONE; 163 info.masked = !!(masking & mask); 164 info.inDest = !info.masked && mCbFormat.c[i].h && 165 ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp)); 166 if (mCbFormat.components >= GGL_LUMINANCE && 167 (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) { 168 info.inDest = false; 169 } 170 info.needed = (i==GGLFormat::ALPHA) && 171 (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS); 172 info.replaced = !!(mTextureMachine.replaced & mask); 173 info.iterated = (!info.replaced && (info.inDest || info.needed)); 174 info.smooth = mSmooth && info.iterated; 175 info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA); 176 info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO)); 177 178 mBlending |= (info.blend ? mask : 0); 179 mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0; 180 fbComponents |= mCbFormat.c[i].h ? mask : 0; 181 } 182 183 mAllMasked = (mMasking == fbComponents); 184 if (mAllMasked) { 185 mDithering = 0; 186 } 187 188 fragment_parts_t parts; 189 190 // ------------------------------------------------------------------------ 191 prolog(); 192 // ------------------------------------------------------------------------ 193 194 build_scanline_prolog(parts, needs); 195 196 if (registerFile().status()) 197 return registerFile().status(); 198 199 // ------------------------------------------------------------------------ 200 label("fragment_loop"); 201 // ------------------------------------------------------------------------ 202 { 203 Scratch regs(registerFile()); 204 205 if (mDithering) { 206 // update the dither index. 207 MOV(AL, 0, parts.count.reg, 208 reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT)); 209 ADD(AL, 0, parts.count.reg, parts.count.reg, 210 imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT))); 211 MOV(AL, 0, parts.count.reg, 212 reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT)); 213 } 214 215 // XXX: could we do an early alpha-test here in some cases? 216 // It would probaly be used only with smooth-alpha and no texture 217 // (or no alpha component in the texture). 218 219 // Early z-test 220 if (mAlphaTest==GGL_ALWAYS) { 221 build_depth_test(parts, Z_TEST|Z_WRITE); 222 } else { 223 // we cannot do the z-write here, because 224 // it might be killed by the alpha-test later 225 build_depth_test(parts, Z_TEST); 226 } 227 228 { // texture coordinates 229 Scratch scratches(registerFile()); 230 231 // texel generation 232 build_textures(parts, regs); 233 } 234 235 if ((blending & (FACTOR_DST|BLEND_DST)) || 236 (mMasking && !mAllMasked) || 237 (mLogicOp & LOGIC_OP_DST)) 238 { 239 // blending / logic_op / masking need the framebuffer 240 mDstPixel.setTo(regs.obtain(), &mCbFormat); 241 242 // load the framebuffer pixel 243 comment("fetch color-buffer"); 244 load(parts.cbPtr, mDstPixel); 245 } 246 247 if (registerFile().status()) 248 return registerFile().status(); 249 250 pixel_t pixel; 251 int directTex = mTextureMachine.directTexture; 252 if (directTex | parts.packed) { 253 // note: we can't have both here 254 // iterated color or direct texture 255 pixel = directTex ? parts.texel[directTex-1] : parts.iterated; 256 pixel.flags &= ~CORRUPTIBLE; 257 } else { 258 if (mDithering) { 259 const int ctxtReg = mBuilderContext.Rctx; 260 const int mask = GGL_DITHER_SIZE-1; 261 parts.dither = reg_t(regs.obtain()); 262 AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask)); 263 ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg); 264 LDRB(AL, parts.dither.reg, parts.dither.reg, 265 immed12_pre(GGL_OFFSETOF(ditherMatrix))); 266 } 267 268 // allocate a register for the resulting pixel 269 pixel.setTo(regs.obtain(), &mCbFormat, FIRST); 270 271 build_component(pixel, parts, GGLFormat::ALPHA, regs); 272 273 if (mAlphaTest!=GGL_ALWAYS) { 274 // only handle the z-write part here. We know z-test 275 // was successful, as well as alpha-test. 276 build_depth_test(parts, Z_WRITE); 277 } 278 279 build_component(pixel, parts, GGLFormat::RED, regs); 280 build_component(pixel, parts, GGLFormat::GREEN, regs); 281 build_component(pixel, parts, GGLFormat::BLUE, regs); 282 283 pixel.flags |= CORRUPTIBLE; 284 } 285 286 if (registerFile().status()) 287 return registerFile().status(); 288 289 if (pixel.reg == -1) { 290 // be defensive here. if we're here it's probably 291 // that this whole fragment is a no-op. 292 pixel = mDstPixel; 293 } 294 295 if (!mAllMasked) { 296 // logic operation 297 build_logic_op(pixel, regs); 298 299 // masking 300 build_masking(pixel, regs); 301 302 comment("store"); 303 store(parts.cbPtr, pixel, WRITE_BACK); 304 } 305 } 306 307 if (registerFile().status()) 308 return registerFile().status(); 309 310 // update the iterated color... 311 if (parts.reload != 3) { 312 build_smooth_shade(parts); 313 } 314 315 // update iterated z 316 build_iterate_z(parts); 317 318 // update iterated fog 319 build_iterate_f(parts); 320 321 SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16)); 322 B(PL, "fragment_loop"); 323 label("epilog"); 324 epilog(registerFile().touched()); 325 326 if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) { 327 if (mDepthTest!=GGL_ALWAYS) { 328 label("discard_before_textures"); 329 build_iterate_texture_coordinates(parts); 330 } 331 label("discard_after_textures"); 332 build_smooth_shade(parts); 333 build_iterate_z(parts); 334 build_iterate_f(parts); 335 if (!mAllMasked) { 336 ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3)); 337 } 338 SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16)); 339 B(PL, "fragment_loop"); 340 epilog(registerFile().touched()); 341 } 342 343 return registerFile().status(); 344} 345 346// --------------------------------------------------------------------------- 347 348void GGLAssembler::build_scanline_prolog( 349 fragment_parts_t& parts, const needs_t& needs) 350{ 351 Scratch scratches(registerFile()); 352 int Rctx = mBuilderContext.Rctx; 353 354 // compute count 355 comment("compute ct (# of pixels to process)"); 356 parts.count.setTo(obtainReg()); 357 int Rx = scratches.obtain(); 358 int Ry = scratches.obtain(); 359 CONTEXT_LOAD(Rx, iterators.xl); 360 CONTEXT_LOAD(parts.count.reg, iterators.xr); 361 CONTEXT_LOAD(Ry, iterators.y); 362 363 // parts.count = iterators.xr - Rx 364 SUB(AL, 0, parts.count.reg, parts.count.reg, Rx); 365 SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1)); 366 367 if (mDithering) { 368 // parts.count.reg = 0xNNNNXXDD 369 // NNNN = count-1 370 // DD = dither offset 371 // XX = 0xxxxxxx (x = garbage) 372 Scratch scratches(registerFile()); 373 int tx = scratches.obtain(); 374 int ty = scratches.obtain(); 375 AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK)); 376 AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK)); 377 ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT)); 378 ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16)); 379 } else { 380 // parts.count.reg = 0xNNNN0000 381 // NNNN = count-1 382 MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16)); 383 } 384 385 if (!mAllMasked) { 386 // compute dst ptr 387 comment("compute color-buffer pointer"); 388 const int cb_bits = mCbFormat.size*8; 389 int Rs = scratches.obtain(); 390 parts.cbPtr.setTo(obtainReg(), cb_bits); 391 CONTEXT_LOAD(Rs, state.buffers.color.stride); 392 CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data); 393 SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs 394 base_offset(parts.cbPtr, parts.cbPtr, Rs); 395 scratches.recycle(Rs); 396 } 397 398 // init fog 399 const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p); 400 if (need_fog) { 401 comment("compute initial fog coordinate"); 402 Scratch scratches(registerFile()); 403 int dfdx = scratches.obtain(); 404 int ydfdy = scratches.obtain(); 405 int f = ydfdy; 406 CONTEXT_LOAD(dfdx, generated_vars.dfdx); 407 CONTEXT_LOAD(ydfdy, iterators.ydfdy); 408 MLA(AL, 0, f, Rx, dfdx, ydfdy); 409 CONTEXT_STORE(f, generated_vars.f); 410 } 411 412 // init Z coordinate 413 if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { 414 parts.z = reg_t(obtainReg()); 415 comment("compute initial Z coordinate"); 416 Scratch scratches(registerFile()); 417 int dzdx = scratches.obtain(); 418 int ydzdy = parts.z.reg; 419 CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point 420 CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point 421 MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy); 422 423 // we're going to index zbase of parts.count 424 // zbase = base + (xl-count + stride*y)*2 425 int Rs = dzdx; 426 int zbase = scratches.obtain(); 427 CONTEXT_LOAD(Rs, state.buffers.depth.stride); 428 CONTEXT_LOAD(zbase, state.buffers.depth.data); 429 SMLABB(AL, Rs, Ry, Rs, Rx); 430 ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16)); 431 ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1)); 432 CONTEXT_STORE(zbase, generated_vars.zbase); 433 } 434 435 // init texture coordinates 436 init_textures(parts.coords, reg_t(Rx), reg_t(Ry)); 437 scratches.recycle(Ry); 438 439 // iterated color 440 init_iterated_color(parts, reg_t(Rx)); 441 442 // init coverage factor application (anti-aliasing) 443 if (mAA) { 444 parts.covPtr.setTo(obtainReg(), 16); 445 CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage); 446 ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1)); 447 } 448} 449 450// --------------------------------------------------------------------------- 451 452void GGLAssembler::build_component( pixel_t& pixel, 453 const fragment_parts_t& parts, 454 int component, 455 Scratch& regs) 456{ 457 static char const * comments[] = {"alpha", "red", "green", "blue"}; 458 comment(comments[component]); 459 460 // local register file 461 Scratch scratches(registerFile()); 462 const int dst_component_size = pixel.component_size(component); 463 464 component_t temp(-1); 465 build_incoming_component( temp, dst_component_size, 466 parts, component, scratches, regs); 467 468 if (mInfo[component].inDest) { 469 470 // blending... 471 build_blending( temp, mDstPixel, component, scratches ); 472 473 // downshift component and rebuild pixel... 474 downshift(pixel, component, temp, parts.dither); 475 } 476} 477 478void GGLAssembler::build_incoming_component( 479 component_t& temp, 480 int dst_size, 481 const fragment_parts_t& parts, 482 int component, 483 Scratch& scratches, 484 Scratch& global_regs) 485{ 486 const uint32_t component_mask = 1<<component; 487 488 // Figure out what we need for the blending stage... 489 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; 490 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; 491 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) { 492 fs = GGL_ONE; 493 } 494 495 // Figure out what we need to extract and for what reason 496 const int blending = blending_codes(fs, fd); 497 498 // Are we actually going to blend? 499 const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO)); 500 501 // expand the source if the destination has more bits 502 int need_expander = false; 503 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) { 504 texture_unit_t& tmu = mTextureMachine.tmu[i]; 505 if ((tmu.format_idx) && 506 (parts.texel[i].component_size(component) < dst_size)) { 507 need_expander = true; 508 } 509 } 510 511 // do we need to extract this component? 512 const bool multiTexture = mTextureMachine.activeUnits > 1; 513 const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) && 514 (isAlphaSourceNeeded()); 515 int need_extract = mInfo[component].needed; 516 if (mInfo[component].inDest) 517 { 518 need_extract |= ((need_blending ? 519 (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander)); 520 need_extract |= (mTextureMachine.mask != mTextureMachine.replaced); 521 need_extract |= mInfo[component].smooth; 522 need_extract |= mInfo[component].fog; 523 need_extract |= mDithering; 524 need_extract |= multiTexture; 525 } 526 527 if (need_extract) { 528 Scratch& regs = blend_needs_alpha_source ? global_regs : scratches; 529 component_t fragment; 530 531 // iterated color 532 build_iterated_color(fragment, parts, component, regs); 533 534 // texture environement (decal, modulate, replace) 535 build_texture_environment(fragment, parts, component, regs); 536 537 // expand the source if the destination has more bits 538 if (need_expander && (fragment.size() < dst_size)) { 539 // we're here only if we fetched a texel 540 // (so we know for sure fragment is CORRUPTIBLE) 541 expand(fragment, fragment, dst_size); 542 } 543 544 // We have a few specific things to do for the alpha-channel 545 if ((component==GGLFormat::ALPHA) && 546 (mInfo[component].needed || fragment.size()<dst_size)) 547 { 548 // convert to integer_t first and make sure 549 // we don't corrupt a needed register 550 if (fragment.l) { 551 component_t incoming(fragment); 552 modify(fragment, regs); 553 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l)); 554 fragment.h -= fragment.l; 555 fragment.l = 0; 556 } 557 558 // coverage factor application 559 build_coverage_application(fragment, parts, regs); 560 561 // alpha-test 562 build_alpha_test(fragment, parts); 563 564 if (blend_needs_alpha_source) { 565 // We keep only 8 bits for the blending stage 566 const int shift = fragment.h <= 8 ? 0 : fragment.h-8; 567 if (fragment.flags & CORRUPTIBLE) { 568 fragment.flags &= ~CORRUPTIBLE; 569 mAlphaSource.setTo(fragment.reg, 570 fragment.size(), fragment.flags); 571 if (shift) { 572 MOV(AL, 0, mAlphaSource.reg, 573 reg_imm(mAlphaSource.reg, LSR, shift)); 574 } 575 } else { 576 // XXX: it would better to do this in build_blend_factor() 577 // so we can avoid the extra MOV below. 578 mAlphaSource.setTo(regs.obtain(), 579 fragment.size(), CORRUPTIBLE); 580 if (shift) { 581 MOV(AL, 0, mAlphaSource.reg, 582 reg_imm(fragment.reg, LSR, shift)); 583 } else { 584 MOV(AL, 0, mAlphaSource.reg, fragment.reg); 585 } 586 } 587 mAlphaSource.s -= shift; 588 } 589 } 590 591 // fog... 592 build_fog( fragment, component, regs ); 593 594 temp = fragment; 595 } else { 596 if (mInfo[component].inDest) { 597 // extraction not needed and replace 598 // we just select the right component 599 if ((mTextureMachine.replaced & component_mask) == 0) { 600 // component wasn't replaced, so use it! 601 temp = component_t(parts.iterated, component); 602 } 603 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { 604 const texture_unit_t& tmu = mTextureMachine.tmu[i]; 605 if ((tmu.mask & component_mask) && 606 ((tmu.replaced & component_mask) == 0)) { 607 temp = component_t(parts.texel[i], component); 608 } 609 } 610 } 611 } 612} 613 614bool GGLAssembler::isAlphaSourceNeeded() const 615{ 616 // XXX: also needed for alpha-test 617 const int bs = mBlendSrc; 618 const int bd = mBlendDst; 619 return bs==GGL_SRC_ALPHA_SATURATE || 620 bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA || 621 bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ; 622} 623 624// --------------------------------------------------------------------------- 625 626void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts) 627{ 628 if (mSmooth && !parts.iterated_packed) { 629 // update the iterated color in a pipelined way... 630 comment("update iterated color"); 631 Scratch scratches(registerFile()); 632 633 const int reload = parts.reload; 634 for (int i=0 ; i<4 ; i++) { 635 if (!mInfo[i].iterated) 636 continue; 637 638 int c = parts.argb[i].reg; 639 int dx = parts.argb_dx[i].reg; 640 641 if (reload & 1) { 642 c = scratches.obtain(); 643 CONTEXT_LOAD(c, generated_vars.argb[i].c); 644 } 645 if (reload & 2) { 646 dx = scratches.obtain(); 647 CONTEXT_LOAD(dx, generated_vars.argb[i].dx); 648 } 649 650 if (mSmooth) { 651 ADD(AL, 0, c, c, dx); 652 } 653 654 if (reload & 1) { 655 CONTEXT_STORE(c, generated_vars.argb[i].c); 656 scratches.recycle(c); 657 } 658 if (reload & 2) { 659 scratches.recycle(dx); 660 } 661 } 662 } 663} 664 665// --------------------------------------------------------------------------- 666 667void GGLAssembler::build_coverage_application(component_t& fragment, 668 const fragment_parts_t& parts, Scratch& regs) 669{ 670 // here fragment.l is guarenteed to be 0 671 if (mAA) { 672 // coverages are 1.15 fixed-point numbers 673 comment("coverage application"); 674 675 component_t incoming(fragment); 676 modify(fragment, regs); 677 678 Scratch scratches(registerFile()); 679 int cf = scratches.obtain(); 680 LDRH(AL, cf, parts.covPtr.reg, immed8_post(2)); 681 if (fragment.h > 31) { 682 fragment.h--; 683 SMULWB(AL, fragment.reg, incoming.reg, cf); 684 } else { 685 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1)); 686 SMULWB(AL, fragment.reg, fragment.reg, cf); 687 } 688 } 689} 690 691// --------------------------------------------------------------------------- 692 693void GGLAssembler::build_alpha_test(component_t& fragment, 694 const fragment_parts_t& parts) 695{ 696 if (mAlphaTest != GGL_ALWAYS) { 697 comment("Alpha Test"); 698 Scratch scratches(registerFile()); 699 int ref = scratches.obtain(); 700 const int shift = GGL_COLOR_BITS-fragment.size(); 701 CONTEXT_LOAD(ref, state.alpha_test.ref); 702 if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift)); 703 else CMP(AL, fragment.reg, ref); 704 int cc = NV; 705 switch (mAlphaTest) { 706 case GGL_NEVER: cc = NV; break; 707 case GGL_LESS: cc = LT; break; 708 case GGL_EQUAL: cc = EQ; break; 709 case GGL_LEQUAL: cc = LS; break; 710 case GGL_GREATER: cc = HI; break; 711 case GGL_NOTEQUAL: cc = NE; break; 712 case GGL_GEQUAL: cc = HS; break; 713 } 714 B(cc^1, "discard_after_textures"); 715 } 716} 717 718// --------------------------------------------------------------------------- 719 720void GGLAssembler::build_depth_test( 721 const fragment_parts_t& parts, uint32_t mask) 722{ 723 mask &= Z_TEST|Z_WRITE; 724 const needs_t& needs = mBuilderContext.needs; 725 const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p); 726 Scratch scratches(registerFile()); 727 728 if (mDepthTest != GGL_ALWAYS || zmask) { 729 int cc=AL, ic=AL; 730 switch (mDepthTest) { 731 case GGL_LESS: ic = HI; break; 732 case GGL_EQUAL: ic = EQ; break; 733 case GGL_LEQUAL: ic = HS; break; 734 case GGL_GREATER: ic = LT; break; 735 case GGL_NOTEQUAL: ic = NE; break; 736 case GGL_GEQUAL: ic = LS; break; 737 case GGL_NEVER: 738 // this never happens, because it's taken care of when 739 // computing the needs. but we keep it for completness. 740 comment("Depth Test (NEVER)"); 741 B(AL, "discard_before_textures"); 742 return; 743 case GGL_ALWAYS: 744 // we're here because zmask is enabled 745 mask &= ~Z_TEST; // test always passes. 746 break; 747 } 748 749 // inverse the condition 750 cc = ic^1; 751 752 if ((mask & Z_WRITE) && !zmask) { 753 mask &= ~Z_WRITE; 754 } 755 756 if (!mask) 757 return; 758 759 comment("Depth Test"); 760 761 int zbase = scratches.obtain(); 762 int depth = scratches.obtain(); 763 int z = parts.z.reg; 764 765 CONTEXT_LOAD(zbase, generated_vars.zbase); // stall 766 SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15)); 767 // above does zbase = zbase + ((count >> 16) << 1) 768 769 if (mask & Z_TEST) { 770 LDRH(AL, depth, zbase); // stall 771 CMP(AL, depth, reg_imm(z, LSR, 16)); 772 B(cc, "discard_before_textures"); 773 } 774 if (mask & Z_WRITE) { 775 if (mask == Z_WRITE) { 776 // only z-write asked, cc is meaningless 777 ic = AL; 778 } 779 MOV(AL, 0, depth, reg_imm(z, LSR, 16)); 780 STRH(ic, depth, zbase); 781 } 782 } 783} 784 785void GGLAssembler::build_iterate_z(const fragment_parts_t& parts) 786{ 787 const needs_t& needs = mBuilderContext.needs; 788 if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { 789 Scratch scratches(registerFile()); 790 int dzdx = scratches.obtain(); 791 CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall 792 ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx); 793 } 794} 795 796void GGLAssembler::build_iterate_f(const fragment_parts_t& parts) 797{ 798 const needs_t& needs = mBuilderContext.needs; 799 if (GGL_READ_NEEDS(P_FOG, needs.p)) { 800 Scratch scratches(registerFile()); 801 int dfdx = scratches.obtain(); 802 int f = scratches.obtain(); 803 CONTEXT_LOAD(f, generated_vars.f); 804 CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall 805 ADD(AL, 0, f, f, dfdx); 806 CONTEXT_STORE(f, generated_vars.f); 807 } 808} 809 810// --------------------------------------------------------------------------- 811 812void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs) 813{ 814 const needs_t& needs = mBuilderContext.needs; 815 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; 816 if (opcode == GGL_COPY) 817 return; 818 819 comment("logic operation"); 820 821 pixel_t s(pixel); 822 if (!(pixel.flags & CORRUPTIBLE)) { 823 pixel.reg = regs.obtain(); 824 pixel.flags |= CORRUPTIBLE; 825 } 826 827 pixel_t d(mDstPixel); 828 switch(opcode) { 829 case GGL_CLEAR: MOV(AL, 0, pixel.reg, imm(0)); break; 830 case GGL_AND: AND(AL, 0, pixel.reg, s.reg, d.reg); break; 831 case GGL_AND_REVERSE: BIC(AL, 0, pixel.reg, s.reg, d.reg); break; 832 case GGL_COPY: break; 833 case GGL_AND_INVERTED: BIC(AL, 0, pixel.reg, d.reg, s.reg); break; 834 case GGL_NOOP: MOV(AL, 0, pixel.reg, d.reg); break; 835 case GGL_XOR: EOR(AL, 0, pixel.reg, s.reg, d.reg); break; 836 case GGL_OR: ORR(AL, 0, pixel.reg, s.reg, d.reg); break; 837 case GGL_NOR: ORR(AL, 0, pixel.reg, s.reg, d.reg); 838 MVN(AL, 0, pixel.reg, pixel.reg); break; 839 case GGL_EQUIV: EOR(AL, 0, pixel.reg, s.reg, d.reg); 840 MVN(AL, 0, pixel.reg, pixel.reg); break; 841 case GGL_INVERT: MVN(AL, 0, pixel.reg, d.reg); break; 842 case GGL_OR_REVERSE: // s | ~d == ~(~s & d) 843 BIC(AL, 0, pixel.reg, d.reg, s.reg); 844 MVN(AL, 0, pixel.reg, pixel.reg); break; 845 case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg); break; 846 case GGL_OR_INVERTED: // ~s | d == ~(s & ~d) 847 BIC(AL, 0, pixel.reg, s.reg, d.reg); 848 MVN(AL, 0, pixel.reg, pixel.reg); break; 849 case GGL_NAND: AND(AL, 0, pixel.reg, s.reg, d.reg); 850 MVN(AL, 0, pixel.reg, pixel.reg); break; 851 case GGL_SET: MVN(AL, 0, pixel.reg, imm(0)); break; 852 }; 853} 854 855// --------------------------------------------------------------------------- 856 857static uint32_t find_bottom(uint32_t val) 858{ 859 uint32_t i = 0; 860 while (!(val & (3<<i))) 861 i+= 2; 862 return i; 863} 864 865static void normalize(uint32_t& val, uint32_t& rot) 866{ 867 rot = 0; 868 while (!(val&3) || (val & 0xFC000000)) { 869 uint32_t newval; 870 newval = val >> 2; 871 newval |= (val&3) << 30; 872 val = newval; 873 rot += 2; 874 if (rot == 32) { 875 rot = 0; 876 break; 877 } 878 } 879} 880 881void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits) 882{ 883 uint32_t rot; 884 uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1; 885 mask &= size; 886 887 if (mask == size) { 888 if (d != s) 889 MOV( AL, 0, d, s); 890 return; 891 } 892 893 int negative_logic = !isValidImmediate(mask); 894 if (negative_logic) { 895 mask = ~mask & size; 896 } 897 normalize(mask, rot); 898 899 if (mask) { 900 while (mask) { 901 uint32_t bitpos = find_bottom(mask); 902 int shift = rot + bitpos; 903 uint32_t m = mask & (0xff << bitpos); 904 mask &= ~m; 905 m >>= bitpos; 906 int32_t newMask = (m<<shift) | (m>>(32-shift)); 907 if (!negative_logic) { 908 AND( AL, 0, d, s, imm(newMask) ); 909 } else { 910 BIC( AL, 0, d, s, imm(newMask) ); 911 } 912 s = d; 913 } 914 } else { 915 MOV( AL, 0, d, imm(0)); 916 } 917} 918 919void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs) 920{ 921 if (!mMasking || mAllMasked) { 922 return; 923 } 924 925 comment("color mask"); 926 927 pixel_t fb(mDstPixel); 928 pixel_t s(pixel); 929 if (!(pixel.flags & CORRUPTIBLE)) { 930 pixel.reg = regs.obtain(); 931 pixel.flags |= CORRUPTIBLE; 932 } 933 934 int mask = 0; 935 for (int i=0 ; i<4 ; i++) { 936 const int component_mask = 1<<i; 937 const int h = fb.format.c[i].h; 938 const int l = fb.format.c[i].l; 939 if (h && (!(mMasking & component_mask))) { 940 mask |= ((1<<(h-l))-1) << l; 941 } 942 } 943 944 // There is no need to clear the masked components of the source 945 // (unless we applied a logic op), because they're already zeroed 946 // by construction (masked components are not computed) 947 948 if (mLogicOp) { 949 const needs_t& needs = mBuilderContext.needs; 950 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; 951 if (opcode != GGL_CLEAR) { 952 // clear masked component of source 953 build_and_immediate(pixel.reg, s.reg, mask, fb.size()); 954 s = pixel; 955 } 956 } 957 958 // clear non masked components of destination 959 build_and_immediate(fb.reg, fb.reg, ~mask, fb.size()); 960 961 // or back the channels that were masked 962 if (s.reg == fb.reg) { 963 // this is in fact a MOV 964 if (s.reg == pixel.reg) { 965 // ugh. this in in fact a nop 966 } else { 967 MOV(AL, 0, pixel.reg, fb.reg); 968 } 969 } else { 970 ORR(AL, 0, pixel.reg, s.reg, fb.reg); 971 } 972} 973 974// --------------------------------------------------------------------------- 975 976void GGLAssembler::base_offset( 977 const pointer_t& d, const pointer_t& b, const reg_t& o) 978{ 979 switch (b.size) { 980 case 32: 981 ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2)); 982 break; 983 case 24: 984 if (d.reg == b.reg) { 985 ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); 986 ADD(AL, 0, d.reg, d.reg, o.reg); 987 } else { 988 ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1)); 989 ADD(AL, 0, d.reg, d.reg, b.reg); 990 } 991 break; 992 case 16: 993 ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); 994 break; 995 case 8: 996 ADD(AL, 0, d.reg, b.reg, o.reg); 997 break; 998 } 999} 1000 1001// ---------------------------------------------------------------------------- 1002// cheezy register allocator... 1003// ---------------------------------------------------------------------------- 1004 1005void RegisterAllocator::reset() 1006{ 1007 mRegs.reset(); 1008} 1009 1010int RegisterAllocator::reserveReg(int reg) 1011{ 1012 return mRegs.reserve(reg); 1013} 1014 1015int RegisterAllocator::obtainReg() 1016{ 1017 return mRegs.obtain(); 1018} 1019 1020void RegisterAllocator::recycleReg(int reg) 1021{ 1022 mRegs.recycle(reg); 1023} 1024 1025RegisterAllocator::RegisterFile& RegisterAllocator::registerFile() 1026{ 1027 return mRegs; 1028} 1029 1030// ---------------------------------------------------------------------------- 1031 1032RegisterAllocator::RegisterFile::RegisterFile() 1033 : mRegs(0), mTouched(0), mStatus(0) 1034{ 1035 reserve(ARMAssemblerInterface::SP); 1036 reserve(ARMAssemblerInterface::PC); 1037} 1038 1039RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs) 1040 : mRegs(rhs.mRegs), mTouched(rhs.mTouched) 1041{ 1042} 1043 1044RegisterAllocator::RegisterFile::~RegisterFile() 1045{ 1046} 1047 1048bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const 1049{ 1050 return (mRegs == rhs.mRegs); 1051} 1052 1053void RegisterAllocator::RegisterFile::reset() 1054{ 1055 mRegs = mTouched = mStatus = 0; 1056 reserve(ARMAssemblerInterface::SP); 1057 reserve(ARMAssemblerInterface::PC); 1058} 1059 1060int RegisterAllocator::RegisterFile::reserve(int reg) 1061{ 1062 LOG_ALWAYS_FATAL_IF(isUsed(reg), 1063 "reserving register %d, but already in use", 1064 reg); 1065 mRegs |= (1<<reg); 1066 mTouched |= mRegs; 1067 return reg; 1068} 1069 1070void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask) 1071{ 1072 mRegs |= regMask; 1073 mTouched |= regMask; 1074} 1075 1076int RegisterAllocator::RegisterFile::isUsed(int reg) const 1077{ 1078 LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg); 1079 return mRegs & (1<<reg); 1080} 1081 1082int RegisterAllocator::RegisterFile::obtain() 1083{ 1084 const char priorityList[14] = { 0, 1, 2, 3, 1085 12, 14, 4, 5, 1086 6, 7, 8, 9, 1087 10, 11 }; 1088 const int nbreg = sizeof(priorityList); 1089 int i, r; 1090 for (i=0 ; i<nbreg ; i++) { 1091 r = priorityList[i]; 1092 if (!isUsed(r)) { 1093 break; 1094 } 1095 } 1096 // this is not an error anymore because, we'll try again with 1097 // a lower optimization level. 1098 //LOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n"); 1099 if (i >= nbreg) { 1100 mStatus |= OUT_OF_REGISTERS; 1101 // we return SP so we can more easily debug things 1102 // the code will never be run anyway. 1103 return ARMAssemblerInterface::SP; 1104 } 1105 reserve(r); 1106 return r; 1107} 1108 1109bool RegisterAllocator::RegisterFile::hasFreeRegs() const 1110{ 1111 return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true; 1112} 1113 1114int RegisterAllocator::RegisterFile::countFreeRegs() const 1115{ 1116 int f = ~mRegs & 0xFFFF; 1117 // now count number of 1 1118 f = (f & 0x5555) + ((f>>1) & 0x5555); 1119 f = (f & 0x3333) + ((f>>2) & 0x3333); 1120 f = (f & 0x0F0F) + ((f>>4) & 0x0F0F); 1121 f = (f & 0x00FF) + ((f>>8) & 0x00FF); 1122 return f; 1123} 1124 1125void RegisterAllocator::RegisterFile::recycle(int reg) 1126{ 1127 LOG_FATAL_IF(!isUsed(reg), 1128 "recycling unallocated register %d", 1129 reg); 1130 mRegs &= ~(1<<reg); 1131} 1132 1133void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask) 1134{ 1135 LOG_FATAL_IF((mRegs & regMask)!=regMask, 1136 "recycling unallocated registers " 1137 "(recycle=%08x, allocated=%08x, unallocated=%08x)", 1138 regMask, mRegs, mRegs®Mask); 1139 mRegs &= ~regMask; 1140} 1141 1142uint32_t RegisterAllocator::RegisterFile::touched() const 1143{ 1144 return mTouched; 1145} 1146 1147// ---------------------------------------------------------------------------- 1148 1149}; // namespace android 1150 1151