GGLAssembler.cpp revision 4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53
/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "GGLAssembler"

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <cutils/log.h>

#include "codeflinger/GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
{
}

GGLAssembler::~GGLAssembler()
{
}

void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}

void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}

void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}

// ---------------------------------------------------------------------------

int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
{
    int err = 0;
    int opt_level = mOptLevel;
    while (opt_level >= 0) {
        reset(opt_level);
        err = scanline_core(needs, c);
        if (err == 0)
            break;
        opt_level--;
    }

    // XXX: in theory, pcForLabel is not valid before generate()
    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
    uint32_t* fragment_end_pc = pcForLabel("epilog");
    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);

    // build a name for our pipeline
    char name[64];
    sprintf(name,
        "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
        needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);

    if (err) {
        LOGE("Error while generating ""%s""\n", name);
        disassemble(name);
        return -1;
    }

    return generate(name);
}

int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
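
    // For orientation, the decoded factors plug into the usual blend
    // equation (a sketch; factor() is shorthand, not a function here):
    //     color = src*factor(mBlendSrc)  + dst*factor(mBlendDst)
    //     alpha = src*factor(mBlendSrcA) + dst*factor(mBlendDstA)
    // e.g. GL's SRC_ALPHA / ONE_MINUS_SRC_ALPHA mode decodes to
    // mBlendSrc == GGL_SRC_ALPHA, mBlendDst == GGL_ONE_MINUS_SRC_ALPHA.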

    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending = blending_codes(mBlendSrc, mBlendDst) |
                         blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked = !!(masking & mask);
        info.inDest = !info.masked && mCbFormat.c[i].h &&
                      ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed = (i==GGLFormat::ALPHA) &&
                      (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth = mSmooth && info.iterated;
        info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
    }


    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index: rotate its low bits up to the MSBs,
            // add 1 there so the carry wraps within those bits, then rotate
            // back; the pixel count in the high half is untouched.
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).
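
        // Control-flow sketch of the generated loop, reconstructed from
        // the labels used below (assumes both tests are enabled):
        //   fragment_loop:
        //     z-test                (fail: branch to discard_before_textures)
        //     textures, alpha-test  (fail: branch to discard_after_textures)
        //     blend / logic op / mask, store, iterate
        //     loop while count >= 0
        // The discard paths, emitted after the epilog, still advance every
        // iterator so the next fragment starts from consistent values.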

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) || mMasking ||
                (mLogicOp & LOGIC_OP_DST)) {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                // fetch the dither threshold for this pixel:
                // c->ditherMatrix[count & (GGL_DITHER_SIZE-1)]
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            build_component(pixel, parts, GGLFormat::ALPHA, regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED, regs);
            build_component(pixel, parts, GGLFormat::GREEN, regs);
            build_component(pixel, parts, GGLFormat::BLUE, regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        // logic operation
        build_logic_op(pixel, regs);

        // masking
        build_masking(pixel, regs);

        comment("store");
        store(parts.cbPtr, pixel, WRITE_BACK);
    }

    if (registerFile().status())
        return registerFile().status();
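
    // Everything below runs once per fragment on the main path: the color,
    // z and fog iterators advance, then the packed count is decremented in
    // its high half (imm(1<<16)) and the loop repeats while the result is
    // still non-negative (the PL condition on the branch).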

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    // compute dst ptr
    comment("compute color-buffer pointer");
    const int cb_bits = mCbFormat.size*8;
    int Rs = scratches.obtain();
    parts.cbPtr.setTo(obtainReg(), cb_bits);
    CONTEXT_LOAD(Rs, state.buffers.color.stride);
    CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
    SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
    base_offset(parts.cbPtr, parts.cbPtr, Rs);
    scratches.recycle(Rs);

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);  // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);     // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);
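
        // z is iterated in 1.31 fixed-point: the MLA above computes
        // z = ydzdy + Rx*dzdx, the plane equation evaluated at the left
        // edge of the scanline.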

        // we're going to index the depth buffer by parts.count:
        // store zbase = base + (xl + count + stride*y)*2 so the current
        // pixel's depth is always at zbase - (count>>16)*2
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_component( pixel_t& pixel,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& regs)
{
    static char const * comments[] = {"alpha", "red", "green", "blue"};
    comment(comments[component]);

    // local register file
    Scratch scratches(registerFile());
    const int dst_component_size = pixel.component_size(component);

    component_t temp(-1);
    build_incoming_component( temp, dst_component_size,
            parts, component, scratches, regs);

    if (mInfo[component].inDest) {

        // blending...
        build_blending( temp, mDstPixel, component, scratches );

        // downshift component and rebuild pixel...
        downshift(pixel, component, temp, parts.dither);
    }
}

void GGLAssembler::build_incoming_component(
    component_t& temp,
    int dst_size,
    const fragment_parts_t& parts,
    int component,
    Scratch& scratches,
    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                         (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }
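
    // In short: need_extract is set whenever the component must exist as a
    // standalone value (for blending, bit-width expansion, smooth shading,
    // fog, dithering or multi-texturing); otherwise it can be consumed
    // directly from the packed iterated/texel word in the else branch below.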

    if (need_extract) {
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                                reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would be better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                                reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}

bool GGLAssembler::isAlphaSourceNeeded() const
{
    // XXX: also needed for alpha-test
    const int bs = mBlendSrc;
    const int bd = mBlendDst;
    return  bs==GGL_SRC_ALPHA_SATURATE ||
            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
{
    if (mSmooth && !parts.iterated_packed) {
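        // parts.reload encodes which iterated-color values could not be
        // kept live in registers: bit 0 means c is round-tripped through
        // generated_vars.argb[i].c below, bit 1 means dx is reloaded from
        // generated_vars.argb[i].dx each pass.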
616 comment("update iterated color"); 617 Scratch scratches(registerFile()); 618 619 const int reload = parts.reload; 620 for (int i=0 ; i<4 ; i++) { 621 if (!mInfo[i].iterated) 622 continue; 623 624 int c = parts.argb[i].reg; 625 int dx = parts.argb_dx[i].reg; 626 627 if (reload & 1) { 628 c = scratches.obtain(); 629 CONTEXT_LOAD(c, generated_vars.argb[i].c); 630 } 631 if (reload & 2) { 632 dx = scratches.obtain(); 633 CONTEXT_LOAD(dx, generated_vars.argb[i].dx); 634 } 635 636 if (mSmooth) { 637 ADD(AL, 0, c, c, dx); 638 } 639 640 if (reload & 1) { 641 CONTEXT_STORE(c, generated_vars.argb[i].c); 642 scratches.recycle(c); 643 } 644 if (reload & 2) { 645 scratches.recycle(dx); 646 } 647 } 648 } 649} 650 651// --------------------------------------------------------------------------- 652 653void GGLAssembler::build_coverage_application(component_t& fragment, 654 const fragment_parts_t& parts, Scratch& regs) 655{ 656 // here fragment.l is guarenteed to be 0 657 if (mAA) { 658 // coverages are 1.15 fixed-point numbers 659 comment("coverage application"); 660 661 component_t incoming(fragment); 662 modify(fragment, regs); 663 664 Scratch scratches(registerFile()); 665 int cf = scratches.obtain(); 666 LDRH(AL, cf, parts.covPtr.reg, immed8_post(2)); 667 if (fragment.h > 31) { 668 fragment.h--; 669 SMULWB(AL, fragment.reg, incoming.reg, cf); 670 } else { 671 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1)); 672 SMULWB(AL, fragment.reg, fragment.reg, cf); 673 } 674 } 675} 676 677// --------------------------------------------------------------------------- 678 679void GGLAssembler::build_alpha_test(component_t& fragment, 680 const fragment_parts_t& parts) 681{ 682 if (mAlphaTest != GGL_ALWAYS) { 683 comment("Alpha Test"); 684 Scratch scratches(registerFile()); 685 int ref = scratches.obtain(); 686 const int shift = GGL_COLOR_BITS-fragment.size(); 687 CONTEXT_LOAD(ref, state.alpha_test.ref); 688 if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift)); 689 else CMP(AL, fragment.reg, ref); 690 int cc = NV; 691 switch (mAlphaTest) { 692 case GGL_NEVER: cc = NV; break; 693 case GGL_LESS: cc = LT; break; 694 case GGL_EQUAL: cc = EQ; break; 695 case GGL_LEQUAL: cc = LS; break; 696 case GGL_GREATER: cc = HI; break; 697 case GGL_NOTEQUAL: cc = NE; break; 698 case GGL_GEQUAL: cc = HS; break; 699 } 700 B(cc^1, "discard_after_textures"); 701 } 702} 703 704// --------------------------------------------------------------------------- 705 706void GGLAssembler::build_depth_test( 707 const fragment_parts_t& parts, uint32_t mask) 708{ 709 mask &= Z_TEST|Z_WRITE; 710 const needs_t& needs = mBuilderContext.needs; 711 const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p); 712 Scratch scratches(registerFile()); 713 714 if (mDepthTest != GGL_ALWAYS || zmask) { 715 int cc=AL, ic=AL; 716 switch (mDepthTest) { 717 case GGL_LESS: ic = HI; break; 718 case GGL_EQUAL: ic = EQ; break; 719 case GGL_LEQUAL: ic = HS; break; 720 case GGL_GREATER: ic = LT; break; 721 case GGL_NOTEQUAL: ic = NE; break; 722 case GGL_GEQUAL: ic = LS; break; 723 case GGL_NEVER: 724 // this never happens, because it's taken care of when 725 // computing the needs. but we keep it for completness. 726 comment("Depth Test (NEVER)"); 727 B(AL, "discard_before_textures"); 728 return; 729 case GGL_ALWAYS: 730 // we're here because zmask is enabled 731 mask &= ~Z_TEST; // test always passes. 

void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // invert the condition
        cc = ic^1;

        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_LOAD(zbase, generated_vars.zbase);  // stall
        SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
        // above does zbase = zbase - ((count >> 16) << 1), pointing zbase
        // at the 16-bit depth value of the current pixel

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}

void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);  // stall
        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
    }
}

void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int f = scratches.obtain();
        CONTEXT_LOAD(f, generated_vars.f);
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);  // stall
        ADD(AL, 0, f, f, dfdx);
        CONTEXT_STORE(f, generated_vars.f);
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    pixel_t d(mDstPixel);
    switch(opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_COPY:          break;
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
    };
}

// ---------------------------------------------------------------------------
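
// The two helpers below support build_and_immediate(). An ARM
// data-processing immediate is an 8-bit value rotated right by an even
// amount, so AND-ing with an arbitrary 32-bit mask may take several
// instructions: normalize() rotates the mask into a canonical position and
// find_bottom() finds the next even bit position from which an 8-bit chunk
// can be peeled off.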
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (!(val & (3<<i)))
        i += 2;
    return i;
}

static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    while (!(val&3) || (val & 0xFC000000)) {
        uint32_t newval;
        newval = val >> 2;
        newval |= (val&3) << 30;
        val = newval;
        rot += 2;
        if (rot == 32) {
            rot = 0;
            break;
        }
    }
}

void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
{
    uint32_t rot;
    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
    mask &= size;

    if (mask == size) {
        if (d != s)
            MOV( AL, 0, d, s);
        return;
    }

    int negative_logic = !isValidImmediate(mask);
    if (negative_logic) {
        mask = ~mask & size;
    }
    normalize(mask, rot);

    if (mask) {
        while (mask) {
            uint32_t bitpos = find_bottom(mask);
            int shift = rot + bitpos;
            uint32_t m = mask & (0xff << bitpos);
            mask &= ~m;
            m >>= bitpos;
            int32_t newMask = (m<<shift) | (m>>(32-shift));
            if (!negative_logic) {
                AND( AL, 0, d, s, imm(newMask) );
            } else {
                BIC( AL, 0, d, s, imm(newMask) );
            }
            s = d;
        }
    } else {
        MOV( AL, 0, d, imm(0));
    }
}

void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    if (!mMasking)
        return;

    comment("color mask");

    pixel_t fb(mDstPixel);
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    int mask = 0;
    for (int i=0 ; i<4 ; i++) {
        const int component_mask = 1<<i;
        const int h = fb.format.c[i].h;
        const int l = fb.format.c[i].l;
        if (h && (!(mMasking & component_mask))) {
            mask |= ((1<<(h-l))-1) << l;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)

    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked component of source
            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
            s = pixel;
        }
    }

    // clear non masked components of destination
    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());

    // or back the channels that were masked
    if (s.reg == fb.reg) {
        // this is in fact a MOV
        if (s.reg == pixel.reg) {
            // ugh. this is in fact a nop
        } else {
            MOV(AL, 0, pixel.reg, fb.reg);
        }
    } else {
        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::base_offset(
        const pointer_t& d, const pointer_t& b, const reg_t& o)
{
    switch (b.size) {
    case 32:
        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
        break;
    case 24:
        // 24 bpp: 3 bytes per pixel, computed with a shift-and-add (o*2 + o)
        if (d.reg == b.reg) {
            ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
            ADD(AL, 0, d.reg, d.reg, o.reg);
        } else {
            ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
            ADD(AL, 0, d.reg, d.reg, b.reg);
        }
        break;
    case 16:
        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
        break;
    case 8:
        ADD(AL, 0, d.reg, b.reg, o.reg);
        break;
    }
}

// ----------------------------------------------------------------------------
// cheezy register allocator...
// ----------------------------------------------------------------------------
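
// A sketch of the scheme implemented below: allocated registers are
// tracked in a 16-bit bitmask; SP and PC are permanently reserved (and R0
// holds the context pointer, reserved in scanline_core). Running out of
// registers is not fatal: it is recorded in mStatus so that scanline()
// can retry the whole pipeline at a lower optimization level.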
988// ---------------------------------------------------------------------------- 989 990void RegisterAllocator::reset() 991{ 992 mRegs.reset(); 993} 994 995int RegisterAllocator::reserveReg(int reg) 996{ 997 return mRegs.reserve(reg); 998} 999 1000int RegisterAllocator::obtainReg() 1001{ 1002 return mRegs.obtain(); 1003} 1004 1005void RegisterAllocator::recycleReg(int reg) 1006{ 1007 mRegs.recycle(reg); 1008} 1009 1010RegisterAllocator::RegisterFile& RegisterAllocator::registerFile() 1011{ 1012 return mRegs; 1013} 1014 1015// ---------------------------------------------------------------------------- 1016 1017RegisterAllocator::RegisterFile::RegisterFile() 1018 : mRegs(0), mTouched(0), mStatus(0) 1019{ 1020 reserve(ARMAssemblerInterface::SP); 1021 reserve(ARMAssemblerInterface::PC); 1022} 1023 1024RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs) 1025 : mRegs(rhs.mRegs), mTouched(rhs.mTouched) 1026{ 1027} 1028 1029RegisterAllocator::RegisterFile::~RegisterFile() 1030{ 1031} 1032 1033bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const 1034{ 1035 return (mRegs == rhs.mRegs); 1036} 1037 1038void RegisterAllocator::RegisterFile::reset() 1039{ 1040 mRegs = mTouched = mStatus = 0; 1041 reserve(ARMAssemblerInterface::SP); 1042 reserve(ARMAssemblerInterface::PC); 1043} 1044 1045int RegisterAllocator::RegisterFile::reserve(int reg) 1046{ 1047 LOG_ALWAYS_FATAL_IF(isUsed(reg), 1048 "reserving register %d, but already in use", 1049 reg); 1050 mRegs |= (1<<reg); 1051 mTouched |= mRegs; 1052 return reg; 1053} 1054 1055void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask) 1056{ 1057 mRegs |= regMask; 1058 mTouched |= regMask; 1059} 1060 1061int RegisterAllocator::RegisterFile::isUsed(int reg) const 1062{ 1063 LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg); 1064 return mRegs & (1<<reg); 1065} 1066 1067int RegisterAllocator::RegisterFile::obtain() 1068{ 1069 const char priorityList[14] = { 0, 1, 2, 3, 1070 12, 14, 4, 5, 1071 6, 7, 8, 9, 1072 10, 11 }; 1073 const int nbreg = sizeof(priorityList); 1074 int i, r; 1075 for (i=0 ; i<nbreg ; i++) { 1076 r = priorityList[i]; 1077 if (!isUsed(r)) { 1078 break; 1079 } 1080 } 1081 // this is not an error anymore because, we'll try again with 1082 // a lower optimization level. 1083 //LOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n"); 1084 if (i >= nbreg) { 1085 mStatus |= OUT_OF_REGISTERS; 1086 // we return SP so we can more easily debug things 1087 // the code will never be run anyway. 1088 return ARMAssemblerInterface::SP; 1089 } 1090 reserve(r); 1091 return r; 1092} 1093 1094bool RegisterAllocator::RegisterFile::hasFreeRegs() const 1095{ 1096 return ((mRegs & 0xFFFF) == 0xFFFF) ? 

bool RegisterAllocator::RegisterFile::hasFreeRegs() const
{
    return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true;
}

int RegisterAllocator::RegisterFile::countFreeRegs() const
{
    int f = ~mRegs & 0xFFFF;
    // now count the number of 1 bits
    f = (f & 0x5555) + ((f>>1) & 0x5555);
    f = (f & 0x3333) + ((f>>2) & 0x3333);
    f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
    f = (f & 0x00FF) + ((f>>8) & 0x00FF);
    return f;
}

void RegisterAllocator::RegisterFile::recycle(int reg)
{
    LOG_FATAL_IF(!isUsed(reg),
            "recycling unallocated register %d",
            reg);
    mRegs &= ~(1<<reg);
}

void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    LOG_FATAL_IF((mRegs & regMask)!=regMask,
            "recycling unallocated registers "
            "(recycle=%08x, allocated=%08x, unallocated=%08x)",
            regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}

uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}

// ----------------------------------------------------------------------------

}; // namespace android