texturing.cpp revision 35237d135807af84bf9b0e5b8d7f8633e58db6f5
/* libs/pixelflinger/codeflinger/texturing.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>

#include <cutils/log.h>

#include "codeflinger/GGLAssembler.h"


namespace android {

// ---------------------------------------------------------------------------

// iterators are initialized like this:
// (intToFixedCenter(x) * dx)>>16 + x0
// ((x<<16 + 0x8000) * dx)>>16 + x0
// ((x<<16)*dx + (0x8000*dx))>>16 + x0
// ( (x*dx) + dx>>1 ) + x0
// (x*dx) + (dx>>1 + x0)

void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;

    if (mSmooth) {
        // NOTE: we could handle this in the mDithering + !mSmooth case,
        // but this would use up to 4 more registers for the color
        // components for only a little added quality.
        // Currently, this causes the system to run out of registers in
        // some cases (see issue #719496)

        comment("compute initial iterated color (smooth and/or dither case)");

        parts.iterated_packed = 0;
        parts.packed = 0;

        // 0x1: color component
        // 0x2: iterators
        const int optReload = mOptLevel >> 1;
        if (optReload >= 3)         parts.reload = 0; // reload nothing
        else if (optReload == 2)    parts.reload = 2; // reload iterators
        else if (optReload == 1)    parts.reload = 1; // reload colors
        else if (optReload <= 0)    parts.reload = 3; // reload both

        if (!mSmooth) {
            // we're not smoothing (just dithering), we never have to
            // reload the iterators
            parts.reload &= ~2;
        }

        Scratch scratches(registerFile());
        const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
        const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            // this component exists in the destination and is not replaced
            // by a texture unit.
            const int c = (parts.reload & 1) ? t0 : obtainReg();
            if (i==0) CONTEXT_LOAD(c, iterators.ydady);
            if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
            if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
            if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
            parts.argb[i].reg = c;

            if (mInfo[i].smooth) {
                parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
                const int dvdx = parts.argb_dx[i].reg;
                CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
                MLA(AL, 0, c, x.reg, dvdx, c);

                // adjust the color iterator to make sure it won't overflow
                if (!mAA) {
                    // this is not needed when we're using anti-aliasing
                    // because we will (have to) clamp the components
                    // anyway.
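                    // (In effect, the sequence below evaluates the iterator
                    // at the end of the span, end = c + (count>>16)*dvdx,
                    // with the S bit set on the MLA; when that result goes
                    // negative it is subtracted back out of the start value,
                    // and the final BIC with the sign mask (c ASR 31)
                    // clamps a negative start to zero.)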
                    int end = scratches.obtain();
                    MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16));
                    MLA(AL, 1, end, dvdx, end, c);
                    SUB(MI, 0, c, c, end);
                    BIC(AL, 0, c, c, reg_imm(c, ASR, 31));
                    scratches.recycle(end);
                }
            }

            if (parts.reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
            }
        }
    } else {
        // We're not smooth-shading, so we can just use a packed version
        // of the color and extract the components as needed (or not at
        // all if we don't blend)

        // figure out if we need the iterated color
        int load = 0;
        for (int i=0 ; i<4 ; i++) {
            component_info_t& info = mInfo[i];
            if ((info.inDest || info.needed) && !info.replaced)
                load |= 1;
        }

        parts.iterated_packed = 1;
        parts.packed = (!mTextureMachine.mask && !mBlending
                && !mFog && !mDithering);
        parts.reload = 0;
        if (load || parts.packed) {
            if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) {
                comment("load initial iterated color (8888 packed)");
                parts.iterated.setTo(obtainReg(),
                        &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
                CONTEXT_LOAD(parts.iterated.reg, packed8888);
            } else {
                comment("load initial iterated color (dest format packed)");

                parts.iterated.setTo(obtainReg(), &mCbFormat);

                // pre-mask the iterated color
                const int bits = parts.iterated.size();
                const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
                uint32_t mask = 0;
                if (mMasking) {
                    for (int i=0 ; i<4 ; i++) {
                        const int component_mask = 1<<i;
                        const int h = parts.iterated.format.c[i].h;
                        const int l = parts.iterated.format.c[i].l;
                        if (h && (!(mMasking & component_mask))) {
                            mask |= ((1<<(h-l))-1) << l;
                        }
                    }
                }

                if (mMasking && ((mask & size)==0)) {
                    // none of the components are present in the mask
                } else {
                    CONTEXT_LOAD(parts.iterated.reg, packed);
                    if (mCbFormat.size == 1) {
                        AND(AL, 0, parts.iterated.reg,
                                parts.iterated.reg, imm(0xFF));
                    } else if (mCbFormat.size == 2) {
                        MOV(AL, 0, parts.iterated.reg,
                                reg_imm(parts.iterated.reg, LSR, 16));
                    }
                }

                // pre-mask the iterated color
                if (mMasking) {
                    build_and_immediate(parts.iterated.reg, parts.iterated.reg,
                            mask, bits);
                }
            }
        }
    }
}

void GGLAssembler::build_iterated_color(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    fragment.setTo(regs.obtain(), 0, 32, CORRUPTIBLE);

    if (!mInfo[component].iterated)
        return;

    if (parts.iterated_packed) {
        // iterated colors are packed, extract the one we need
        extract(fragment, parts.iterated, component);
    } else {
        fragment.h = GGL_COLOR_BITS;
        fragment.l = GGL_COLOR_BITS - 8;
        fragment.flags |= CLEAR_LO;
        // iterated colors are held in their own register
        // (smooth and/or dithering case)
        if (parts.reload==3) {
            // this implies mSmooth
            Scratch scratches(registerFile());
            int dx = scratches.obtain();
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
            CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
            ADD(AL, 0, dx, fragment.reg, dx);
            CONTEXT_STORE(dx, generated_vars.argb[component].c);
        } else if (parts.reload & 1) {
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
        } else {
            // we don't reload, so simply rename the register and mark it
            // non-CORRUPTIBLE so that the texture env or blending code
            // won't modify this (renamed) register
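            // (The aliasing below saves a MOV in the generated code: the
            // scratch obtained above is released and the fragment simply
            // takes over the iterator's register.)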
            regs.recycle(fragment.reg);
            fragment.reg = parts.argb[component].reg;
            fragment.flags &= ~CORRUPTIBLE;
        }
        if (mInfo[component].smooth && mAA) {
            // when using smooth shading AND anti-aliasing, we need to clamp
            // the iterators because there is always an extra pixel on the
            // edges, which most of the time will cause an overflow
            // (since technically it's outside of the domain).
            BIC(AL, 0, fragment.reg, fragment.reg,
                    reg_imm(fragment.reg, ASR, 31));
            component_sat(fragment);
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs)
{
    // gather some information about the components we need to process...
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    switch(opcode) {
    case GGL_COPY:
        mLogicOp = 0;
        break;
    case GGL_CLEAR:
    case GGL_SET:
        mLogicOp = LOGIC_OP;
        break;
    case GGL_AND:
    case GGL_AND_REVERSE:
    case GGL_AND_INVERTED:
    case GGL_XOR:
    case GGL_OR:
    case GGL_NOR:
    case GGL_EQUIV:
    case GGL_OR_REVERSE:
    case GGL_OR_INVERTED:
    case GGL_NAND:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST;
        break;
    case GGL_NOOP:
    case GGL_INVERT:
        mLogicOp = LOGIC_OP|LOGIC_OP_DST;
        break;
    case GGL_COPY_INVERTED:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC;
        break;
    }
}
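
// Note: the TMUs are scanned last-to-first so that `replaced` accumulates
// the components written by later GGL_REPLACE units; an earlier unit whose
// components are all replaced downstream contributes nothing and can be
// skipped entirely.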
void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
{
    uint8_t replaced=0;
    mTextureMachine.mask = 0;
    mTextureMachine.activeUnits = 0;
    for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (replaced == 0xF) {
            // all components are replaced, skip this TMU.
            tmu.format_idx = 0;
            tmu.mask = 0;
            tmu.replaced = replaced;
            continue;
        }
        tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
        tmu.format = c->formats[tmu.format_idx];
        tmu.bits = tmu.format.size*8;
        tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
        tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
        tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
        tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
        tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
                && tmu.format.size!=3; // XXX: only 8-, 16- and 32-bit modes for now

        // 5551 linear filtering is not supported
        if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
            tmu.linear = 0;

        tmu.mask = 0;
        tmu.replaced = replaced;

        if (tmu.format_idx) {
            mTextureMachine.activeUnits++;
            if (tmu.format.c[0].h) tmu.mask |= 0x1;
            if (tmu.format.c[1].h) tmu.mask |= 0x2;
            if (tmu.format.c[2].h) tmu.mask |= 0x4;
            if (tmu.format.c[3].h) tmu.mask |= 0x8;
            if (tmu.env == GGL_REPLACE) {
                replaced |= tmu.mask;
            } else if (tmu.env == GGL_DECAL) {
                if (!tmu.format.c[GGLFormat::ALPHA].h) {
                    // if we don't have alpha, decal does nothing
                    tmu.mask = 0;
                } else {
                    // decal always ignores At
                    tmu.mask &= ~(1<<GGLFormat::ALPHA);
                }
            }
        }
        mTextureMachine.mask |= tmu.mask;
        //printf("%d: mask=%08lx, replaced=%08lx\n",
        //    i, int(tmu.mask), int(tmu.replaced));
    }
    mTextureMachine.replaced = replaced;
    mTextureMachine.directTexture = 0;
    //printf("replaced=%08lx\n", mTextureMachine.replaced);
}


void GGLAssembler::init_textures(
        tex_coord_t* coords,
        const reg_t& x, const reg_t& y)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;
    int Rctx = mBuilderContext.Rctx;
    int Rx = x.reg;
    int Ry = y.reg;

    if (mTextureMachine.mask) {
        comment("compute texture coordinates");
    }

    // init texture coordinates for each tmu
    const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        {
            // 1:1 texture
            pointer_t& txPtr = coords[i].ptr;
            txPtr.setTo(obtainReg(), tmu.bits);
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
            ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16));    // x += (s>>16)
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
            ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16));    // y += (t>>16)
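            // (The address computation below relies on SMLABB, a 16x16+32
            // multiply-accumulate on the low halfwords: texel index =
            // x + y*stride, assuming both values fit in 16 bits, which
            // holds for the texture sizes handled here. base_offset()
            // then turns the texel index into a byte address for the
            // format's pixel size.)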
            // merge base & offset
            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
            SMLABB(AL, Rx, Ry, txPtr.reg, Rx);  // x+y*stride
            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
            base_offset(txPtr, txPtr, Rx);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = coords[i].s;
            reg_t& t = coords[i].t;
            // s = (x * dsdx)>>16 + ydsdy
            // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
            // t = (x * dtdx)>>16 + ydtdy
            // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
            s.setTo(obtainReg());
            t.setTo(obtainReg());
            const int need_w = GGL_READ_NEEDS(W, needs.n);
            if (need_w) {
                CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
            } else {
                int ydsdy = scratches.obtain();
                int ydtdy = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx);
                CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx);
                CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
                MLA(AL, 0, s.reg, Rx, s.reg, ydsdy);
                MLA(AL, 0, t.reg, Rx, t.reg, ydtdy);
            }

            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                recycleReg(s.reg);
                recycleReg(t.reg);
            }
        }

        // direct texture?
        if (!multiTexture && !mBlending && !mDithering && !mFog &&
            cb_format_idx == tmu.format_idx && !tmu.linear &&
            mTextureMachine.replaced == tmu.mask)
        {
            mTextureMachine.directTexture = i + 1;
        }
    }
}

void GGLAssembler::build_textures(  fragment_parts_t& parts,
                                    Scratch& regs)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;
    int Rctx = mBuilderContext.Rctx;

    // We don't have a way to spill registers automatically, so we spill
    // the depth and AA registers early, when we know we may have to.
    // build the spill list...
    uint32_t spill_list = 0;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if (tmu.linear) {
            // we may run out of registers if we have linear filtering
            // at 1 or 4 bytes / pixel on any texture unit.
            if (tmu.format.size == 1) {
                // if depth and AA are enabled, we'll be one register short
                if (parts.z.reg > 0 && parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
            if (tmu.format.size == 4) {
                // if depth or AA is enabled, we'll be one or two
                // registers short
                if (parts.z.reg > 0)
                    spill_list |= 1<<parts.z.reg;
                if (parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
        }
    }

    Spill spill(registerFile(), *this, spill_list);

    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        pointer_t& txPtr = parts.coords[i].ptr;
        pixel_t& texel = parts.texel[i];

        // repeat...
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        {   // 1:1 textures
            comment("fetch texel");
            texel.setTo(regs.obtain(), &tmu.format);
            load(txPtr, texel, WRITE_BACK);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = parts.coords[i].s;
            reg_t& t = parts.coords[i].t;
            if ((mOptLevel&1)==0) {
                comment("reload s/t (multitexture or linear filtering)");
                s.reg = scratches.obtain();
                t.reg = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
            }

            comment("compute repeat/clamp");
            int u       = scratches.obtain();
            int v       = scratches.obtain();
            int width   = scratches.obtain();
            int height  = scratches.obtain();
            int U = 0;
            int V = 0;

            CONTEXT_LOAD(width,  generated_vars.texture[i].width);
            CONTEXT_LOAD(height, generated_vars.texture[i].height);
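
            // FRAC_BITS is the number of fractional bits preserved in the
            // wrapped u/v coordinates; it stays 0 unless bilinear filtering
            // is enabled, and each filter routine below bounds how much
            // weight precision it can actually use.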
            int FRAC_BITS = 0;
            if (tmu.linear) {
                // linear interpolation
                if (tmu.format.size == 1) {
                    // for 8-bit textures, we can afford
                    // 7 bits of fractional precision at no
                    // additional cost (we can't do 8 bits
                    // because filter8 uses signed 16-bit muls)
                    FRAC_BITS = 7;
                } else if (tmu.format.size == 2) {
                    // filter16() is internally limited to 4 bits, so:
                    // FRAC_BITS=2 generates fewer instructions,
                    // FRAC_BITS=3,4,5 creates unpleasant artifacts,
                    // FRAC_BITS=6+ looks good
                    FRAC_BITS = 6;
                } else if (tmu.format.size == 4) {
                    // filter32() is internally limited to 8 bits, so:
                    // FRAC_BITS=4 looks good
                    // FRAC_BITS=5+ looks better, but generates 3 extra
                    // instructions per pixel
                    FRAC_BITS = 6;
                } else {
                    // for all other cases we use 4 bits.
                    FRAC_BITS = 4;
                }
            }
            wrapping(u, s.reg, width,  tmu.swrap, FRAC_BITS);
            wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);

            if (tmu.linear) {
                comment("compute linear filtering offsets");
                // pixel size scale
                const int shift = 31 - gglClz(tmu.format.size);
                U = scratches.obtain();
                V = scratches.obtain();

                // sample the texel center
                SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
                SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));

                // get the fractional part of U,V
                AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));
                AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));

                // compute width-1 and height-1
                SUB(AL, 0, width,  width,  imm(1));
                SUB(AL, 0, height, height, imm(1));

                // get the integer part of U,V and clamp/wrap
                // and compute offset to the next texel
                if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
                    // u has already been REPEATed
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, width);
                    CMP(AL, u, width);
                    MOV(LT, 0, width, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, width, reg_imm(width, LSL, shift));
                    RSB(GE, 0, width, width, imm(0));
                } else {
                    // u has not been CLAMPed yet
                    // algorithm:
                    //  if ((u>>4) >= width)
                    //      u = width<<4
                    //      width = 0
                    //  else
                    //      width = 1<<shift
                    //  u = u>>4; // get integer part
                    //  if (u<0)
                    //      u = 0
                    //      width = 0
                    //  generated_vars.rt = width

                    CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));
                    MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));
                    MOV(LE, 0, width, imm(0));
                    MOV(GT, 0, width, imm(1 << shift));
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, imm(0));
                    MOV(MI, 0, width, imm(0));
                }
                CONTEXT_STORE(width, generated_vars.rt);
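
                // (At this point generated_vars.rt holds the byte offset
                // from the current texel to its right neighbor: 0 when
                // clamped at the edge, 1<<shift normally, or a negative
                // wrap-around offset for REPEAT. generated_vars.lb,
                // stored below, plays the same role for the texel
                // underneath, scaled by the stride.)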
                const int stride = width;
                CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
                if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
                    // v has already been REPEATed
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, height);
                    CMP(AL, v, height);
                    MOV(LT, 0, height, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, height, reg_imm(height, LSL, shift));
                    RSB(GE, 0, height, height, imm(0));
                    MUL(AL, 0, height, stride, height);
                } else {
                    // v has not been CLAMPed yet
                    CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
                    MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
                    MOV(LE, 0, height, imm(0));
                    if (shift) {
                        MOV(GT, 0, height, reg_imm(stride, LSL, shift));
                    } else {
                        MOV(GT, 0, height, stride);
                    }
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, imm(0));
                    MOV(MI, 0, height, imm(0));
                }
                CONTEXT_STORE(height, generated_vars.lb);
            }

            scratches.recycle(width);
            scratches.recycle(height);

            // iterate texture coordinates...
            comment("iterate s,t");
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();
            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s.reg, s.reg, dsdx);
            ADD(AL, 0, t.reg, t.reg, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                scratches.recycle(s.reg);
                scratches.recycle(t.reg);
            }
            scratches.recycle(dsdx);
            scratches.recycle(dtdx);

            // merge base & offset...
            comment("merge base & offset");
            texel.setTo(regs.obtain(), &tmu.format);
            txPtr.setTo(texel.reg, tmu.bits);
            int stride = scratches.obtain();
            CONTEXT_LOAD(stride,    generated_vars.texture[i].stride);
            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
            SMLABB(AL, u, v, stride, u);    // u+v*stride
            base_offset(txPtr, txPtr, u);

            // load texel
            if (!tmu.linear) {
                comment("fetch texel");
                load(txPtr, texel, 0);
            } else {
                // recycle registers we don't need anymore
                scratches.recycle(u);
                scratches.recycle(v);
                scratches.recycle(stride);

                comment("fetch texel, bilinear");
                switch (tmu.format.size) {
                case 1:  filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                }
            }
        }
    }
}

void GGLAssembler::build_iterate_texture_coordinates(
    const fragment_parts_t& parts)
{
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        {   // 1:1 textures
            const pointer_t& txPtr = parts.coords[i].ptr;
            ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3));
        } else {
            Scratch scratches(registerFile());
            int s = parts.coords[i].s.reg;
            int t = parts.coords[i].t.reg;
            if ((mOptLevel&1)==0) {
                s = scratches.obtain();
                t = scratches.obtain();
                CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
            }
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();
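
// The filter8/16/32 routines below share one bilinear scheme: with U,V the
// fractional texel position in FRAC_BITS fixed point, the four neighboring
// texels are weighted by U*V, (1-U)*V, (1-U)*(1-V) and U*(1-V). The running
// term k starts at 1<<(2*FRAC_BITS) and has each computed weight subtracted
// from it, so the last weight is obtained by subtraction and the four
// weights sum exactly to 1<<(2*FRAC_BITS).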
            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s, s, dsdx);
            ADD(AL, 0, t, t, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
            }
        }
    }
}

void GGLAssembler::filter8(
        const fragment_parts_t& parts,
        pixel_t& texel, const texture_unit_t& tmu,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    if (tmu.format.components != GGL_ALPHA &&
        tmu.format.components != GGL_LUMINANCE)
    {
        // this is a packed format and we don't support linear filtering
        // on it (it's probably RGB 332); this should not happen with
        // OpenGL|ES.
        LDRB(AL, texel.reg, txPtr.reg);
        return;
    }

    // ------------------------
    // about 22 cycles / pixel
    Scratch scratches(registerFile());

    int pixel = scratches.obtain();
    int d     = scratches.obtain();
    int u     = scratches.obtain();
    int k     = scratches.obtain();
    int rt    = scratches.obtain();
    int lb    = scratches.obtain();

    // RB -> U * V

    CONTEXT_LOAD(rt, generated_vars.rt);
    CONTEXT_LOAD(lb, generated_vars.lb);

    int offset = pixel;
    ADD(AL, 0, offset, lb, rt);
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    SMULBB(AL, d, pixel, u);
    RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2)));

    // LB -> (1-U) * V
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb));
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);

    // RT -> U*(1-V)
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));
    SUB(AL, 0, u, k, u);
    SMLABB(AL, texel.reg, pixel, u, d);

    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        texel.format.c[i].h = FRAC_BITS*2+8;
        texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits is enough
    }
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_LO;
}
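
// filter16() uses a dual-field trick: each 16-bit texel is widened to
// 32 bits as (pixel | pixel<<shift) & mask, which leaves every component
// in its own field with zero padding above it. A single MUL by a weight
// of at most `prec` bits then scales all components in parallel without
// the fields bleeding into one another.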
778 LOGE("Unsupported 16-bits texture format (%d)", tmu.format_idx); 779 LDRH(AL, texel.reg, txPtr.reg); 780 return; 781 } 782 783 const int adjust = FRAC_BITS*2 - prec; 784 const int round = 0; 785 786 // update the texel format 787 texel.format.size = 4; 788 texel.format.bitsPerPixel = 32; 789 texel.flags |= CLEAR_HI|CLEAR_LO; 790 for (int i=0 ; i<4 ; i++) { 791 if (!texel.format.c[i].h) continue; 792 const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift; 793 texel.format.c[i].h = tmu.format.c[i].h + offset + prec; 794 texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec); 795 } 796 797 // ------------------------ 798 // about ~40 cycles / pixel 799 Scratch scratches(registerFile()); 800 801 int pixel= scratches.obtain(); 802 int d = scratches.obtain(); 803 int u = scratches.obtain(); 804 int k = scratches.obtain(); 805 806 // RB -> U * V 807 int offset = pixel; 808 CONTEXT_LOAD(offset, generated_vars.rt); 809 CONTEXT_LOAD(u, generated_vars.lb); 810 ADD(AL, 0, offset, offset, u); 811 812 LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); 813 SMULBB(AL, u, U, V); 814 ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); 815 build_and_immediate(pixel, pixel, mask, 32); 816 if (adjust) { 817 if (round) 818 ADD(AL, 0, u, u, imm(1<<(adjust-1))); 819 MOV(AL, 0, u, reg_imm(u, LSR, adjust)); 820 } 821 MUL(AL, 0, d, pixel, u); 822 RSB(AL, 0, k, u, imm(1<<prec)); 823 824 // LB -> (1-U) * V 825 CONTEXT_LOAD(offset, generated_vars.lb); 826 RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); 827 LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); 828 SMULBB(AL, u, U, V); 829 ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); 830 build_and_immediate(pixel, pixel, mask, 32); 831 if (adjust) { 832 if (round) 833 ADD(AL, 0, u, u, imm(1<<(adjust-1))); 834 MOV(AL, 0, u, reg_imm(u, LSR, adjust)); 835 } 836 MLA(AL, 0, d, pixel, u, d); 837 SUB(AL, 0, k, k, u); 838 839 // LT -> (1-U)*(1-V) 840 RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); 841 LDRH(AL, pixel, txPtr.reg); 842 SMULBB(AL, u, U, V); 843 ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); 844 build_and_immediate(pixel, pixel, mask, 32); 845 if (adjust) { 846 if (round) 847 ADD(AL, 0, u, u, imm(1<<(adjust-1))); 848 MOV(AL, 0, u, reg_imm(u, LSR, adjust)); 849 } 850 MLA(AL, 0, d, pixel, u, d); 851 852 // RT -> U*(1-V) 853 CONTEXT_LOAD(offset, generated_vars.rt); 854 LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); 855 SUB(AL, 0, u, k, u); 856 ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); 857 build_and_immediate(pixel, pixel, mask, 32); 858 MLA(AL, 0, texel.reg, pixel, u, d); 859} 860 861void GGLAssembler::filter24( 862 const fragment_parts_t& parts, 863 pixel_t& texel, const texture_unit_t& tmu, 864 int U, int V, pointer_t& txPtr, 865 int FRAC_BITS) 866{ 867 // not supported yet (currently disabled) 868 load(txPtr, texel, 0); 869} 870 871void GGLAssembler::filter32( 872 const fragment_parts_t& parts, 873 pixel_t& texel, const texture_unit_t& tmu, 874 int U, int V, pointer_t& txPtr, 875 int FRAC_BITS) 876{ 877 const int adjust = FRAC_BITS*2 - 8; 878 const int round = 0; 879 880 // ------------------------ 881 // about ~38 cycles / pixel 882 Scratch scratches(registerFile()); 883 884 int pixel= scratches.obtain(); 885 int dh = scratches.obtain(); 886 int u = scratches.obtain(); 887 int k = scratches.obtain(); 888 889 int temp = scratches.obtain(); 890 int dl = scratches.obtain(); 891 int mask = scratches.obtain(); 892 893 MOV(AL, 0, mask, imm(0xFF)); 894 ORR(AL, 0, mask, mask, imm(0xFF0000)); 895 896 // RB -> U * V 897 int offset = pixel; 898 
    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, dh, temp, u);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MUL(AL, 0, dl, temp, u);
    RSB(AL, 0, k, u, imm(0x100));

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SUB(AL, 0, u, k, u);
    AND(AL, 0, temp, mask, pixel);
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8));
    AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
    ORR(AL, 0, texel.reg, dh, dl);
}
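
// Texture environments are applied per component, in TMU order: `incoming`
// snapshots the previous stage's value so that each enabled unit combines
// its texel with the running fragment color (REPLACE, MODULATE, DECAL,
// BLEND or ADD).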
void GGLAssembler::build_texture_environment(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    const uint32_t component_mask = 1<<component;
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];

        if (tmu.mask & component_mask) {
            // replace or modulate with this texture
            if ((tmu.replaced & component_mask) == 0) {
                // not replaced by a later tmu...

                Scratch scratches(registerFile());
                pixel_t texel(parts.texel[i]);
                if (multiTexture &&
                    tmu.swrap == GGL_NEEDS_WRAP_11 &&
                    tmu.twrap == GGL_NEEDS_WRAP_11)
                {
                    texel.reg = scratches.obtain();
                    texel.flags |= CORRUPTIBLE;
                    comment("fetch texel (multitexture 1:1)");
                    load(parts.coords[i].ptr, texel, WRITE_BACK);
                }

                component_t incoming(fragment);
                modify(fragment, regs);

                switch (tmu.env) {
                case GGL_REPLACE:
                    extract(fragment, texel, component);
                    break;
                case GGL_MODULATE:
                    modulate(fragment, incoming, texel, component);
                    break;
                case GGL_DECAL:
                    decal(fragment, incoming, texel, component);
                    break;
                case GGL_BLEND:
                    blend(fragment, incoming, texel, component, i);
                    break;
                case GGL_ADD:
                    add(fragment, incoming, texel, component);
                    break;
                }
            }
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::wrapping(
        int d,
        int coord, int size,
        int tx_wrap, int tx_linear)
{
    // notes:
    // if tx_linear is set, we need 4 extra bits of precision on the result
    // SMULL/UMULL is 3 cycles
    Scratch scratches(registerFile());
    int c = coord;
    if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) {
        // UMULL takes 4 cycles (interlocked), and we can get away with
        // 2 cycles using SMULWB, but we're losing 16 bits of precision
        // out of 32 (this is not a problem because the iterator keeps
        // its full precision)
        // UMULL(AL, 0, size, d, c, size);
        // note: we can't use SMULTB because it's signed.
        MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear));
        SMULWB(AL, d, d, size);
    } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) {
        if (tx_linear) {
            // 1 cycle
            MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear));
        } else {
            // 4 cycles (common case)
            MOV(AL, 0, d, reg_imm(coord, ASR, 16));
            BIC(AL, 0, d, d, reg_imm(d, ASR, 31));
            CMP(AL, d, size);
            SUB(GE, 0, d, size, imm(1));
        }
    }
}

// ---------------------------------------------------------------------------
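
// modulate() avoids the exact division (Cf*Ct)/((1<<N)-1) by using the
// approximation Cf*(Ct + (Ct>>(N-1))) >> N. A quick check with N=8:
// Ct=255 gives Cf*(255+1)>>8 = Cf, and Ct=0 gives 0, so both endpoints
// are exact.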
void GGLAssembler::modulate(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);

    // Nt should always be less than 10 bits because it comes from the TMU.
    const int Nt = texel.size();

    // Ni could be big because it comes from previous MODULATEs
    int Ni = incoming.size();

    if (Nt == 1) {
        // texel acts as a bit-mask
        // dest = incoming & ((texel << incoming.h)-texel)
        RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h));
        AND(AL, 0, dest.reg, dest.reg, incoming.reg);
        dest.l = incoming.l;
        dest.h = incoming.h;
        dest.flags |= (incoming.flags & CLEAR_LO);
    } else if (Ni == 1) {
        MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h));
        AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31));
        dest.l = 0;
        dest.h = Nt;
    } else {
        int inReg = incoming.reg;
        int shift = incoming.l;
        if ((Nt + Ni) > 32) {
            // we will overflow, reduce the precision of Ni to 8 bits
            // (note that Nt cannot be more than 10 bits, which happens
            // with 565 textures and GGL_LINEAR)
            shift += Ni-8;
            Ni = 8;
        }

        // modulate by the component with the lowest precision
        if (Nt >= Ni) {
            if (shift) {
                // XXX: we should be able to avoid this shift
                // when shift==16 && Nt<16 && Ni<16, in which
                // case we could use SMULBT below.
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:         (Cf*Ct)/((1<<Ni)-1)
            // approximated with: Cf*(Ct + Ct>>(Ni-1))>>Ni
            // this operation doesn't change texel's size
            ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1));
            if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg);
            else                MUL(AL, 0, dest.reg, texel.reg, dest.reg);
            dest.l = Ni;
            dest.h = Nt + Ni;
        } else {
            if (shift && (shift != 16)) {
                // if shift==16, we can use 16-bit mul instructions later
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:         (Cf*Ct)/((1<<Nt)-1)
            // approximated with: Ct*(Cf + Cf>>(Nt-1))>>Nt
            // this operation doesn't change incoming's size
            Scratch scratches(registerFile());
            int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg;
            if (t == inReg)
                t = scratches.obtain();
            ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1));
            if (Nt<16 && Ni<16) {
                if (shift==16)  SMULBT(AL, dest.reg, t, inReg);
                else            SMULBB(AL, dest.reg, t, inReg);
            } else              MUL(AL, 0, dest.reg, t, inReg);
            dest.l = Nt;
            dest.h = Nt + Ni;
        }

        // low bits are not valid
        dest.flags |= CLEAR_LO;

        // no need to keep more than 8 bits/component
        if (dest.size() > 8)
            dest.l = dest.h-8;
    }
}
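
// decal() and blend() both normalize the 0..(2^s - 1) blend factor with
// factor += factor >> (s-1), which maps full intensity to exactly 2^s
// (e.g. 255 -> 256 for s=8), so the factor behaves as a fixed-point
// weight in which 1.0 is exactly representable.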
void GGLAssembler::decal(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At
    // Av = Af
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);
    extract(factor, incomingTexel, GGLFormat::ALPHA);

    // no need to keep more than 8 bits for decal
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, texel);
}

void GGLAssembler::blend(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component, int tmu)
{
    // RGBA:
    // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct
    // Av = At*Af

    if (component == GGLFormat::ALPHA) {
        modulate(dest, incoming, incomingTexel, component);
        return;
    }

    Scratch locals(registerFile());
    integer_t color(locals.obtain(), 8, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    LDRB(AL, color.reg, mBuilderContext.Rctx,
            immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component])));
    extract(factor, incomingTexel, component);

    // no need to keep more than 8 bits for blend
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, color);
}
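
// add() first widens the narrower of texel and incoming so both operands
// share the same fixed-point position; component_sat() then clamps the
// sum, since Cf + Ct can exceed the component's maximum.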
void GGLAssembler::add(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf + Ct;
    Scratch locals(registerFile());

    component_t incomingTemp(incoming);

    // use "dest" as a temporary for extracting the texel, unless "dest"
    // overlaps "incoming".
    integer_t texel(dest.reg, 32, CORRUPTIBLE);
    if (dest.reg == incomingTemp.reg)
        texel.reg = locals.obtain();
    extract(texel, incomingTexel, component);

    if (texel.s < incomingTemp.size()) {
        expand(texel, texel, incomingTemp.size());
    } else if (texel.s > incomingTemp.size()) {
        if (incomingTemp.flags & CORRUPTIBLE) {
            expand(incomingTemp, incomingTemp, texel.s);
        } else {
            incomingTemp.reg = locals.obtain();
            expand(incomingTemp, incoming, texel.s);
        }
    }

    if (incomingTemp.l) {
        ADD(AL, 0, dest.reg, texel.reg,
                reg_imm(incomingTemp.reg, LSR, incomingTemp.l));
    } else {
        ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg);
    }
    dest.l = 0;
    dest.h = texel.size();
    component_sat(dest);
}

// ----------------------------------------------------------------------------

} // namespace android