scanline.cpp revision 4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53
1/* libs/pixelflinger/scanline.cpp 2** 3** Copyright 2006, The Android Open Source Project 4** 5** Licensed under the Apache License, Version 2.0 (the "License"); 6** you may not use this file except in compliance with the License. 7** You may obtain a copy of the License at 8** 9** http://www.apache.org/licenses/LICENSE-2.0 10** 11** Unless required by applicable law or agreed to in writing, software 12** distributed under the License is distributed on an "AS IS" BASIS, 13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14** See the License for the specific language governing permissions and 15** limitations under the License. 16*/ 17 18 19#define LOG_TAG "pixelflinger" 20 21#include <assert.h> 22#include <stdlib.h> 23#include <stdio.h> 24#include <string.h> 25 26#include <cutils/memory.h> 27#include <cutils/log.h> 28 29#include "buffer.h" 30#include "scanline.h" 31 32#include "codeflinger/CodeCache.h" 33#include "codeflinger/GGLAssembler.h" 34#include "codeflinger/ARMAssembler.h" 35//#include "codeflinger/ARMAssemblerOptimizer.h" 36 37// ---------------------------------------------------------------------------- 38 39#define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline 40#define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic 41#define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic 42#define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen 43 44#ifdef NDEBUG 45# define ANDROID_RELEASE 46# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 47#else 48# define ANDROID_DEBUG 49# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 50#endif 51 52#if defined(__arm__) 53# define ANDROID_ARM_CODEGEN 1 54#else 55# define ANDROID_ARM_CODEGEN 0 56#endif 57 58 59#define DEBUG__CODEGEN_ONLY 0 60 61// ---------------------------------------------------------------------------- 62namespace android { 63// ---------------------------------------------------------------------------- 64 65static void init_y(context_t*, int32_t); 66static void init_y_noop(context_t*, int32_t); 67static void init_y_packed(context_t*, int32_t); 68static void init_y_error(context_t*, int32_t); 69 70static void step_y__generic(context_t* c); 71static void step_y__nop(context_t*); 72static void step_y__smooth(context_t* c); 73static void step_y__tmu(context_t* c); 74static void step_y__w(context_t* c); 75 76static void scanline(context_t* c); 77static void scanline_perspective(context_t* c); 78static void scanline_perspective_single(context_t* c); 79static void scanline_t32cb16blend(context_t* c); 80static void scanline_t32cb16(context_t* c); 81static void scanline_memcpy(context_t* c); 82static void scanline_memset8(context_t* c); 83static void scanline_memset16(context_t* c); 84static void scanline_memset32(context_t* c); 85static void scanline_noop(context_t* c); 86static void scanline_set(context_t* c); 87static void scanline_clear(context_t* c); 88 89static void rect_generic(context_t* c, size_t yc); 90static void rect_memcpy(context_t* c, size_t yc); 91 92extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); 93extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); 94 95// ---------------------------------------------------------------------------- 96 97struct shortcut_t { 98 needs_filter_t filter; 99 const char* desc; 100 void (*scanline)(context_t*); 101 void (*init_y)(context_t*, int32_t); 102}; 103 104// Keep in sync with needs 105static shortcut_t shortcuts[] = { 106 { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } }, 107 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 108 "565 fb, 8888 tx, blend", scanline_t32cb16blend, init_y_noop }, 109 { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } }, 110 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 111 "565 fb, 8888 tx", scanline_t32cb16, init_y_noop }, 112 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 113 { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } }, 114 "(nop) alpha test", scanline_noop, init_y_noop }, 115 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 116 { 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } }, 117 "(nop) depth test", scanline_noop, init_y_noop }, 118 { { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } }, 119 { 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 120 "(nop) logic_op", scanline_noop, init_y_noop }, 121 { { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } }, 122 { 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 123 "(nop) color mask", scanline_noop, init_y_noop }, 124 { { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } }, 125 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 126 "(set) logic_op", scanline_set, init_y_noop }, 127 { { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } }, 128 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 129 "(clear) logic_op", scanline_clear, init_y_noop }, 130 { { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } }, 131 { 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } }, 132 "(clear) blending 0/0", scanline_clear, init_y_noop }, 133 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 134 { 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } }, 135 "(error) invalid color-buffer format", scanline_noop, init_y_error }, 136}; 137static const needs_filter_t noblend1to1 = { 138 // (disregard dithering, see below) 139 { 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } }, 140 { 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } } 141}; 142static const needs_filter_t fill16noblend = { 143 { 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } }, 144 { 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } } 145}; 146 147// ---------------------------------------------------------------------------- 148 149#if ANDROID_ARM_CODEGEN 150static CodeCache gCodeCache(12 * 1024); 151 152class ScanlineAssembly : public Assembly { 153 AssemblyKey<needs_t> mKey; 154public: 155 ScanlineAssembly(needs_t needs, size_t size) 156 : Assembly(size), mKey(needs) { } 157 const AssemblyKey<needs_t>& key() const { return mKey; } 158}; 159#endif 160 161// ---------------------------------------------------------------------------- 162 163void ggl_init_scanline(context_t* c) 164{ 165 c->init_y = init_y; 166 c->step_y = step_y__generic; 167 c->scanline = scanline; 168} 169 170void ggl_uninit_scanline(context_t* c) 171{ 172 if (c->state.buffers.coverage) 173 free(c->state.buffers.coverage); 174#if ANDROID_ARM_CODEGEN 175 if (c->scanline_as) 176 c->scanline_as->decStrong(c); 177#endif 178} 179 180// ---------------------------------------------------------------------------- 181 182static void pick_scanline(context_t* c) 183{ 184#if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0)) 185 186#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC 187 c->init_y = init_y; 188 c->step_y = step_y__generic; 189 c->scanline = scanline; 190 return; 191#endif 192 193 //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n", 194 // c->state.needs.n, c->state.needs.p, 195 // c->state.needs.t[0], c->state.needs.t[1]); 196 197 // first handle the special case that we cannot test with a filter 198 const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n); 199 if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) { 200 if (c->state.needs.match(noblend1to1)) { 201 // this will match regardless of dithering state, since both 202 // src and dest have the same format anyway, there is no dithering 203 // to be done. 204 const GGLFormat* f = 205 &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]); 206 if ((f->components == GGL_RGB) || 207 (f->components == GGL_RGBA) || 208 (f->components == GGL_LUMINANCE) || 209 (f->components == GGL_LUMINANCE_ALPHA)) 210 { 211 // format must have all of RGB components 212 // (so the current color doesn't show through) 213 c->scanline = scanline_memcpy; 214 c->init_y = init_y_noop; 215 return; 216 } 217 } 218 } 219 220 if (c->state.needs.match(fill16noblend)) { 221 c->init_y = init_y_packed; 222 switch (c->formats[cb_format].size) { 223 case 1: c->scanline = scanline_memset8; return; 224 case 2: c->scanline = scanline_memset16; return; 225 case 4: c->scanline = scanline_memset32; return; 226 } 227 } 228 229 const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t); 230 for (int i=0 ; i<numFilters ; i++) { 231 if (c->state.needs.match(shortcuts[i].filter)) { 232 c->scanline = shortcuts[i].scanline; 233 c->init_y = shortcuts[i].init_y; 234 return; 235 } 236 } 237 238#endif // DEBUG__CODEGEN_ONLY 239 240 c->init_y = init_y; 241 c->step_y = step_y__generic; 242 243#if ANDROID_ARM_CODEGEN 244 // we're going to have to generate some code... 245 // here, generate code for our pixel pipeline 246 const AssemblyKey<needs_t> key(c->state.needs); 247 sp<Assembly> assembly = gCodeCache.lookup(key); 248 if (assembly == 0) { 249 // create a new assembly region 250 sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs, 1024); 251 // initialize our assembler 252 GGLAssembler assembler( new ARMAssembler(a) ); 253 //GGLAssembler assembler( 254 // new ARMAssemblerOptimizer(new ARMAssembler(a)) ); 255 // generate the scanline code for the given needs 256 int err = assembler.scanline(c->state.needs, c); 257 if (ggl_likely(!err)) { 258 // finally, cache this assembly 259 err = gCodeCache.cache(a->key(), a); 260 } 261 if (ggl_unlikely(err)) { 262 LOGE("error generating or caching assembly. Reverting to NOP."); 263 c->scanline = scanline_noop; 264 c->init_y = init_y_noop; 265 c->step_y = step_y__nop; 266 return; 267 } 268 assembly = a; 269 } 270 271 // release the previous assembly 272 if (c->scanline_as) { 273 c->scanline_as->decStrong(c); 274 } 275 276 //LOGI("using generated pixel-pipeline"); 277 c->scanline_as = assembly.get(); 278 c->scanline_as->incStrong(c); // hold on to assembly 279 c->scanline = (void(*)(context_t* c))assembly->base(); 280#else 281// LOGW("using generic (slow) pixel-pipeline"); 282 c->scanline = scanline; 283#endif 284} 285 286void ggl_pick_scanline(context_t* c) 287{ 288 pick_scanline(c); 289 if ((c->state.enables & GGL_ENABLE_W) && 290 (c->state.enables & GGL_ENABLE_TMUS)) 291 { 292 c->span = c->scanline; 293 c->scanline = scanline_perspective; 294 if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) { 295 // only one TMU enabled 296 c->scanline = scanline_perspective_single; 297 } 298 } 299} 300 301// ---------------------------------------------------------------------------- 302 303static void blending(context_t* c, pixel_t* fragment, pixel_t* fb); 304static void blend_factor(context_t* c, pixel_t* r, uint32_t factor, 305 const pixel_t* src, const pixel_t* dst); 306static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv); 307 308#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 309 310// no need to compile the generic-pipeline, it can't be reached 311void scanline(context_t*) 312{ 313} 314 315#else 316 317void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv) 318{ 319 if (su && sv) { 320 if (su > sv) { 321 v = ggl_expand(v, sv, su); 322 sv = su; 323 } else if (su < sv) { 324 u = ggl_expand(u, su, sv); 325 su = sv; 326 } 327 } 328} 329 330void blending(context_t* c, pixel_t* fragment, pixel_t* fb) 331{ 332 rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]); 333 rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]); 334 rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]); 335 rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]); 336 337 pixel_t sf, df; 338 blend_factor(c, &sf, c->state.blend.src, fragment, fb); 339 blend_factor(c, &df, c->state.blend.dst, fragment, fb); 340 341 fragment->c[1] = 342 gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1])); 343 fragment->c[2] = 344 gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2])); 345 fragment->c[3] = 346 gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3])); 347 348 if (c->state.blend.alpha_separate) { 349 blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb); 350 blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb); 351 } 352 353 fragment->c[0] = 354 gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0])); 355 356 // clamp to 1.0 357 if (fragment->c[0] >= (1LU<<fragment->s[0])) 358 fragment->c[0] = (1<<fragment->s[0])-1; 359 if (fragment->c[1] >= (1LU<<fragment->s[1])) 360 fragment->c[1] = (1<<fragment->s[1])-1; 361 if (fragment->c[2] >= (1LU<<fragment->s[2])) 362 fragment->c[2] = (1<<fragment->s[2])-1; 363 if (fragment->c[3] >= (1LU<<fragment->s[3])) 364 fragment->c[3] = (1<<fragment->s[3])-1; 365} 366 367static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0) 368{ 369 if (!size) 370 return def; 371 372 // scale to 16 bits 373 if (size > 16) { 374 x >>= (size - 16); 375 } else if (size < 16) { 376 x = ggl_expand(x, size, 16); 377 } 378 x += x >> 15; 379 return x; 380} 381 382void blend_factor(context_t* c, pixel_t* r, 383 uint32_t factor, const pixel_t* src, const pixel_t* dst) 384{ 385 switch (factor) { 386 case GGL_ZERO: 387 r->c[1] = 388 r->c[2] = 389 r->c[3] = 390 r->c[0] = 0; 391 break; 392 case GGL_ONE: 393 r->c[1] = 394 r->c[2] = 395 r->c[3] = 396 r->c[0] = FIXED_ONE; 397 break; 398 case GGL_DST_COLOR: 399 r->c[1] = blendfactor(dst->c[1], dst->s[1]); 400 r->c[2] = blendfactor(dst->c[2], dst->s[2]); 401 r->c[3] = blendfactor(dst->c[3], dst->s[3]); 402 r->c[0] = blendfactor(dst->c[0], dst->s[0]); 403 break; 404 case GGL_SRC_COLOR: 405 r->c[1] = blendfactor(src->c[1], src->s[1]); 406 r->c[2] = blendfactor(src->c[2], src->s[2]); 407 r->c[3] = blendfactor(src->c[3], src->s[3]); 408 r->c[0] = blendfactor(src->c[0], src->s[0]); 409 break; 410 case GGL_ONE_MINUS_DST_COLOR: 411 r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]); 412 r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]); 413 r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]); 414 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]); 415 break; 416 case GGL_ONE_MINUS_SRC_COLOR: 417 r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]); 418 r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]); 419 r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]); 420 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]); 421 break; 422 case GGL_SRC_ALPHA: 423 r->c[1] = 424 r->c[2] = 425 r->c[3] = 426 r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE); 427 break; 428 case GGL_ONE_MINUS_SRC_ALPHA: 429 r->c[1] = 430 r->c[2] = 431 r->c[3] = 432 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE); 433 break; 434 case GGL_DST_ALPHA: 435 r->c[1] = 436 r->c[2] = 437 r->c[3] = 438 r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 439 break; 440 case GGL_ONE_MINUS_DST_ALPHA: 441 r->c[1] = 442 r->c[2] = 443 r->c[3] = 444 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 445 break; 446 case GGL_SRC_ALPHA_SATURATE: 447 // XXX: GGL_SRC_ALPHA_SATURATE 448 break; 449 } 450} 451 452static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap) 453{ 454 GGLfixed d; 455 if (tx_wrap == GGL_REPEAT) { 456 d = (uint32_t(coord)>>16) * size; 457 } else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics 458 const GGLfixed clamp_min = FIXED_HALF; 459 const GGLfixed clamp_max = (size << 16) - FIXED_HALF; 460 if (coord < clamp_min) coord = clamp_min; 461 if (coord > clamp_max) coord = clamp_max; 462 d = coord; 463 } else { // 1:1 464 const GGLfixed clamp_min = 0; 465 const GGLfixed clamp_max = (size << 16); 466 if (coord < clamp_min) coord = clamp_min; 467 if (coord > clamp_max) coord = clamp_max; 468 d = coord; 469 } 470 return d; 471} 472 473static inline 474GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len) 475{ 476 const int32_t end = dvdx * (len-1) + v; 477 if (end < 0) 478 v -= end; 479 v &= ~(v>>31); 480 return v; 481} 482 483void scanline(context_t* c) 484{ 485 const uint32_t enables = c->state.enables; 486 const int xs = c->iterators.xl; 487 const int x1 = c->iterators.xr; 488 int xc = x1 - xs; 489 const int16_t* covPtr = c->state.buffers.coverage + xs; 490 491 // All iterated values are sampled at the pixel center 492 493 // reset iterators for that scanline... 494 GGLcolor r, g, b, a; 495 iterators_t& ci = c->iterators; 496 if (enables & GGL_ENABLE_SMOOTH) { 497 r = (xs * c->shade.drdx) + ci.ydrdy; 498 g = (xs * c->shade.dgdx) + ci.ydgdy; 499 b = (xs * c->shade.dbdx) + ci.ydbdy; 500 a = (xs * c->shade.dadx) + ci.ydady; 501 r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc); 502 g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc); 503 b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc); 504 a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc); 505 } else { 506 r = ci.ydrdy; 507 g = ci.ydgdy; 508 b = ci.ydbdy; 509 a = ci.ydady; 510 } 511 512 // z iterators are 1.31 513 GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy; 514 GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy; 515 516 struct { 517 GGLfixed s, t; 518 } tc[GGL_TEXTURE_UNIT_COUNT]; 519 if (enables & GGL_ENABLE_TMUS) { 520 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 521 if (c->state.texture[i].enable) { 522 texture_iterators_t& ti = c->state.texture[i].iterators; 523 if (enables & GGL_ENABLE_W) { 524 tc[i].s = ti.ydsdy; 525 tc[i].t = ti.ydtdy; 526 } else { 527 tc[i].s = (xs * ti.dsdx) + ti.ydsdy; 528 tc[i].t = (xs * ti.dtdx) + ti.ydtdy; 529 } 530 } 531 } 532 } 533 534 pixel_t fragment; 535 pixel_t texel; 536 pixel_t fb; 537 538 uint32_t x = xs; 539 uint32_t y = c->iterators.y; 540 541 while (xc--) { 542 543 { // just a scope 544 545 // read color (convert to 8 bits by keeping only the integer part) 546 fragment.s[1] = fragment.s[2] = 547 fragment.s[3] = fragment.s[0] = 8; 548 fragment.c[1] = r >> (GGL_COLOR_BITS-8); 549 fragment.c[2] = g >> (GGL_COLOR_BITS-8); 550 fragment.c[3] = b >> (GGL_COLOR_BITS-8); 551 fragment.c[0] = a >> (GGL_COLOR_BITS-8); 552 553 // texturing 554 if (enables & GGL_ENABLE_TMUS) { 555 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 556 texture_t& tx = c->state.texture[i]; 557 if (!tx.enable) 558 continue; 559 texture_iterators_t& ti = tx.iterators; 560 int32_t u, v; 561 562 // s-coordinate 563 if (tx.s_coord != GGL_ONE_TO_ONE) { 564 const int w = tx.surface.width; 565 u = wrapping(tc[i].s, w, tx.s_wrap); 566 tc[i].s += ti.dsdx; 567 } else { 568 u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF; 569 } 570 571 // t-coordinate 572 if (tx.t_coord != GGL_ONE_TO_ONE) { 573 const int h = tx.surface.height; 574 v = wrapping(tc[i].t, h, tx.t_wrap); 575 tc[i].t += ti.dtdx; 576 } else { 577 v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF; 578 } 579 580 // read texture 581 if (tx.mag_filter == GGL_NEAREST && 582 tx.min_filter == GGL_NEAREST) 583 { 584 u >>= 16; 585 v >>= 16; 586 tx.surface.read(&tx.surface, c, u, v, &texel); 587 } else { 588 const int w = tx.surface.width; 589 const int h = tx.surface.height; 590 u -= FIXED_HALF; 591 v -= FIXED_HALF; 592 int u0 = u >> 16; 593 int v0 = v >> 16; 594 int u1 = u0 + 1; 595 int v1 = v0 + 1; 596 if (tx.s_wrap == GGL_REPEAT) { 597 if (u0<0) u0 += w; 598 if (u1<0) u1 += w; 599 if (u0>=w) u0 -= w; 600 if (u1>=w) u1 -= w; 601 } else { 602 if (u0<0) u0 = 0; 603 if (u1<0) u1 = 0; 604 if (u0>=w) u0 = w-1; 605 if (u1>=w) u1 = w-1; 606 } 607 if (tx.t_wrap == GGL_REPEAT) { 608 if (v0<0) v0 += h; 609 if (v1<0) v1 += h; 610 if (v0>=h) v0 -= h; 611 if (v1>=h) v1 -= h; 612 } else { 613 if (v0<0) v0 = 0; 614 if (v1<0) v1 = 0; 615 if (v0>=h) v0 = h-1; 616 if (v1>=h) v1 = h-1; 617 } 618 pixel_t texels[4]; 619 uint32_t mm[4]; 620 tx.surface.read(&tx.surface, c, u0, v0, &texels[0]); 621 tx.surface.read(&tx.surface, c, u0, v1, &texels[1]); 622 tx.surface.read(&tx.surface, c, u1, v0, &texels[2]); 623 tx.surface.read(&tx.surface, c, u1, v1, &texels[3]); 624 u = (u >> 12) & 0xF; 625 v = (v >> 12) & 0xF; 626 u += u>>3; 627 v += v>>3; 628 mm[0] = (0x10 - u) * (0x10 - v); 629 mm[1] = (0x10 - u) * v; 630 mm[2] = u * (0x10 - v); 631 mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]); 632 for (int j=0 ; j<4 ; j++) { 633 texel.s[j] = texels[0].s[j]; 634 if (!texel.s[j]) continue; 635 texel.s[j] += 8; 636 texel.c[j] = texels[0].c[j]*mm[0] + 637 texels[1].c[j]*mm[1] + 638 texels[2].c[j]*mm[2] + 639 texels[3].c[j]*mm[3] ; 640 } 641 } 642 643 // Texture environnement... 644 for (int j=0 ; j<4 ; j++) { 645 uint32_t& Cf = fragment.c[j]; 646 uint32_t& Ct = texel.c[j]; 647 uint8_t& sf = fragment.s[j]; 648 uint8_t& st = texel.s[j]; 649 uint32_t At = texel.c[0]; 650 uint8_t sat = texel.s[0]; 651 switch (tx.env) { 652 case GGL_REPLACE: 653 if (st) { 654 Cf = Ct; 655 sf = st; 656 } 657 break; 658 case GGL_MODULATE: 659 if (st) { 660 uint32_t factor = Ct + (Ct>>(st-1)); 661 Cf = (Cf * factor) >> st; 662 } 663 break; 664 case GGL_DECAL: 665 if (sat) { 666 rescale(Cf, sf, Ct, st); 667 Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat; 668 } 669 break; 670 case GGL_BLEND: 671 if (st) { 672 uint32_t Cc = tx.env_color[i]; 673 if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8; 674 else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf); 675 uint32_t factor = Ct + (Ct>>(st-1)); 676 Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st; 677 } 678 break; 679 } 680 } 681 } 682 } 683 684 // coverage application 685 if (enables & GGL_ENABLE_AA) { 686 int16_t cf = *covPtr++; 687 fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15; 688 } 689 690 // alpha-test 691 if (enables & GGL_ENABLE_ALPHA_TEST) { 692 GGLcolor ref = c->state.alpha_test.ref; 693 GGLcolor alpha = (uint64_t(fragment.c[0]) * 694 ((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1); 695 switch (c->state.alpha_test.func) { 696 case GGL_NEVER: goto discard; 697 case GGL_LESS: if (alpha<ref) break; goto discard; 698 case GGL_EQUAL: if (alpha==ref) break; goto discard; 699 case GGL_LEQUAL: if (alpha<=ref) break; goto discard; 700 case GGL_GREATER: if (alpha>ref) break; goto discard; 701 case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard; 702 case GGL_GEQUAL: if (alpha>=ref) break; goto discard; 703 } 704 } 705 706 // depth test 707 if (c->state.buffers.depth.format) { 708 if (enables & GGL_ENABLE_DEPTH_TEST) { 709 surface_t* cb = &(c->state.buffers.depth); 710 uint16_t* p = (uint16_t*)(cb->data)+(x+(cb->stride*y)); 711 uint16_t zz = uint32_t(z)>>(16); 712 uint16_t depth = *p; 713 switch (c->state.depth_test.func) { 714 case GGL_NEVER: goto discard; 715 case GGL_LESS: if (zz<depth) break; goto discard; 716 case GGL_EQUAL: if (zz==depth) break; goto discard; 717 case GGL_LEQUAL: if (zz<=depth) break; goto discard; 718 case GGL_GREATER: if (zz>depth) break; goto discard; 719 case GGL_NOTEQUAL: if (zz!=depth) break; goto discard; 720 case GGL_GEQUAL: if (zz>=depth) break; goto discard; 721 } 722 // depth buffer is not enabled, if depth-test is not enabled 723/* 724 fragment.s[1] = fragment.s[2] = 725 fragment.s[3] = fragment.s[0] = 8; 726 fragment.c[1] = 727 fragment.c[2] = 728 fragment.c[3] = 729 fragment.c[0] = 255 - (zz>>8); 730*/ 731 if (c->state.mask.depth) { 732 *p = zz; 733 } 734 } 735 } 736 737 // fog 738 if (enables & GGL_ENABLE_FOG) { 739 for (int i=1 ; i<=3 ; i++) { 740 GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF; 741 uint32_t& c = fragment.c[i]; 742 uint8_t& s = fragment.s[i]; 743 c = (c * 0x10000) / ((1<<s)-1); 744 c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f)); 745 s = 16; 746 } 747 } 748 749 // blending 750 if (enables & GGL_ENABLE_BLENDING) { 751 fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind 752 fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0; 753 c->state.buffers.color.read( 754 &(c->state.buffers.color), c, x, y, &fb); 755 blending( c, &fragment, &fb ); 756 } 757 758 // write 759 c->state.buffers.color.write( 760 &(c->state.buffers.color), c, x, y, &fragment); 761 } 762 763discard: 764 // iterate... 765 x += 1; 766 if (enables & GGL_ENABLE_SMOOTH) { 767 r += c->shade.drdx; 768 g += c->shade.dgdx; 769 b += c->shade.dbdx; 770 a += c->shade.dadx; 771 } 772 z += c->shade.dzdx; 773 f += c->shade.dfdx; 774 } 775} 776 777#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 778 779// ---------------------------------------------------------------------------- 780#if 0 781#pragma mark - 782#pragma mark Scanline 783#endif 784 785template <typename T, typename U> 786static inline __attribute__((const)) 787T interpolate(int y, T v0, U dvdx, U dvdy) { 788 // interpolates in pixel's centers 789 // v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx) 790 return (y * dvdy) + (v0 + ((dvdy + dvdx) >> 1)); 791} 792 793// ---------------------------------------------------------------------------- 794#if 0 795#pragma mark - 796#endif 797 798void init_y(context_t* c, int32_t ys) 799{ 800 const uint32_t enables = c->state.enables; 801 802 // compute iterators... 803 iterators_t& ci = c->iterators; 804 805 // sample in the center 806 ci.y = ys; 807 808 if (enables & (GGL_ENABLE_DEPTH_TEST|GGL_ENABLE_W|GGL_ENABLE_FOG)) { 809 ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy); 810 ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy); 811 ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy); 812 } 813 814 if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) { 815 ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy); 816 ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy); 817 ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy); 818 ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady); 819 c->step_y = step_y__smooth; 820 } else { 821 ci.ydrdy = c->shade.r0; 822 ci.ydgdy = c->shade.g0; 823 ci.ydbdy = c->shade.b0; 824 ci.ydady = c->shade.a0; 825 // XXX: do only if needed, or make sure this is fast 826 c->packed = ggl_pack_color(c, c->state.buffers.color.format, 827 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 828 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, 829 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 830 } 831 832 // initialize the variables we need in the shader 833 generated_vars_t& gen = c->generated_vars; 834 gen.argb[GGLFormat::ALPHA].c = ci.ydady; 835 gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx; 836 gen.argb[GGLFormat::RED ].c = ci.ydrdy; 837 gen.argb[GGLFormat::RED ].dx = c->shade.drdx; 838 gen.argb[GGLFormat::GREEN].c = ci.ydgdy; 839 gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx; 840 gen.argb[GGLFormat::BLUE ].c = ci.ydbdy; 841 gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx; 842 gen.dzdx = c->shade.dzdx; 843 gen.f = ci.ydfdy; 844 gen.dfdx = c->shade.dfdx; 845 846 if (enables & GGL_ENABLE_TMUS) { 847 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 848 texture_t& t = c->state.texture[i]; 849 if (!t.enable) continue; 850 851 texture_iterators_t& ti = t.iterators; 852 if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) { 853 // we need to set all of these to 0 because in some cases 854 // step_y__generic() or step_y__tmu() will be used and 855 // therefore will update dtdy, however, in 1:1 mode 856 // this is always done by the scanline rasterizer. 857 ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0; 858 ti.ydsdy = t.shade.is0; 859 ti.ydtdy = t.shade.it0; 860 } else { 861 const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16); 862 const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16); 863 ti.sscale = t.shade.sscale + adjustSWrap; 864 ti.tscale = t.shade.tscale + adjustTWrap; 865 if (!(enables & GGL_ENABLE_W)) { 866 // S coordinate 867 const int32_t sscale = ti.sscale; 868 const int32_t sy = interpolate(ys, 869 t.shade.is0, t.shade.idsdx, t.shade.idsdy); 870 if (sscale>=0) { 871 ti.ydsdy= sy << sscale; 872 ti.dsdx = t.shade.idsdx << sscale; 873 ti.dsdy = t.shade.idsdy << sscale; 874 } else { 875 ti.ydsdy= sy >> -sscale; 876 ti.dsdx = t.shade.idsdx >> -sscale; 877 ti.dsdy = t.shade.idsdy >> -sscale; 878 } 879 // T coordinate 880 const int32_t tscale = ti.tscale; 881 const int32_t ty = interpolate(ys, 882 t.shade.it0, t.shade.idtdx, t.shade.idtdy); 883 if (tscale>=0) { 884 ti.ydtdy= ty << tscale; 885 ti.dtdx = t.shade.idtdx << tscale; 886 ti.dtdy = t.shade.idtdy << tscale; 887 } else { 888 ti.ydtdy= ty >> -tscale; 889 ti.dtdx = t.shade.idtdx >> -tscale; 890 ti.dtdy = t.shade.idtdy >> -tscale; 891 } 892 } 893 } 894 // mirror for generated code... 895 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 896 gen.width = t.surface.width; 897 gen.height = t.surface.height; 898 gen.stride = t.surface.stride; 899 gen.data = int32_t(t.surface.data); 900 gen.dsdx = ti.dsdx; 901 gen.dtdx = ti.dtdx; 902 } 903 } 904 905 // choose the y-stepper 906 c->step_y = step_y__nop; 907 if (enables & GGL_ENABLE_FOG) { 908 c->step_y = step_y__generic; 909 } else if (enables & GGL_ENABLE_TMUS) { 910 if (enables & GGL_ENABLE_SMOOTH) { 911 c->step_y = step_y__generic; 912 } else if (enables & GGL_ENABLE_W) { 913 c->step_y = step_y__w; 914 } else { 915 c->step_y = step_y__tmu; 916 } 917 } else { 918 if (enables & GGL_ENABLE_SMOOTH) { 919 c->step_y = step_y__smooth; 920 } 921 } 922 923 // choose the rectangle blitter 924 c->rect = rect_generic; 925 if ((c->step_y == step_y__nop) && 926 (c->scanline == scanline_memcpy)) 927 { 928 c->rect = rect_memcpy; 929 } 930} 931 932void init_y_packed(context_t* c, int32_t y0) 933{ 934 uint8_t f = c->state.buffers.color.format; 935 c->packed = ggl_pack_color(c, f, 936 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); 937 c->iterators.y = y0; 938 c->step_y = step_y__nop; 939 // choose the rectangle blitter 940 c->rect = rect_generic; 941 if (c->scanline == scanline_memcpy) { 942 c->rect = rect_memcpy; 943 } 944} 945 946void init_y_noop(context_t* c, int32_t y0) 947{ 948 c->iterators.y = y0; 949 c->step_y = step_y__nop; 950 // choose the rectangle blitter 951 c->rect = rect_generic; 952 if (c->scanline == scanline_memcpy) { 953 c->rect = rect_memcpy; 954 } 955} 956 957void init_y_error(context_t* c, int32_t y0) 958{ 959 // woooops, shoud never happen, 960 // fail gracefully (don't display anything) 961 init_y_noop(c, y0); 962 LOGE("color-buffer has an invalid format!"); 963} 964 965// ---------------------------------------------------------------------------- 966#if 0 967#pragma mark - 968#endif 969 970void step_y__generic(context_t* c) 971{ 972 const uint32_t enables = c->state.enables; 973 974 // iterate... 975 iterators_t& ci = c->iterators; 976 ci.y += 1; 977 978 if (enables & GGL_ENABLE_SMOOTH) { 979 ci.ydrdy += c->shade.drdy; 980 ci.ydgdy += c->shade.dgdy; 981 ci.ydbdy += c->shade.dbdy; 982 ci.ydady += c->shade.dady; 983 } 984 985 const uint32_t mask = 986 GGL_ENABLE_DEPTH_TEST | 987 GGL_ENABLE_W | 988 GGL_ENABLE_FOG; 989 if (enables & mask) { 990 ci.ydzdy += c->shade.dzdy; 991 ci.ydwdy += c->shade.dwdy; 992 ci.ydfdy += c->shade.dfdy; 993 } 994 995 if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) { 996 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 997 if (c->state.texture[i].enable) { 998 texture_iterators_t& ti = c->state.texture[i].iterators; 999 ti.ydsdy += ti.dsdy; 1000 ti.ydtdy += ti.dtdy; 1001 } 1002 } 1003 } 1004} 1005 1006void step_y__nop(context_t* c) 1007{ 1008 c->iterators.y += 1; 1009 c->iterators.ydzdy += c->shade.dzdy; 1010} 1011 1012void step_y__smooth(context_t* c) 1013{ 1014 iterators_t& ci = c->iterators; 1015 ci.y += 1; 1016 ci.ydrdy += c->shade.drdy; 1017 ci.ydgdy += c->shade.dgdy; 1018 ci.ydbdy += c->shade.dbdy; 1019 ci.ydady += c->shade.dady; 1020 ci.ydzdy += c->shade.dzdy; 1021} 1022 1023void step_y__w(context_t* c) 1024{ 1025 iterators_t& ci = c->iterators; 1026 ci.y += 1; 1027 ci.ydzdy += c->shade.dzdy; 1028 ci.ydwdy += c->shade.dwdy; 1029} 1030 1031void step_y__tmu(context_t* c) 1032{ 1033 iterators_t& ci = c->iterators; 1034 ci.y += 1; 1035 ci.ydzdy += c->shade.dzdy; 1036 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1037 if (c->state.texture[i].enable) { 1038 texture_iterators_t& ti = c->state.texture[i].iterators; 1039 ti.ydsdy += ti.dsdy; 1040 ti.ydtdy += ti.dtdy; 1041 } 1042 } 1043} 1044 1045// ---------------------------------------------------------------------------- 1046#if 0 1047#pragma mark - 1048#endif 1049 1050void scanline_perspective(context_t* c) 1051{ 1052 struct { 1053 union { 1054 struct { 1055 int32_t s, sq; 1056 int32_t t, tq; 1057 }; 1058 struct { 1059 int32_t v, q; 1060 } st[2]; 1061 }; 1062 } tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16))); 1063 1064 // XXX: we should have a special case when dwdx = 0 1065 1066 // 32 pixels spans works okay. 16 is a lot better, 1067 // but hey, it's a software renderer... 1068 const uint32_t SPAN_BITS = 5; 1069 const uint32_t ys = c->iterators.y; 1070 const uint32_t xs = c->iterators.xl; 1071 const uint32_t x1 = c->iterators.xr; 1072 const uint32_t xc = x1 - xs; 1073 uint32_t remainder = xc & ((1<<SPAN_BITS)-1); 1074 uint32_t numSpans = xc >> SPAN_BITS; 1075 1076 const iterators_t& ci = c->iterators; 1077 int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy; 1078 int32_t q0 = gglRecipQ(w0, 30); 1079 const int iwscale = 32 - gglClz(q0); 1080 1081 const int32_t dwdx = c->shade.dwdx << SPAN_BITS; 1082 int32_t xl = c->iterators.xl; 1083 1084 // We process s & t with a loop to reduce the code size 1085 // (and i-cache pressure). 1086 1087 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1088 const texture_t& tmu = c->state.texture[i]; 1089 if (!tmu.enable) continue; 1090 int32_t s = tmu.shade.is0 + 1091 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) + 1092 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1); 1093 int32_t t = tmu.shade.it0 + 1094 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) + 1095 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1); 1096 tc[i].s = s; 1097 tc[i].t = t; 1098 tc[i].sq = gglMulx(s, q0, iwscale); 1099 tc[i].tq = gglMulx(t, q0, iwscale); 1100 } 1101 1102 int32_t span = 0; 1103 do { 1104 int32_t w1; 1105 if (ggl_likely(numSpans)) { 1106 w1 = w0 + dwdx; 1107 } else { 1108 if (remainder) { 1109 // finish off the scanline... 1110 span = remainder; 1111 w1 = (c->shade.dwdx * span) + w0; 1112 } else { 1113 break; 1114 } 1115 } 1116 int32_t q1 = gglRecipQ(w1, 30); 1117 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1118 texture_t& tmu = c->state.texture[i]; 1119 if (!tmu.enable) continue; 1120 texture_iterators_t& ti = tmu.iterators; 1121 1122 for (int j=0 ; j<2 ; j++) { 1123 int32_t v = tc[i].st[j].v; 1124 if (span) v += (tmu.shade.st[j].dx)*span; 1125 else v += (tmu.shade.st[j].dx)<<SPAN_BITS; 1126 const int32_t v0 = tc[i].st[j].q; 1127 const int32_t v1 = gglMulx(v, q1, iwscale); 1128 int32_t dvdx = v1 - v0; 1129 if (span) dvdx /= span; 1130 else dvdx >>= SPAN_BITS; 1131 tc[i].st[j].v = v; 1132 tc[i].st[j].q = v1; 1133 1134 const int scale = ti.st[j].scale + (iwscale - 30); 1135 if (scale >= 0) { 1136 ti.st[j].ydvdy = v0 << scale; 1137 ti.st[j].dvdx = dvdx << scale; 1138 } else { 1139 ti.st[j].ydvdy = v0 >> -scale; 1140 ti.st[j].dvdx = dvdx >> -scale; 1141 } 1142 } 1143 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 1144 gen.dsdx = ti.st[0].dvdx; 1145 gen.dtdx = ti.st[1].dvdx; 1146 } 1147 c->iterators.xl = xl; 1148 c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS)); 1149 w0 = w1; 1150 q0 = q1; 1151 c->span(c); 1152 } while(numSpans--); 1153} 1154 1155void scanline_perspective_single(context_t* c) 1156{ 1157 // 32 pixels spans works okay. 16 is a lot better, 1158 // but hey, it's a software renderer... 1159 const uint32_t SPAN_BITS = 5; 1160 const uint32_t ys = c->iterators.y; 1161 const uint32_t xs = c->iterators.xl; 1162 const uint32_t x1 = c->iterators.xr; 1163 const uint32_t xc = x1 - xs; 1164 1165 const iterators_t& ci = c->iterators; 1166 int32_t w = (xs * c->shade.dwdx) + ci.ydwdy; 1167 int32_t iw = gglRecipQ(w, 30); 1168 const int iwscale = 32 - gglClz(iw); 1169 1170 const int i = 31 - gglClz(c->state.enabled_tmu); 1171 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 1172 texture_t& tmu = c->state.texture[i]; 1173 texture_iterators_t& ti = tmu.iterators; 1174 const int sscale = ti.sscale + (iwscale - 30); 1175 const int tscale = ti.tscale + (iwscale - 30); 1176 int32_t s = tmu.shade.is0 + 1177 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) + 1178 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1); 1179 int32_t t = tmu.shade.it0 + 1180 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) + 1181 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1); 1182 int32_t s0 = gglMulx(s, iw, iwscale); 1183 int32_t t0 = gglMulx(t, iw, iwscale); 1184 int32_t xl = c->iterators.xl; 1185 1186 int32_t sq, tq, dsdx, dtdx; 1187 int32_t premainder = xc & ((1<<SPAN_BITS)-1); 1188 uint32_t numSpans = xc >> SPAN_BITS; 1189 if (c->shade.dwdx == 0) { 1190 // XXX: we could choose to do this if the error is small enough 1191 numSpans = 0; 1192 premainder = xc; 1193 goto no_perspective; 1194 } 1195 1196 if (premainder) { 1197 w += c->shade.dwdx * premainder; 1198 iw = gglRecipQ(w, 30); 1199no_perspective: 1200 s += tmu.shade.idsdx * premainder; 1201 t += tmu.shade.idtdx * premainder; 1202 sq = gglMulx(s, iw, iwscale); 1203 tq = gglMulx(t, iw, iwscale); 1204 dsdx = (sq - s0) / premainder; 1205 dtdx = (tq - t0) / premainder; 1206 c->iterators.xl = xl; 1207 c->iterators.xr = xl = xl + premainder; 1208 goto finish; 1209 } 1210 1211 while (numSpans--) { 1212 w += c->shade.dwdx << SPAN_BITS; 1213 s += tmu.shade.idsdx << SPAN_BITS; 1214 t += tmu.shade.idtdx << SPAN_BITS; 1215 iw = gglRecipQ(w, 30); 1216 sq = gglMulx(s, iw, iwscale); 1217 tq = gglMulx(t, iw, iwscale); 1218 dsdx = (sq - s0) >> SPAN_BITS; 1219 dtdx = (tq - t0) >> SPAN_BITS; 1220 c->iterators.xl = xl; 1221 c->iterators.xr = xl = xl + (1<<SPAN_BITS); 1222finish: 1223 if (sscale >= 0) { 1224 ti.ydsdy = s0 << sscale; 1225 ti.dsdx = dsdx << sscale; 1226 } else { 1227 ti.ydsdy = s0 >>-sscale; 1228 ti.dsdx = dsdx >>-sscale; 1229 } 1230 if (tscale >= 0) { 1231 ti.ydtdy = t0 << tscale; 1232 ti.dtdx = dtdx << tscale; 1233 } else { 1234 ti.ydtdy = t0 >>-tscale; 1235 ti.dtdx = dtdx >>-tscale; 1236 } 1237 s0 = sq; 1238 t0 = tq; 1239 gen.dsdx = ti.dsdx; 1240 gen.dtdx = ti.dtdx; 1241 c->span(c); 1242 } 1243} 1244 1245// ---------------------------------------------------------------------------- 1246 1247void scanline_t32cb16(context_t* c) 1248{ 1249 int32_t x = c->iterators.xl; 1250 size_t ct = c->iterators.xr - x; 1251 int32_t y = c->iterators.y; 1252 surface_t* cb = &(c->state.buffers.color); 1253 union { 1254 uint16_t* dst; 1255 uint32_t* dst32; 1256 }; 1257 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1258 1259 surface_t* tex = &(c->state.texture[0].surface); 1260 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1261 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1262 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 1263 int sR, sG, sB; 1264 uint32_t s, d; 1265 1266 if (ct==1 || uint32_t(dst)&2) { 1267last_one: 1268 s = GGL_RGBA_TO_HOST( *src++ ); 1269 sR = (s >> ( 3))&0x1F; 1270 sG = (s >> ( 8+2))&0x3F; 1271 sB = (s >> (16+3))&0x1F; 1272 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 1273 ct--; 1274 } 1275 1276 while (ct > 0) { 1277 s = GGL_RGBA_TO_HOST( *src++ ); 1278 sR = (s >> ( 3))&0x1F; 1279 sG = (s >> ( 8+2))&0x3F; 1280 sB = (s >> (16+3))&0x1F; 1281 d = (sR<<11)|(sG<<5)|sB; 1282 1283 s = GGL_RGBA_TO_HOST( *src++ ); 1284 sR = (s >> ( 3))&0x1F; 1285 sG = (s >> ( 8+2))&0x3F; 1286 sB = (s >> (16+3))&0x1F; 1287 d |= ((sR<<11)|(sG<<5)|sB)<<16; 1288 1289#if BYTE_ORDER == BIG_ENDIAN 1290 d = (d>>16) | (d<<16); 1291#endif 1292 1293 *dst32++ = d; 1294 ct -= 2; 1295 } 1296 1297 if (ct > 0) { 1298 goto last_one; 1299 } 1300} 1301 1302void scanline_t32cb16blend(context_t* c) 1303{ 1304 int32_t x = c->iterators.xl; 1305 size_t ct = c->iterators.xr - x; 1306 int32_t y = c->iterators.y; 1307 surface_t* cb = &(c->state.buffers.color); 1308 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1309 1310 surface_t* tex = &(c->state.texture[0].surface); 1311 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1312 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1313 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 1314 1315#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) 1316 scanline_t32cb16blend_arm(dst, src, ct); 1317#else 1318 while (ct--) { 1319 uint32_t s = *src++; 1320 if (!s) { 1321 dst++; 1322 continue; 1323 } 1324 uint16_t d = *dst; 1325 s = GGL_RGBA_TO_HOST(s); 1326 int sR = (s >> ( 3))&0x1F; 1327 int sG = (s >> ( 8+2))&0x3F; 1328 int sB = (s >> (16+3))&0x1F; 1329 int sA = (s>>24); 1330 int f = 0x100 - (sA + (sA>>7)); 1331 int dR = (d>>11)&0x1f; 1332 int dG = (d>>5)&0x3f; 1333 int dB = (d)&0x1f; 1334 sR += (f*dR)>>8; 1335 sG += (f*dG)>>8; 1336 sB += (f*dB)>>8; 1337 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 1338 } 1339#endif 1340} 1341 1342void scanline_memcpy(context_t* c) 1343{ 1344 int32_t x = c->iterators.xl; 1345 size_t ct = c->iterators.xr - x; 1346 int32_t y = c->iterators.y; 1347 surface_t* cb = &(c->state.buffers.color); 1348 const GGLFormat* fp = &(c->formats[cb->format]); 1349 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1350 (x + (cb->stride * y)) * fp->size; 1351 1352 surface_t* tex = &(c->state.texture[0].surface); 1353 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1354 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1355 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 1356 (u + (tex->stride * v)) * fp->size; 1357 1358 const size_t size = ct * fp->size; 1359 memcpy(dst, src, size); 1360} 1361 1362void scanline_memset8(context_t* c) 1363{ 1364 int32_t x = c->iterators.xl; 1365 size_t ct = c->iterators.xr - x; 1366 int32_t y = c->iterators.y; 1367 surface_t* cb = &(c->state.buffers.color); 1368 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + (x+(cb->stride*y)); 1369 uint32_t packed = c->packed; 1370 memset(dst, packed, ct); 1371} 1372 1373void scanline_memset16(context_t* c) 1374{ 1375 int32_t x = c->iterators.xl; 1376 size_t ct = c->iterators.xr - x; 1377 int32_t y = c->iterators.y; 1378 surface_t* cb = &(c->state.buffers.color); 1379 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1380 uint32_t packed = c->packed; 1381 android_memset16(dst, packed, ct*2); 1382} 1383 1384void scanline_memset32(context_t* c) 1385{ 1386 int32_t x = c->iterators.xl; 1387 size_t ct = c->iterators.xr - x; 1388 int32_t y = c->iterators.y; 1389 surface_t* cb = &(c->state.buffers.color); 1390 uint32_t* dst = reinterpret_cast<uint32_t*>(cb->data) + (x+(cb->stride*y)); 1391 uint32_t packed = GGL_HOST_TO_RGBA(c->packed); 1392 android_memset32(dst, packed, ct*4); 1393} 1394 1395void scanline_clear(context_t* c) 1396{ 1397 int32_t x = c->iterators.xl; 1398 size_t ct = c->iterators.xr - x; 1399 int32_t y = c->iterators.y; 1400 surface_t* cb = &(c->state.buffers.color); 1401 const GGLFormat* fp = &(c->formats[cb->format]); 1402 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1403 (x + (cb->stride * y)) * fp->size; 1404 const size_t size = ct * fp->size; 1405 memset(dst, 0, size); 1406} 1407 1408void scanline_set(context_t* c) 1409{ 1410 int32_t x = c->iterators.xl; 1411 size_t ct = c->iterators.xr - x; 1412 int32_t y = c->iterators.y; 1413 surface_t* cb = &(c->state.buffers.color); 1414 const GGLFormat* fp = &(c->formats[cb->format]); 1415 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1416 (x + (cb->stride * y)) * fp->size; 1417 const size_t size = ct * fp->size; 1418 memset(dst, 0xFF, size); 1419} 1420 1421void scanline_noop(context_t* c) 1422{ 1423} 1424 1425void rect_generic(context_t* c, size_t yc) 1426{ 1427 do { 1428 c->scanline(c); 1429 c->step_y(c); 1430 } while (--yc); 1431} 1432 1433void rect_memcpy(context_t* c, size_t yc) 1434{ 1435 int32_t x = c->iterators.xl; 1436 size_t ct = c->iterators.xr - x; 1437 int32_t y = c->iterators.y; 1438 surface_t* cb = &(c->state.buffers.color); 1439 const GGLFormat* fp = &(c->formats[cb->format]); 1440 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1441 (x + (cb->stride * y)) * fp->size; 1442 1443 surface_t* tex = &(c->state.texture[0].surface); 1444 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1445 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1446 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 1447 (u + (tex->stride * v)) * fp->size; 1448 1449 if (cb->stride == tex->stride && ct == size_t(cb->stride)) { 1450 memcpy(dst, src, ct * fp->size * yc); 1451 } else { 1452 const size_t size = ct * fp->size; 1453 const size_t dbpr = cb->stride * fp->size; 1454 const size_t sbpr = tex->stride * fp->size; 1455 do { 1456 memcpy(dst, src, size); 1457 dst += dbpr; 1458 src += sbpr; 1459 } while (--yc); 1460 } 1461} 1462// ---------------------------------------------------------------------------- 1463}; // namespace android 1464 1465using namespace android; 1466extern "C" void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1) 1467{ 1468#if ANDROID_ARM_CODEGEN 1469 GGLContext* c; 1470 gglInit(&c); 1471 needs_t needs; 1472 needs.n = n; 1473 needs.p = p; 1474 needs.t[0] = t0; 1475 needs.t[1] = t1; 1476 sp<ScanlineAssembly> a(new ScanlineAssembly(needs, 1024)); 1477 GGLAssembler assembler( new ARMAssembler(a) ); 1478 int err = assembler.scanline(needs, (context_t*)c); 1479 if (err != 0) { 1480 printf("error %08x (%s)\n", err, strerror(-err)); 1481 } 1482 gglUninit(c); 1483#else 1484 printf("This test runs only on ARM\n"); 1485#endif 1486} 1487 1488