scanline.cpp revision c2659e72d7723f8dcb8659a1cb1d8ff3d6edae88
1/* libs/pixelflinger/scanline.cpp 2** 3** Copyright 2006-2011, The Android Open Source Project 4** 5** Licensed under the Apache License, Version 2.0 (the "License"); 6** you may not use this file except in compliance with the License. 7** You may obtain a copy of the License at 8** 9** http://www.apache.org/licenses/LICENSE-2.0 10** 11** Unless required by applicable law or agreed to in writing, software 12** distributed under the License is distributed on an "AS IS" BASIS, 13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14** See the License for the specific language governing permissions and 15** limitations under the License. 16*/ 17 18 19#define LOG_TAG "pixelflinger" 20 21#include <assert.h> 22#include <stdlib.h> 23#include <stdio.h> 24#include <string.h> 25 26#include <cutils/memory.h> 27#include <cutils/log.h> 28 29#include "buffer.h" 30#include "scanline.h" 31 32#include "codeflinger/CodeCache.h" 33#include "codeflinger/GGLAssembler.h" 34#if defined(__arm__) 35#include "codeflinger/ARMAssembler.h" 36#elif defined(__aarch64__) 37#include "codeflinger/Arm64Assembler.h" 38#elif defined(__mips__) 39#include "codeflinger/MIPSAssembler.h" 40#endif 41//#include "codeflinger/ARMAssemblerOptimizer.h" 42 43// ---------------------------------------------------------------------------- 44 45#define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline 46#define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic 47#define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic 48#define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen 49 50#ifdef NDEBUG 51# define ANDROID_RELEASE 52# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 53#else 54# define ANDROID_DEBUG 55# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 56#endif 57 58#if defined(__arm__) || defined(__mips__) || defined(__aarch64__) 59# define ANDROID_ARM_CODEGEN 1 60#else 61# define ANDROID_ARM_CODEGEN 0 62#endif 63 64#define DEBUG__CODEGEN_ONLY 
0 65 66/* Set to 1 to dump to the log the states that need a new 67 * code-generated scanline callback, i.e. those that don't 68 * have a corresponding shortcut function. 69 */ 70#define DEBUG_NEEDS 0 71 72#ifdef __mips__ 73#define ASSEMBLY_SCRATCH_SIZE 4096 74#elif defined(__aarch64__) 75#define ASSEMBLY_SCRATCH_SIZE 8192 76#else 77#define ASSEMBLY_SCRATCH_SIZE 2048 78#endif 79 80// ---------------------------------------------------------------------------- 81namespace android { 82// ---------------------------------------------------------------------------- 83 84static void init_y(context_t*, int32_t); 85static void init_y_noop(context_t*, int32_t); 86static void init_y_packed(context_t*, int32_t); 87static void init_y_error(context_t*, int32_t); 88 89static void step_y__generic(context_t* c); 90static void step_y__nop(context_t*); 91static void step_y__smooth(context_t* c); 92static void step_y__tmu(context_t* c); 93static void step_y__w(context_t* c); 94 95static void scanline(context_t* c); 96static void scanline_perspective(context_t* c); 97static void scanline_perspective_single(context_t* c); 98static void scanline_t32cb16blend(context_t* c); 99static void scanline_t32cb16blend_dither(context_t* c); 100static void scanline_t32cb16blend_srca(context_t* c); 101static void scanline_t32cb16blend_clamp(context_t* c); 102static void scanline_t32cb16blend_clamp_dither(context_t* c); 103static void scanline_t32cb16blend_clamp_mod(context_t* c); 104static void scanline_x32cb16blend_clamp_mod(context_t* c); 105static void scanline_t32cb16blend_clamp_mod_dither(context_t* c); 106static void scanline_x32cb16blend_clamp_mod_dither(context_t* c); 107static void scanline_t32cb16(context_t* c); 108static void scanline_t32cb16_dither(context_t* c); 109static void scanline_t32cb16_clamp(context_t* c); 110static void scanline_t32cb16_clamp_dither(context_t* c); 111static void scanline_col32cb16blend(context_t* c); 112static void scanline_t16cb16_clamp(context_t* c); 
113static void scanline_t16cb16blend_clamp_mod(context_t* c); 114static void scanline_memcpy(context_t* c); 115static void scanline_memset8(context_t* c); 116static void scanline_memset16(context_t* c); 117static void scanline_memset32(context_t* c); 118static void scanline_noop(context_t* c); 119static void scanline_set(context_t* c); 120static void scanline_clear(context_t* c); 121 122static void rect_generic(context_t* c, size_t yc); 123static void rect_memcpy(context_t* c, size_t yc); 124 125#if defined( __arm__) 126extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); 127extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); 128extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); 129extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); 130#elif defined(__aarch64__) 131extern "C" void scanline_t32cb16blend_arm64(uint16_t*, uint32_t*, size_t); 132extern "C" void scanline_col32cb16blend_arm64(uint16_t *dst, uint32_t col, size_t ct); 133#elif defined(__mips__) 134extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t); 135#endif 136 137// ---------------------------------------------------------------------------- 138 139static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix) 140{ 141 return uint16_t( ((pix << 8) & 0xf800) | 142 ((pix >> 5) & 0x07e0) | 143 ((pix >> 19) & 0x001f) ); 144} 145 146struct shortcut_t { 147 needs_filter_t filter; 148 const char* desc; 149 void (*scanline)(context_t*); 150 void (*init_y)(context_t*, int32_t); 151}; 152 153// Keep in sync with needs 154 155/* To understand the values here, have a look at: 156 * system/core/include/private/pixelflinger/ggl_context.h 157 * 158 * Especially the lines defining and using GGL_RESERVE_NEEDS 159 * 160 * Quick reminders: 161 * - the last nibble of the first value is the destination buffer format. 
162 * - the last nibble of the third value is the source texture format 163 * - formats: 4=rgb565 1=abgr8888 2=xbgr8888 164 * 165 * In the descriptions below: 166 * 167 * SRC means we copy the source pixels to the destination 168 * 169 * SRC_OVER means we blend the source pixels to the destination 170 * with dstFactor = 1-srcA, srcFactor=1 (premultiplied source). 171 * This mode is otherwise called 'blend'. 172 * 173 * SRCA_OVER means we blend the source pixels to the destination 174 * with dstFactor=srcA*(1-srcA) srcFactor=srcA (non-premul source). 175 * This mode is otherwise called 'blend_srca' 176 * 177 * clamp means we fetch source pixels from a texture with u/v clamping 178 * 179 * mod means the source pixels are modulated (multiplied) by the 180 * a/r/g/b of the current context's color. Typically used for 181 * fade-in / fade-out. 182 * 183 * dither means we dither 32 bit values to 16 bits 184 */ 185static shortcut_t shortcuts[] = { 186 { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } }, 187 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 188 "565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop }, 189 { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } }, 190 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 191 "565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop }, 192 /* same as first entry, but with dithering */ 193 { { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } }, 194 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 195 "565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop }, 196 /* same as second entry, but with dithering */ 197 { { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } }, 198 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 199 "565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop }, 200 /* this is used during the boot animation - CHEAT: ignore dithering */ 201 { { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } 
}, 202 { 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } }, 203 "565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop }, 204 /* special case for arbitrary texture coordinates (think scaling) */ 205 { { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } }, 206 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 207 "565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y }, 208 { { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } }, 209 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 210 "565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y }, 211 /* another case used during emulation */ 212 { { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } }, 213 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 214 "565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y }, 215 /* and this */ 216 { { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } }, 217 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 218 "565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y }, 219 { { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } }, 220 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 221 "565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y }, 222 { { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } }, 223 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 224 "565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y }, 225 { { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } }, 226 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 227 "565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y }, 228 { { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } }, 229 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 230 "565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, 
init_y }, 231 { { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } }, 232 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 233 "565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y }, 234 { { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } }, 235 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 236 "565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y }, 237 { { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } }, 238 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 239 "565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y }, 240 { { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } }, 241 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 242 "565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y }, 243 { { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } }, 244 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } }, 245 "565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed }, 246 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 247 { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } }, 248 "(nop) alpha test", scanline_noop, init_y_noop }, 249 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 250 { 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } }, 251 "(nop) depth test", scanline_noop, init_y_noop }, 252 { { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } }, 253 { 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 254 "(nop) logic_op", scanline_noop, init_y_noop }, 255 { { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } }, 256 { 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 257 "(nop) color mask", scanline_noop, init_y_noop }, 258 { { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } }, 259 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 260 "(set) logic_op", scanline_set, init_y_noop }, 261 { { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } }, 262 { 
0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 263 "(clear) logic_op", scanline_clear, init_y_noop }, 264 { { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } }, 265 { 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } }, 266 "(clear) blending 0/0", scanline_clear, init_y_noop }, 267 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 268 { 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } }, 269 "(error) invalid color-buffer format", scanline_noop, init_y_error }, 270}; 271static const needs_filter_t noblend1to1 = { 272 // (disregard dithering, see below) 273 { 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } }, 274 { 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } } 275}; 276static const needs_filter_t fill16noblend = { 277 { 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } }, 278 { 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } } 279}; 280 281// ---------------------------------------------------------------------------- 282 283#if ANDROID_ARM_CODEGEN 284 285#if defined(__mips__) 286static CodeCache gCodeCache(32 * 1024); 287#elif defined(__aarch64__) 288static CodeCache gCodeCache(48 * 1024); 289#else 290static CodeCache gCodeCache(12 * 1024); 291#endif 292 293class ScanlineAssembly : public Assembly { 294 AssemblyKey<needs_t> mKey; 295public: 296 ScanlineAssembly(needs_t needs, size_t size) 297 : Assembly(size), mKey(needs) { } 298 const AssemblyKey<needs_t>& key() const { return mKey; } 299}; 300#endif 301 302// ---------------------------------------------------------------------------- 303 304void ggl_init_scanline(context_t* c) 305{ 306 c->init_y = init_y; 307 c->step_y = step_y__generic; 308 c->scanline = scanline; 309} 310 311void ggl_uninit_scanline(context_t* c) 312{ 313 if (c->state.buffers.coverage) 314 free(c->state.buffers.coverage); 315#if ANDROID_ARM_CODEGEN 316 if (c->scanline_as) 317 c->scanline_as->decStrong(c); 318#endif 319} 320 321// 
---------------------------------------------------------------------------- 322 323static void pick_scanline(context_t* c) 324{ 325#if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0)) 326 327#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC 328 c->init_y = init_y; 329 c->step_y = step_y__generic; 330 c->scanline = scanline; 331 return; 332#endif 333 334 //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n", 335 // c->state.needs.n, c->state.needs.p, 336 // c->state.needs.t[0], c->state.needs.t[1]); 337 338 // first handle the special case that we cannot test with a filter 339 const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n); 340 if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) { 341 if (c->state.needs.match(noblend1to1)) { 342 // this will match regardless of dithering state, since both 343 // src and dest have the same format anyway, there is no dithering 344 // to be done. 345 const GGLFormat* f = 346 &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]); 347 if ((f->components == GGL_RGB) || 348 (f->components == GGL_RGBA) || 349 (f->components == GGL_LUMINANCE) || 350 (f->components == GGL_LUMINANCE_ALPHA)) 351 { 352 // format must have all of RGB components 353 // (so the current color doesn't show through) 354 c->scanline = scanline_memcpy; 355 c->init_y = init_y_noop; 356 return; 357 } 358 } 359 } 360 361 if (c->state.needs.match(fill16noblend)) { 362 c->init_y = init_y_packed; 363 switch (c->formats[cb_format].size) { 364 case 1: c->scanline = scanline_memset8; return; 365 case 2: c->scanline = scanline_memset16; return; 366 case 4: c->scanline = scanline_memset32; return; 367 } 368 } 369 370 const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t); 371 for (int i=0 ; i<numFilters ; i++) { 372 if (c->state.needs.match(shortcuts[i].filter)) { 373 c->scanline = shortcuts[i].scanline; 374 c->init_y = shortcuts[i].init_y; 375 return; 376 } 377 } 378 379#if DEBUG_NEEDS 380 ALOGI("Needs: n=0x%08x p=0x%08x 
t0=0x%08x t1=0x%08x", 381 c->state.needs.n, c->state.needs.p, 382 c->state.needs.t[0], c->state.needs.t[1]); 383#endif 384 385#endif // DEBUG__CODEGEN_ONLY 386 387 c->init_y = init_y; 388 c->step_y = step_y__generic; 389 390#if ANDROID_ARM_CODEGEN 391 // we're going to have to generate some code... 392 // here, generate code for our pixel pipeline 393 const AssemblyKey<needs_t> key(c->state.needs); 394 sp<Assembly> assembly = gCodeCache.lookup(key); 395 if (assembly == 0) { 396 // create a new assembly region 397 sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs, 398 ASSEMBLY_SCRATCH_SIZE); 399 // initialize our assembler 400#if defined(__arm__) 401 GGLAssembler assembler( new ARMAssembler(a) ); 402 //GGLAssembler assembler( 403 // new ARMAssemblerOptimizer(new ARMAssembler(a)) ); 404#endif 405#if defined(__mips__) 406 GGLAssembler assembler( new ArmToMipsAssembler(a) ); 407#elif defined(__aarch64__) 408 GGLAssembler assembler( new ArmToArm64Assembler(a) ); 409#endif 410 // generate the scanline code for the given needs 411 bool err = assembler.scanline(c->state.needs, c) != 0; 412 if (ggl_likely(!err)) { 413 // finally, cache this assembly 414 err = gCodeCache.cache(a->key(), a) < 0; 415 } 416 if (ggl_unlikely(err)) { 417 ALOGE("error generating or caching assembly. 
Reverting to NOP."); 418 c->scanline = scanline_noop; 419 c->init_y = init_y_noop; 420 c->step_y = step_y__nop; 421 return; 422 } 423 assembly = a; 424 } 425 426 // release the previous assembly 427 if (c->scanline_as) { 428 c->scanline_as->decStrong(c); 429 } 430 431 //ALOGI("using generated pixel-pipeline"); 432 c->scanline_as = assembly.get(); 433 c->scanline_as->incStrong(c); // hold on to assembly 434 c->scanline = (void(*)(context_t* c))assembly->base(); 435#else 436// ALOGW("using generic (slow) pixel-pipeline"); 437 c->scanline = scanline; 438#endif 439} 440 441void ggl_pick_scanline(context_t* c) 442{ 443 pick_scanline(c); 444 if ((c->state.enables & GGL_ENABLE_W) && 445 (c->state.enables & GGL_ENABLE_TMUS)) 446 { 447 c->span = c->scanline; 448 c->scanline = scanline_perspective; 449 if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) { 450 // only one TMU enabled 451 c->scanline = scanline_perspective_single; 452 } 453 } 454} 455 456// ---------------------------------------------------------------------------- 457 458static void blending(context_t* c, pixel_t* fragment, pixel_t* fb); 459static void blend_factor(context_t* c, pixel_t* r, uint32_t factor, 460 const pixel_t* src, const pixel_t* dst); 461static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv); 462 463#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 464 465// no need to compile the generic-pipeline, it can't be reached 466void scanline(context_t*) 467{ 468} 469 470#else 471 472void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv) 473{ 474 if (su && sv) { 475 if (su > sv) { 476 v = ggl_expand(v, sv, su); 477 sv = su; 478 } else if (su < sv) { 479 u = ggl_expand(u, su, sv); 480 su = sv; 481 } 482 } 483} 484 485void blending(context_t* c, pixel_t* fragment, pixel_t* fb) 486{ 487 rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]); 488 rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]); 489 rescale(fragment->c[2], 
fragment->s[2], fb->c[2], fb->s[2]); 490 rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]); 491 492 pixel_t sf, df; 493 blend_factor(c, &sf, c->state.blend.src, fragment, fb); 494 blend_factor(c, &df, c->state.blend.dst, fragment, fb); 495 496 fragment->c[1] = 497 gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1])); 498 fragment->c[2] = 499 gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2])); 500 fragment->c[3] = 501 gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3])); 502 503 if (c->state.blend.alpha_separate) { 504 blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb); 505 blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb); 506 } 507 508 fragment->c[0] = 509 gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0])); 510 511 // clamp to 1.0 512 if (fragment->c[0] >= (1LU<<fragment->s[0])) 513 fragment->c[0] = (1<<fragment->s[0])-1; 514 if (fragment->c[1] >= (1LU<<fragment->s[1])) 515 fragment->c[1] = (1<<fragment->s[1])-1; 516 if (fragment->c[2] >= (1LU<<fragment->s[2])) 517 fragment->c[2] = (1<<fragment->s[2])-1; 518 if (fragment->c[3] >= (1LU<<fragment->s[3])) 519 fragment->c[3] = (1<<fragment->s[3])-1; 520} 521 522static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0) 523{ 524 if (!size) 525 return def; 526 527 // scale to 16 bits 528 if (size > 16) { 529 x >>= (size - 16); 530 } else if (size < 16) { 531 x = ggl_expand(x, size, 16); 532 } 533 x += x >> 15; 534 return x; 535} 536 537void blend_factor(context_t* /*c*/, pixel_t* r, 538 uint32_t factor, const pixel_t* src, const pixel_t* dst) 539{ 540 switch (factor) { 541 case GGL_ZERO: 542 r->c[1] = 543 r->c[2] = 544 r->c[3] = 545 r->c[0] = 0; 546 break; 547 case GGL_ONE: 548 r->c[1] = 549 r->c[2] = 550 r->c[3] = 551 r->c[0] = FIXED_ONE; 552 break; 553 case GGL_DST_COLOR: 554 r->c[1] = blendfactor(dst->c[1], dst->s[1]); 555 r->c[2] = blendfactor(dst->c[2], dst->s[2]); 556 r->c[3] = blendfactor(dst->c[3], dst->s[3]); 557 
r->c[0] = blendfactor(dst->c[0], dst->s[0]); 558 break; 559 case GGL_SRC_COLOR: 560 r->c[1] = blendfactor(src->c[1], src->s[1]); 561 r->c[2] = blendfactor(src->c[2], src->s[2]); 562 r->c[3] = blendfactor(src->c[3], src->s[3]); 563 r->c[0] = blendfactor(src->c[0], src->s[0]); 564 break; 565 case GGL_ONE_MINUS_DST_COLOR: 566 r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]); 567 r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]); 568 r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]); 569 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]); 570 break; 571 case GGL_ONE_MINUS_SRC_COLOR: 572 r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]); 573 r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]); 574 r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]); 575 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]); 576 break; 577 case GGL_SRC_ALPHA: 578 r->c[1] = 579 r->c[2] = 580 r->c[3] = 581 r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE); 582 break; 583 case GGL_ONE_MINUS_SRC_ALPHA: 584 r->c[1] = 585 r->c[2] = 586 r->c[3] = 587 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE); 588 break; 589 case GGL_DST_ALPHA: 590 r->c[1] = 591 r->c[2] = 592 r->c[3] = 593 r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 594 break; 595 case GGL_ONE_MINUS_DST_ALPHA: 596 r->c[1] = 597 r->c[2] = 598 r->c[3] = 599 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 600 break; 601 case GGL_SRC_ALPHA_SATURATE: 602 // XXX: GGL_SRC_ALPHA_SATURATE 603 break; 604 } 605} 606 607static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap) 608{ 609 GGLfixed d; 610 if (tx_wrap == GGL_REPEAT) { 611 d = (uint32_t(coord)>>16) * size; 612 } else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics 613 const GGLfixed clamp_min = FIXED_HALF; 614 const GGLfixed clamp_max = (size << 16) - FIXED_HALF; 615 if (coord < clamp_min) coord = clamp_min; 616 if (coord > clamp_max) coord = clamp_max; 617 d = coord; 618 } else 
{ // 1:1 619 const GGLfixed clamp_min = 0; 620 const GGLfixed clamp_max = (size << 16); 621 if (coord < clamp_min) coord = clamp_min; 622 if (coord > clamp_max) coord = clamp_max; 623 d = coord; 624 } 625 return d; 626} 627 628static inline 629GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len) 630{ 631 const int32_t end = dvdx * (len-1) + v; 632 if (end < 0) 633 v -= end; 634 v &= ~(v>>31); 635 return v; 636} 637 638void scanline(context_t* c) 639{ 640 const uint32_t enables = c->state.enables; 641 const int xs = c->iterators.xl; 642 const int x1 = c->iterators.xr; 643 int xc = x1 - xs; 644 const int16_t* covPtr = c->state.buffers.coverage + xs; 645 646 // All iterated values are sampled at the pixel center 647 648 // reset iterators for that scanline... 649 GGLcolor r, g, b, a; 650 iterators_t& ci = c->iterators; 651 if (enables & GGL_ENABLE_SMOOTH) { 652 r = (xs * c->shade.drdx) + ci.ydrdy; 653 g = (xs * c->shade.dgdx) + ci.ydgdy; 654 b = (xs * c->shade.dbdx) + ci.ydbdy; 655 a = (xs * c->shade.dadx) + ci.ydady; 656 r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc); 657 g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc); 658 b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc); 659 a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc); 660 } else { 661 r = ci.ydrdy; 662 g = ci.ydgdy; 663 b = ci.ydbdy; 664 a = ci.ydady; 665 } 666 667 // z iterators are 1.31 668 GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy; 669 GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy; 670 671 struct { 672 GGLfixed s, t; 673 } tc[GGL_TEXTURE_UNIT_COUNT]; 674 if (enables & GGL_ENABLE_TMUS) { 675 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 676 if (c->state.texture[i].enable) { 677 texture_iterators_t& ti = c->state.texture[i].iterators; 678 if (enables & GGL_ENABLE_W) { 679 tc[i].s = ti.ydsdy; 680 tc[i].t = ti.ydtdy; 681 } else { 682 tc[i].s = (xs * ti.dsdx) + ti.ydsdy; 683 tc[i].t = (xs * ti.dtdx) + ti.ydtdy; 684 } 685 } 686 } 687 } 688 689 pixel_t fragment; 690 pixel_t texel; 691 pixel_t 
fb; 692 693 uint32_t x = xs; 694 uint32_t y = c->iterators.y; 695 696 while (xc--) { 697 698 { // just a scope 699 700 // read color (convert to 8 bits by keeping only the integer part) 701 fragment.s[1] = fragment.s[2] = 702 fragment.s[3] = fragment.s[0] = 8; 703 fragment.c[1] = r >> (GGL_COLOR_BITS-8); 704 fragment.c[2] = g >> (GGL_COLOR_BITS-8); 705 fragment.c[3] = b >> (GGL_COLOR_BITS-8); 706 fragment.c[0] = a >> (GGL_COLOR_BITS-8); 707 708 // texturing 709 if (enables & GGL_ENABLE_TMUS) { 710 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 711 texture_t& tx = c->state.texture[i]; 712 if (!tx.enable) 713 continue; 714 texture_iterators_t& ti = tx.iterators; 715 int32_t u, v; 716 717 // s-coordinate 718 if (tx.s_coord != GGL_ONE_TO_ONE) { 719 const int w = tx.surface.width; 720 u = wrapping(tc[i].s, w, tx.s_wrap); 721 tc[i].s += ti.dsdx; 722 } else { 723 u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF; 724 } 725 726 // t-coordinate 727 if (tx.t_coord != GGL_ONE_TO_ONE) { 728 const int h = tx.surface.height; 729 v = wrapping(tc[i].t, h, tx.t_wrap); 730 tc[i].t += ti.dtdx; 731 } else { 732 v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF; 733 } 734 735 // read texture 736 if (tx.mag_filter == GGL_NEAREST && 737 tx.min_filter == GGL_NEAREST) 738 { 739 u >>= 16; 740 v >>= 16; 741 tx.surface.read(&tx.surface, c, u, v, &texel); 742 } else { 743 const int w = tx.surface.width; 744 const int h = tx.surface.height; 745 u -= FIXED_HALF; 746 v -= FIXED_HALF; 747 int u0 = u >> 16; 748 int v0 = v >> 16; 749 int u1 = u0 + 1; 750 int v1 = v0 + 1; 751 if (tx.s_wrap == GGL_REPEAT) { 752 if (u0<0) u0 += w; 753 if (u1<0) u1 += w; 754 if (u0>=w) u0 -= w; 755 if (u1>=w) u1 -= w; 756 } else { 757 if (u0<0) u0 = 0; 758 if (u1<0) u1 = 0; 759 if (u0>=w) u0 = w-1; 760 if (u1>=w) u1 = w-1; 761 } 762 if (tx.t_wrap == GGL_REPEAT) { 763 if (v0<0) v0 += h; 764 if (v1<0) v1 += h; 765 if (v0>=h) v0 -= h; 766 if (v1>=h) v1 -= h; 767 } else { 768 if (v0<0) v0 = 0; 769 if (v1<0) v1 = 0; 770 if 
(v0>=h) v0 = h-1; 771 if (v1>=h) v1 = h-1; 772 } 773 pixel_t texels[4]; 774 uint32_t mm[4]; 775 tx.surface.read(&tx.surface, c, u0, v0, &texels[0]); 776 tx.surface.read(&tx.surface, c, u0, v1, &texels[1]); 777 tx.surface.read(&tx.surface, c, u1, v0, &texels[2]); 778 tx.surface.read(&tx.surface, c, u1, v1, &texels[3]); 779 u = (u >> 12) & 0xF; 780 v = (v >> 12) & 0xF; 781 u += u>>3; 782 v += v>>3; 783 mm[0] = (0x10 - u) * (0x10 - v); 784 mm[1] = (0x10 - u) * v; 785 mm[2] = u * (0x10 - v); 786 mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]); 787 for (int j=0 ; j<4 ; j++) { 788 texel.s[j] = texels[0].s[j]; 789 if (!texel.s[j]) continue; 790 texel.s[j] += 8; 791 texel.c[j] = texels[0].c[j]*mm[0] + 792 texels[1].c[j]*mm[1] + 793 texels[2].c[j]*mm[2] + 794 texels[3].c[j]*mm[3] ; 795 } 796 } 797 798 // Texture environnement... 799 for (int j=0 ; j<4 ; j++) { 800 uint32_t& Cf = fragment.c[j]; 801 uint32_t& Ct = texel.c[j]; 802 uint8_t& sf = fragment.s[j]; 803 uint8_t& st = texel.s[j]; 804 uint32_t At = texel.c[0]; 805 uint8_t sat = texel.s[0]; 806 switch (tx.env) { 807 case GGL_REPLACE: 808 if (st) { 809 Cf = Ct; 810 sf = st; 811 } 812 break; 813 case GGL_MODULATE: 814 if (st) { 815 uint32_t factor = Ct + (Ct>>(st-1)); 816 Cf = (Cf * factor) >> st; 817 } 818 break; 819 case GGL_DECAL: 820 if (sat) { 821 rescale(Cf, sf, Ct, st); 822 Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat; 823 } 824 break; 825 case GGL_BLEND: 826 if (st) { 827 uint32_t Cc = tx.env_color[i]; 828 if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8; 829 else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf); 830 uint32_t factor = Ct + (Ct>>(st-1)); 831 Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st; 832 } 833 break; 834 case GGL_ADD: 835 if (st) { 836 rescale(Cf, sf, Ct, st); 837 Cf += Ct; 838 } 839 break; 840 } 841 } 842 } 843 } 844 845 // coverage application 846 if (enables & GGL_ENABLE_AA) { 847 int16_t cf = *covPtr++; 848 fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15; 849 } 850 851 // alpha-test 852 if (enables & 
GGL_ENABLE_ALPHA_TEST) { 853 GGLcolor ref = c->state.alpha_test.ref; 854 GGLcolor alpha = (uint64_t(fragment.c[0]) * 855 ((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1); 856 switch (c->state.alpha_test.func) { 857 case GGL_NEVER: goto discard; 858 case GGL_LESS: if (alpha<ref) break; goto discard; 859 case GGL_EQUAL: if (alpha==ref) break; goto discard; 860 case GGL_LEQUAL: if (alpha<=ref) break; goto discard; 861 case GGL_GREATER: if (alpha>ref) break; goto discard; 862 case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard; 863 case GGL_GEQUAL: if (alpha>=ref) break; goto discard; 864 } 865 } 866 867 // depth test 868 if (c->state.buffers.depth.format) { 869 if (enables & GGL_ENABLE_DEPTH_TEST) { 870 surface_t* cb = &(c->state.buffers.depth); 871 uint16_t* p = (uint16_t*)(cb->data)+(x+(cb->stride*y)); 872 uint16_t zz = uint32_t(z)>>(16); 873 uint16_t depth = *p; 874 switch (c->state.depth_test.func) { 875 case GGL_NEVER: goto discard; 876 case GGL_LESS: if (zz<depth) break; goto discard; 877 case GGL_EQUAL: if (zz==depth) break; goto discard; 878 case GGL_LEQUAL: if (zz<=depth) break; goto discard; 879 case GGL_GREATER: if (zz>depth) break; goto discard; 880 case GGL_NOTEQUAL: if (zz!=depth) break; goto discard; 881 case GGL_GEQUAL: if (zz>=depth) break; goto discard; 882 } 883 // depth buffer is not enabled, if depth-test is not enabled 884/* 885 fragment.s[1] = fragment.s[2] = 886 fragment.s[3] = fragment.s[0] = 8; 887 fragment.c[1] = 888 fragment.c[2] = 889 fragment.c[3] = 890 fragment.c[0] = 255 - (zz>>8); 891*/ 892 if (c->state.mask.depth) { 893 *p = zz; 894 } 895 } 896 } 897 898 // fog 899 if (enables & GGL_ENABLE_FOG) { 900 for (int i=1 ; i<=3 ; i++) { 901 GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF; 902 uint32_t& c = fragment.c[i]; 903 uint8_t& s = fragment.s[i]; 904 c = (c * 0x10000) / ((1<<s)-1); 905 c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f)); 906 s = 16; 907 } 908 } 909 910 // blending 911 if (enables & GGL_ENABLE_BLENDING) { 912 
fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind 913 fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0; 914 c->state.buffers.color.read( 915 &(c->state.buffers.color), c, x, y, &fb); 916 blending( c, &fragment, &fb ); 917 } 918 919 // write 920 c->state.buffers.color.write( 921 &(c->state.buffers.color), c, x, y, &fragment); 922 } 923 924discard: 925 // iterate... 926 x += 1; 927 if (enables & GGL_ENABLE_SMOOTH) { 928 r += c->shade.drdx; 929 g += c->shade.dgdx; 930 b += c->shade.dbdx; 931 a += c->shade.dadx; 932 } 933 z += c->shade.dzdx; 934 f += c->shade.dfdx; 935 } 936} 937 938#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 939 940// ---------------------------------------------------------------------------- 941#if 0 942#pragma mark - 943#pragma mark Scanline 944#endif 945 946/* Used to parse a 32-bit source texture linearly. Usage is: 947 * 948 * horz_iterator32 hi(context); 949 * while (...) { 950 * uint32_t src_pixel = hi.get_pixel32(); 951 * ... 952 * } 953 * 954 * Use only for one-to-one texture mapping. 955 */ 956struct horz_iterator32 { 957 horz_iterator32(context_t* c) { 958 const int x = c->iterators.xl; 959 const int y = c->iterators.y; 960 texture_t& tx = c->state.texture[0]; 961 const int32_t u = (tx.shade.is0>>16) + x; 962 const int32_t v = (tx.shade.it0>>16) + y; 963 m_src = reinterpret_cast<uint32_t*>(tx.surface.data)+(u+(tx.surface.stride*v)); 964 } 965 uint32_t get_pixel32() { 966 return *m_src++; 967 } 968protected: 969 uint32_t* m_src; 970}; 971 972/* A variant for 16-bit source textures. 
*/ 973struct horz_iterator16 { 974 horz_iterator16(context_t* c) { 975 const int x = c->iterators.xl; 976 const int y = c->iterators.y; 977 texture_t& tx = c->state.texture[0]; 978 const int32_t u = (tx.shade.is0>>16) + x; 979 const int32_t v = (tx.shade.it0>>16) + y; 980 m_src = reinterpret_cast<uint16_t*>(tx.surface.data)+(u+(tx.surface.stride*v)); 981 } 982 uint16_t get_pixel16() { 983 return *m_src++; 984 } 985protected: 986 uint16_t* m_src; 987}; 988 989/* A clamp iterator is used to iterate inside a texture with GGL_CLAMP. 990 * After initialization, call get_src16() or get_src32() to get the current 991 * texture pixel value. 992 */ 993struct clamp_iterator { 994 clamp_iterator(context_t* c) { 995 const int xs = c->iterators.xl; 996 texture_t& tx = c->state.texture[0]; 997 texture_iterators_t& ti = tx.iterators; 998 m_s = (xs * ti.dsdx) + ti.ydsdy; 999 m_t = (xs * ti.dtdx) + ti.ydtdy; 1000 m_ds = ti.dsdx; 1001 m_dt = ti.dtdx; 1002 m_width_m1 = tx.surface.width - 1; 1003 m_height_m1 = tx.surface.height - 1; 1004 m_data = tx.surface.data; 1005 m_stride = tx.surface.stride; 1006 } 1007 uint16_t get_pixel16() { 1008 int u, v; 1009 get_uv(u, v); 1010 uint16_t* src = reinterpret_cast<uint16_t*>(m_data) + (u + (m_stride*v)); 1011 return src[0]; 1012 } 1013 uint32_t get_pixel32() { 1014 int u, v; 1015 get_uv(u, v); 1016 uint32_t* src = reinterpret_cast<uint32_t*>(m_data) + (u + (m_stride*v)); 1017 return src[0]; 1018 } 1019private: 1020 void get_uv(int& u, int& v) { 1021 int uu = m_s >> 16; 1022 int vv = m_t >> 16; 1023 if (uu < 0) 1024 uu = 0; 1025 if (uu > m_width_m1) 1026 uu = m_width_m1; 1027 if (vv < 0) 1028 vv = 0; 1029 if (vv > m_height_m1) 1030 vv = m_height_m1; 1031 u = uu; 1032 v = vv; 1033 m_s += m_ds; 1034 m_t += m_dt; 1035 } 1036 1037 GGLfixed m_s, m_t; 1038 GGLfixed m_ds, m_dt; 1039 int m_width_m1, m_height_m1; 1040 uint8_t* m_data; 1041 int m_stride; 1042}; 1043 1044/* 1045 * The 'horizontal clamp iterator' variant corresponds to the case where 1046 * 
the 'v' coordinate doesn't change. This is useful to avoid one mult and 1047 * extra adds / checks per pixels, if the blending/processing operation after 1048 * this is very fast. 1049 */ 1050static int is_context_horizontal(const context_t* c) { 1051 return (c->state.texture[0].iterators.dtdx == 0); 1052} 1053 1054struct horz_clamp_iterator { 1055 uint16_t get_pixel16() { 1056 int u = m_s >> 16; 1057 m_s += m_ds; 1058 if (u < 0) 1059 u = 0; 1060 if (u > m_width_m1) 1061 u = m_width_m1; 1062 const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data); 1063 return src[u]; 1064 } 1065 uint32_t get_pixel32() { 1066 int u = m_s >> 16; 1067 m_s += m_ds; 1068 if (u < 0) 1069 u = 0; 1070 if (u > m_width_m1) 1071 u = m_width_m1; 1072 const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data); 1073 return src[u]; 1074 } 1075protected: 1076 void init(const context_t* c, int shift); 1077 GGLfixed m_s; 1078 GGLfixed m_ds; 1079 int m_width_m1; 1080 const uint8_t* m_data; 1081}; 1082 1083void horz_clamp_iterator::init(const context_t* c, int shift) 1084{ 1085 const int xs = c->iterators.xl; 1086 const texture_t& tx = c->state.texture[0]; 1087 const texture_iterators_t& ti = tx.iterators; 1088 m_s = (xs * ti.dsdx) + ti.ydsdy; 1089 m_ds = ti.dsdx; 1090 m_width_m1 = tx.surface.width-1; 1091 m_data = tx.surface.data; 1092 1093 GGLfixed t = (xs * ti.dtdx) + ti.ydtdy; 1094 int v = t >> 16; 1095 if (v < 0) 1096 v = 0; 1097 else if (v >= (int)tx.surface.height) 1098 v = (int)tx.surface.height-1; 1099 1100 m_data += (tx.surface.stride*v) << shift; 1101} 1102 1103struct horz_clamp_iterator16 : horz_clamp_iterator { 1104 horz_clamp_iterator16(const context_t* c) { 1105 init(c,1); 1106 }; 1107}; 1108 1109struct horz_clamp_iterator32 : horz_clamp_iterator { 1110 horz_clamp_iterator32(context_t* c) { 1111 init(c,2); 1112 }; 1113}; 1114 1115/* This is used to perform dithering operations. 
1116 */ 1117struct ditherer { 1118 ditherer(const context_t* c) { 1119 const int x = c->iterators.xl; 1120 const int y = c->iterators.y; 1121 m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ]; 1122 m_index = x & GGL_DITHER_MASK; 1123 } 1124 void step(void) { 1125 m_index++; 1126 } 1127 int get_value(void) { 1128 int ret = m_line[m_index & GGL_DITHER_MASK]; 1129 m_index++; 1130 return ret; 1131 } 1132 uint16_t abgr8888ToRgb565(uint32_t s) { 1133 uint32_t r = s & 0xff; 1134 uint32_t g = (s >> 8) & 0xff; 1135 uint32_t b = (s >> 16) & 0xff; 1136 return rgb888ToRgb565(r,g,b); 1137 } 1138 /* The following assumes that r/g/b are in the 0..255 range each */ 1139 uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) { 1140 int threshold = get_value(); 1141 /* dither in on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */ 1142 r += (threshold >> (GGL_DITHER_BITS-8 +5)); 1143 g += (threshold >> (GGL_DITHER_BITS-8 +6)); 1144 b += (threshold >> (GGL_DITHER_BITS-8 +5)); 1145 if (r > 0xff) 1146 r = 0xff; 1147 if (g > 0xff) 1148 g = 0xff; 1149 if (b > 0xff) 1150 b = 0xff; 1151 return uint16_t(((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3)); 1152 } 1153protected: 1154 const uint8_t* m_line; 1155 int m_index; 1156}; 1157 1158/* This structure is used to blend (SRC_OVER) 32-bit source pixels 1159 * onto 16-bit destination ones. 
Usage is simply:
 *
 *   blender.write(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
 *
 * The source color is added as-is to the attenuated destination, so the
 * source is presumably expected to be alpha-premultiplied (verify against
 * callers). A source value of 0 leaves the destination untouched.
 */
struct blender_32to16 {
    blender_32to16(context_t* /*c*/) { }

    /* Blends 's' over '*dst' without dithering. */
    void write(uint32_t s, uint16_t* dst) {
        if (s == 0)
            return;
        s = GGL_RGBA_TO_HOST(s);
        const int srcA = (s>>24);
        if (srcA == 0xff) {
            // opaque source: plain format conversion
            *dst = convertAbgr8888ToRgb565(s);
            return;
        }
        // destination weight, 0x100 - alpha (alpha widened to 0..0x100)
        const int keep = 0x100 - (srcA + (srcA>>7));
        const uint16_t d = *dst;
        const int dstR = (d>>11)&0x1f;
        const int dstG = (d>>5)&0x3f;
        const int dstB = (d)&0x1f;
        int outR = (s >> (   3))&0x1F;
        int outG = (s >> ( 8+2))&0x3F;
        int outB = (s >> (16+3))&0x1F;
        outR += (keep*dstR)>>8;
        outG += (keep*dstG)>>8;
        outB += (keep*dstB)>>8;
        *dst = uint16_t((outR<<11)|(outG<<5)|outB);
    }

    /* Same blend, with an ordered-dither threshold added before the final
     * truncation. Exactly one dither position is consumed per call, even
     * when the pixel is skipped.
     */
    void write(uint32_t s, uint16_t* dst, ditherer& di) {
        if (s == 0) {
            di.step();
            return;
        }
        s = GGL_RGBA_TO_HOST(s);
        const int srcA = (s>>24);
        if (srcA == 0xff) {
            *dst = di.abgr8888ToRgb565(s);
            return;
        }
        // threshold scaled to 0.8 fixed point
        const int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
        const int keep = 0x100 - (srcA + (srcA>>7));
        const uint16_t d = *dst;
        const int dstR = (d>>11)&0x1f;
        const int dstG = (d>>5)&0x3f;
        const int dstB = (d)&0x1f;
        int outR = (s >> (   3))&0x1F;
        int outG = (s >> ( 8+2))&0x3F;
        int outB = (s >> (16+3))&0x1F;
        outR = ((outR << 8) + keep*dstR + threshold)>>8;
        outG = ((outG << 8) + keep*dstG + threshold)>>8;
        outB = ((outB << 8) + keep*dstB + threshold)>>8;
        // the threshold can push a channel past its maximum
        if (outR > 0x1f) outR = 0x1f;
        if (outG > 0x3f) outG = 0x3f;
        if (outB > 0x1f) outB = 0x1f;
        *dst = uint16_t((outR<<11)|(outG<<5)|outB);
    }
};

/* This blender does the same for the 'blend_srca' operation.
1218 * where dstFactor=srcA*(1-srcA) srcFactor=srcA 1219 */ 1220struct blender_32to16_srcA { 1221 blender_32to16_srcA(const context_t* /*c*/) { } 1222 void write(uint32_t s, uint16_t* dst) { 1223 if (!s) { 1224 return; 1225 } 1226 uint16_t d = *dst; 1227 s = GGL_RGBA_TO_HOST(s); 1228 int sR = (s >> ( 3))&0x1F; 1229 int sG = (s >> ( 8+2))&0x3F; 1230 int sB = (s >> (16+3))&0x1F; 1231 int sA = (s>>24); 1232 int f1 = (sA + (sA>>7)); 1233 int f2 = 0x100-f1; 1234 int dR = (d>>11)&0x1f; 1235 int dG = (d>>5)&0x3f; 1236 int dB = (d)&0x1f; 1237 sR = (f1*sR + f2*dR)>>8; 1238 sG = (f1*sG + f2*dG)>>8; 1239 sB = (f1*sB + f2*dB)>>8; 1240 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1241 } 1242}; 1243 1244/* Common init code the modulating blenders */ 1245struct blender_modulate { 1246 void init(const context_t* c) { 1247 const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8); 1248 const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8); 1249 const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8); 1250 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8); 1251 m_r = r + (r >> 7); 1252 m_g = g + (g >> 7); 1253 m_b = b + (b >> 7); 1254 m_a = a + (a >> 7); 1255 } 1256protected: 1257 int m_r, m_g, m_b, m_a; 1258}; 1259 1260/* This blender does a normal blend after modulation. 
1261 */ 1262struct blender_32to16_modulate : blender_modulate { 1263 blender_32to16_modulate(const context_t* c) { 1264 init(c); 1265 } 1266 void write(uint32_t s, uint16_t* dst) { 1267 // blend source and destination 1268 if (!s) { 1269 return; 1270 } 1271 s = GGL_RGBA_TO_HOST(s); 1272 1273 /* We need to modulate s */ 1274 uint32_t sA = (s >> 24); 1275 uint32_t sB = (s >> 16) & 0xff; 1276 uint32_t sG = (s >> 8) & 0xff; 1277 uint32_t sR = s & 0xff; 1278 1279 sA = (sA*m_a) >> 8; 1280 /* Keep R/G/B scaled to 5.8 or 6.8 fixed float format */ 1281 sR = (sR*m_r) >> (8 - 5); 1282 sG = (sG*m_g) >> (8 - 6); 1283 sB = (sB*m_b) >> (8 - 5); 1284 1285 /* Now do a normal blend */ 1286 int f = 0x100 - (sA + (sA>>7)); 1287 uint16_t d = *dst; 1288 int dR = (d>>11)&0x1f; 1289 int dG = (d>>5)&0x3f; 1290 int dB = (d)&0x1f; 1291 sR = (sR + f*dR)>>8; 1292 sG = (sG + f*dG)>>8; 1293 sB = (sB + f*dB)>>8; 1294 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1295 } 1296 void write(uint32_t s, uint16_t* dst, ditherer& di) { 1297 // blend source and destination 1298 if (!s) { 1299 di.step(); 1300 return; 1301 } 1302 s = GGL_RGBA_TO_HOST(s); 1303 1304 /* We need to modulate s */ 1305 uint32_t sA = (s >> 24); 1306 uint32_t sB = (s >> 16) & 0xff; 1307 uint32_t sG = (s >> 8) & 0xff; 1308 uint32_t sR = s & 0xff; 1309 1310 sA = (sA*m_a) >> 8; 1311 /* keep R/G/B scaled to 5.8 or 6.8 fixed float format */ 1312 sR = (sR*m_r) >> (8 - 5); 1313 sG = (sG*m_g) >> (8 - 6); 1314 sB = (sB*m_b) >> (8 - 5); 1315 1316 /* Scale threshold to 0.8 fixed float format */ 1317 int threshold = di.get_value() << (8 - GGL_DITHER_BITS); 1318 int f = 0x100 - (sA + (sA>>7)); 1319 uint16_t d = *dst; 1320 int dR = (d>>11)&0x1f; 1321 int dG = (d>>5)&0x3f; 1322 int dB = (d)&0x1f; 1323 sR = (sR + f*dR + threshold)>>8; 1324 sG = (sG + f*dG + threshold)>>8; 1325 sB = (sB + f*dB + threshold)>>8; 1326 if (sR > 0x1f) sR = 0x1f; 1327 if (sG > 0x3f) sG = 0x3f; 1328 if (sB > 0x1f) sB = 0x1f; 1329 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1330 } 1331}; 
1332 1333/* same as 32to16_modulate, except that the input is xRGB, instead of ARGB */ 1334struct blender_x32to16_modulate : blender_modulate { 1335 blender_x32to16_modulate(const context_t* c) { 1336 init(c); 1337 } 1338 void write(uint32_t s, uint16_t* dst) { 1339 s = GGL_RGBA_TO_HOST(s); 1340 1341 uint32_t sB = (s >> 16) & 0xff; 1342 uint32_t sG = (s >> 8) & 0xff; 1343 uint32_t sR = s & 0xff; 1344 1345 /* Keep R/G/B in 5.8 or 6.8 format */ 1346 sR = (sR*m_r) >> (8 - 5); 1347 sG = (sG*m_g) >> (8 - 6); 1348 sB = (sB*m_b) >> (8 - 5); 1349 1350 int f = 0x100 - m_a; 1351 uint16_t d = *dst; 1352 int dR = (d>>11)&0x1f; 1353 int dG = (d>>5)&0x3f; 1354 int dB = (d)&0x1f; 1355 sR = (sR + f*dR)>>8; 1356 sG = (sG + f*dG)>>8; 1357 sB = (sB + f*dB)>>8; 1358 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1359 } 1360 void write(uint32_t s, uint16_t* dst, ditherer& di) { 1361 s = GGL_RGBA_TO_HOST(s); 1362 1363 uint32_t sB = (s >> 16) & 0xff; 1364 uint32_t sG = (s >> 8) & 0xff; 1365 uint32_t sR = s & 0xff; 1366 1367 sR = (sR*m_r) >> (8 - 5); 1368 sG = (sG*m_g) >> (8 - 6); 1369 sB = (sB*m_b) >> (8 - 5); 1370 1371 /* Now do a normal blend */ 1372 int threshold = di.get_value() << (8 - GGL_DITHER_BITS); 1373 int f = 0x100 - m_a; 1374 uint16_t d = *dst; 1375 int dR = (d>>11)&0x1f; 1376 int dG = (d>>5)&0x3f; 1377 int dB = (d)&0x1f; 1378 sR = (sR + f*dR + threshold)>>8; 1379 sG = (sG + f*dG + threshold)>>8; 1380 sB = (sB + f*dB + threshold)>>8; 1381 if (sR > 0x1f) sR = 0x1f; 1382 if (sG > 0x3f) sG = 0x3f; 1383 if (sB > 0x1f) sB = 0x1f; 1384 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1385 } 1386}; 1387 1388/* Same as above, but source is 16bit rgb565 */ 1389struct blender_16to16_modulate : blender_modulate { 1390 blender_16to16_modulate(const context_t* c) { 1391 init(c); 1392 } 1393 void write(uint16_t s16, uint16_t* dst) { 1394 uint32_t s = s16; 1395 1396 uint32_t sR = s >> 11; 1397 uint32_t sG = (s >> 5) & 0x3f; 1398 uint32_t sB = s & 0x1f; 1399 1400 sR = (sR*m_r); 1401 sG = (sG*m_g); 1402 sB = 
(sB*m_b); 1403 1404 int f = 0x100 - m_a; 1405 uint16_t d = *dst; 1406 int dR = (d>>11)&0x1f; 1407 int dG = (d>>5)&0x3f; 1408 int dB = (d)&0x1f; 1409 sR = (sR + f*dR)>>8; 1410 sG = (sG + f*dG)>>8; 1411 sB = (sB + f*dB)>>8; 1412 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1413 } 1414}; 1415 1416/* This is used to iterate over a 16-bit destination color buffer. 1417 * Usage is: 1418 * 1419 * dst_iterator16 di(context); 1420 * while (di.count--) { 1421 * <do stuff with dest pixel at di.dst> 1422 * di.dst++; 1423 * } 1424 */ 1425struct dst_iterator16 { 1426 dst_iterator16(const context_t* c) { 1427 const int x = c->iterators.xl; 1428 const int width = c->iterators.xr - x; 1429 const int32_t y = c->iterators.y; 1430 const surface_t* cb = &(c->state.buffers.color); 1431 count = width; 1432 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1433 } 1434 int count; 1435 uint16_t* dst; 1436}; 1437 1438 1439static void scanline_t32cb16_clamp(context_t* c) 1440{ 1441 dst_iterator16 di(c); 1442 1443 if (is_context_horizontal(c)) { 1444 /* Special case for simple horizontal scaling */ 1445 horz_clamp_iterator32 ci(c); 1446 while (di.count--) { 1447 uint32_t s = ci.get_pixel32(); 1448 *di.dst++ = convertAbgr8888ToRgb565(s); 1449 } 1450 } else { 1451 /* General case */ 1452 clamp_iterator ci(c); 1453 while (di.count--) { 1454 uint32_t s = ci.get_pixel32(); 1455 *di.dst++ = convertAbgr8888ToRgb565(s); 1456 } 1457 } 1458} 1459 1460static void scanline_t32cb16_dither(context_t* c) 1461{ 1462 horz_iterator32 si(c); 1463 dst_iterator16 di(c); 1464 ditherer dither(c); 1465 1466 while (di.count--) { 1467 uint32_t s = si.get_pixel32(); 1468 *di.dst++ = dither.abgr8888ToRgb565(s); 1469 } 1470} 1471 1472static void scanline_t32cb16_clamp_dither(context_t* c) 1473{ 1474 dst_iterator16 di(c); 1475 ditherer dither(c); 1476 1477 if (is_context_horizontal(c)) { 1478 /* Special case for simple horizontal scaling */ 1479 horz_clamp_iterator32 ci(c); 1480 while (di.count--) { 1481 uint32_t 
s = ci.get_pixel32(); 1482 *di.dst++ = dither.abgr8888ToRgb565(s); 1483 } 1484 } else { 1485 /* General case */ 1486 clamp_iterator ci(c); 1487 while (di.count--) { 1488 uint32_t s = ci.get_pixel32(); 1489 *di.dst++ = dither.abgr8888ToRgb565(s); 1490 } 1491 } 1492} 1493 1494static void scanline_t32cb16blend_dither(context_t* c) 1495{ 1496 dst_iterator16 di(c); 1497 ditherer dither(c); 1498 blender_32to16 bl(c); 1499 horz_iterator32 hi(c); 1500 while (di.count--) { 1501 uint32_t s = hi.get_pixel32(); 1502 bl.write(s, di.dst, dither); 1503 di.dst++; 1504 } 1505} 1506 1507static void scanline_t32cb16blend_clamp(context_t* c) 1508{ 1509 dst_iterator16 di(c); 1510 blender_32to16 bl(c); 1511 1512 if (is_context_horizontal(c)) { 1513 horz_clamp_iterator32 ci(c); 1514 while (di.count--) { 1515 uint32_t s = ci.get_pixel32(); 1516 bl.write(s, di.dst); 1517 di.dst++; 1518 } 1519 } else { 1520 clamp_iterator ci(c); 1521 while (di.count--) { 1522 uint32_t s = ci.get_pixel32(); 1523 bl.write(s, di.dst); 1524 di.dst++; 1525 } 1526 } 1527} 1528 1529static void scanline_t32cb16blend_clamp_dither(context_t* c) 1530{ 1531 dst_iterator16 di(c); 1532 ditherer dither(c); 1533 blender_32to16 bl(c); 1534 1535 clamp_iterator ci(c); 1536 while (di.count--) { 1537 uint32_t s = ci.get_pixel32(); 1538 bl.write(s, di.dst, dither); 1539 di.dst++; 1540 } 1541} 1542 1543void scanline_t32cb16blend_clamp_mod(context_t* c) 1544{ 1545 dst_iterator16 di(c); 1546 blender_32to16_modulate bl(c); 1547 1548 clamp_iterator ci(c); 1549 while (di.count--) { 1550 uint32_t s = ci.get_pixel32(); 1551 bl.write(s, di.dst); 1552 di.dst++; 1553 } 1554} 1555 1556void scanline_t32cb16blend_clamp_mod_dither(context_t* c) 1557{ 1558 dst_iterator16 di(c); 1559 blender_32to16_modulate bl(c); 1560 ditherer dither(c); 1561 1562 clamp_iterator ci(c); 1563 while (di.count--) { 1564 uint32_t s = ci.get_pixel32(); 1565 bl.write(s, di.dst, dither); 1566 di.dst++; 1567 } 1568} 1569 1570/* Variant of scanline_t32cb16blend_clamp_mod 
with a xRGB texture */ 1571void scanline_x32cb16blend_clamp_mod(context_t* c) 1572{ 1573 dst_iterator16 di(c); 1574 blender_x32to16_modulate bl(c); 1575 1576 clamp_iterator ci(c); 1577 while (di.count--) { 1578 uint32_t s = ci.get_pixel32(); 1579 bl.write(s, di.dst); 1580 di.dst++; 1581 } 1582} 1583 1584void scanline_x32cb16blend_clamp_mod_dither(context_t* c) 1585{ 1586 dst_iterator16 di(c); 1587 blender_x32to16_modulate bl(c); 1588 ditherer dither(c); 1589 1590 clamp_iterator ci(c); 1591 while (di.count--) { 1592 uint32_t s = ci.get_pixel32(); 1593 bl.write(s, di.dst, dither); 1594 di.dst++; 1595 } 1596} 1597 1598void scanline_t16cb16_clamp(context_t* c) 1599{ 1600 dst_iterator16 di(c); 1601 1602 /* Special case for simple horizontal scaling */ 1603 if (is_context_horizontal(c)) { 1604 horz_clamp_iterator16 ci(c); 1605 while (di.count--) { 1606 *di.dst++ = ci.get_pixel16(); 1607 } 1608 } else { 1609 clamp_iterator ci(c); 1610 while (di.count--) { 1611 *di.dst++ = ci.get_pixel16(); 1612 } 1613 } 1614} 1615 1616 1617 1618template <typename T, typename U> 1619static inline __attribute__((const)) 1620T interpolate(int y, T v0, U dvdx, U dvdy) { 1621 // interpolates in pixel's centers 1622 // v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx) 1623 return (y * dvdy) + (v0 + ((dvdy + dvdx) >> 1)); 1624} 1625 1626// ---------------------------------------------------------------------------- 1627#if 0 1628#pragma mark - 1629#endif 1630 1631void init_y(context_t* c, int32_t ys) 1632{ 1633 const uint32_t enables = c->state.enables; 1634 1635 // compute iterators... 
1636 iterators_t& ci = c->iterators; 1637 1638 // sample in the center 1639 ci.y = ys; 1640 1641 if (enables & (GGL_ENABLE_DEPTH_TEST|GGL_ENABLE_W|GGL_ENABLE_FOG)) { 1642 ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy); 1643 ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy); 1644 ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy); 1645 } 1646 1647 if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) { 1648 ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy); 1649 ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy); 1650 ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy); 1651 ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady); 1652 c->step_y = step_y__smooth; 1653 } else { 1654 ci.ydrdy = c->shade.r0; 1655 ci.ydgdy = c->shade.g0; 1656 ci.ydbdy = c->shade.b0; 1657 ci.ydady = c->shade.a0; 1658 // XXX: do only if needed, or make sure this is fast 1659 c->packed = ggl_pack_color(c, c->state.buffers.color.format, 1660 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 1661 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, 1662 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 1663 } 1664 1665 // initialize the variables we need in the shader 1666 generated_vars_t& gen = c->generated_vars; 1667 gen.argb[GGLFormat::ALPHA].c = ci.ydady; 1668 gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx; 1669 gen.argb[GGLFormat::RED ].c = ci.ydrdy; 1670 gen.argb[GGLFormat::RED ].dx = c->shade.drdx; 1671 gen.argb[GGLFormat::GREEN].c = ci.ydgdy; 1672 gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx; 1673 gen.argb[GGLFormat::BLUE ].c = ci.ydbdy; 1674 gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx; 1675 gen.dzdx = c->shade.dzdx; 1676 gen.f = ci.ydfdy; 1677 gen.dfdx = c->shade.dfdx; 1678 1679 if (enables & GGL_ENABLE_TMUS) { 1680 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1681 texture_t& t = c->state.texture[i]; 1682 if (!t.enable) continue; 1683 1684 texture_iterators_t& 
ti = t.iterators; 1685 if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) { 1686 // we need to set all of these to 0 because in some cases 1687 // step_y__generic() or step_y__tmu() will be used and 1688 // therefore will update dtdy, however, in 1:1 mode 1689 // this is always done by the scanline rasterizer. 1690 ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0; 1691 ti.ydsdy = t.shade.is0; 1692 ti.ydtdy = t.shade.it0; 1693 } else { 1694 const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16); 1695 const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16); 1696 ti.sscale = t.shade.sscale + adjustSWrap; 1697 ti.tscale = t.shade.tscale + adjustTWrap; 1698 if (!(enables & GGL_ENABLE_W)) { 1699 // S coordinate 1700 const int32_t sscale = ti.sscale; 1701 const int32_t sy = interpolate(ys, 1702 t.shade.is0, t.shade.idsdx, t.shade.idsdy); 1703 if (sscale>=0) { 1704 ti.ydsdy= sy << sscale; 1705 ti.dsdx = t.shade.idsdx << sscale; 1706 ti.dsdy = t.shade.idsdy << sscale; 1707 } else { 1708 ti.ydsdy= sy >> -sscale; 1709 ti.dsdx = t.shade.idsdx >> -sscale; 1710 ti.dsdy = t.shade.idsdy >> -sscale; 1711 } 1712 // T coordinate 1713 const int32_t tscale = ti.tscale; 1714 const int32_t ty = interpolate(ys, 1715 t.shade.it0, t.shade.idtdx, t.shade.idtdy); 1716 if (tscale>=0) { 1717 ti.ydtdy= ty << tscale; 1718 ti.dtdx = t.shade.idtdx << tscale; 1719 ti.dtdy = t.shade.idtdy << tscale; 1720 } else { 1721 ti.ydtdy= ty >> -tscale; 1722 ti.dtdx = t.shade.idtdx >> -tscale; 1723 ti.dtdy = t.shade.idtdy >> -tscale; 1724 } 1725 } 1726 } 1727 // mirror for generated code... 
1728 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 1729 gen.width = t.surface.width; 1730 gen.height = t.surface.height; 1731 gen.stride = t.surface.stride; 1732 gen.data = uintptr_t(t.surface.data); 1733 gen.dsdx = ti.dsdx; 1734 gen.dtdx = ti.dtdx; 1735 } 1736 } 1737 1738 // choose the y-stepper 1739 c->step_y = step_y__nop; 1740 if (enables & GGL_ENABLE_FOG) { 1741 c->step_y = step_y__generic; 1742 } else if (enables & GGL_ENABLE_TMUS) { 1743 if (enables & GGL_ENABLE_SMOOTH) { 1744 c->step_y = step_y__generic; 1745 } else if (enables & GGL_ENABLE_W) { 1746 c->step_y = step_y__w; 1747 } else { 1748 c->step_y = step_y__tmu; 1749 } 1750 } else { 1751 if (enables & GGL_ENABLE_SMOOTH) { 1752 c->step_y = step_y__smooth; 1753 } 1754 } 1755 1756 // choose the rectangle blitter 1757 c->rect = rect_generic; 1758 if ((c->step_y == step_y__nop) && 1759 (c->scanline == scanline_memcpy)) 1760 { 1761 c->rect = rect_memcpy; 1762 } 1763} 1764 1765void init_y_packed(context_t* c, int32_t y0) 1766{ 1767 uint8_t f = c->state.buffers.color.format; 1768 c->packed = ggl_pack_color(c, f, 1769 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); 1770 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, 1771 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); 1772 c->iterators.y = y0; 1773 c->step_y = step_y__nop; 1774 // choose the rectangle blitter 1775 c->rect = rect_generic; 1776 if (c->scanline == scanline_memcpy) { 1777 c->rect = rect_memcpy; 1778 } 1779} 1780 1781void init_y_noop(context_t* c, int32_t y0) 1782{ 1783 c->iterators.y = y0; 1784 c->step_y = step_y__nop; 1785 // choose the rectangle blitter 1786 c->rect = rect_generic; 1787 if (c->scanline == scanline_memcpy) { 1788 c->rect = rect_memcpy; 1789 } 1790} 1791 1792void init_y_error(context_t* c, int32_t y0) 1793{ 1794 // woooops, shoud never happen, 1795 // fail gracefully (don't display anything) 1796 init_y_noop(c, y0); 1797 ALOGE("color-buffer has an invalid format!"); 1798} 1799 1800// 
---------------------------------------------------------------------------- 1801#if 0 1802#pragma mark - 1803#endif 1804 1805void step_y__generic(context_t* c) 1806{ 1807 const uint32_t enables = c->state.enables; 1808 1809 // iterate... 1810 iterators_t& ci = c->iterators; 1811 ci.y += 1; 1812 1813 if (enables & GGL_ENABLE_SMOOTH) { 1814 ci.ydrdy += c->shade.drdy; 1815 ci.ydgdy += c->shade.dgdy; 1816 ci.ydbdy += c->shade.dbdy; 1817 ci.ydady += c->shade.dady; 1818 } 1819 1820 const uint32_t mask = 1821 GGL_ENABLE_DEPTH_TEST | 1822 GGL_ENABLE_W | 1823 GGL_ENABLE_FOG; 1824 if (enables & mask) { 1825 ci.ydzdy += c->shade.dzdy; 1826 ci.ydwdy += c->shade.dwdy; 1827 ci.ydfdy += c->shade.dfdy; 1828 } 1829 1830 if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) { 1831 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1832 if (c->state.texture[i].enable) { 1833 texture_iterators_t& ti = c->state.texture[i].iterators; 1834 ti.ydsdy += ti.dsdy; 1835 ti.ydtdy += ti.dtdy; 1836 } 1837 } 1838 } 1839} 1840 1841void step_y__nop(context_t* c) 1842{ 1843 c->iterators.y += 1; 1844 c->iterators.ydzdy += c->shade.dzdy; 1845} 1846 1847void step_y__smooth(context_t* c) 1848{ 1849 iterators_t& ci = c->iterators; 1850 ci.y += 1; 1851 ci.ydrdy += c->shade.drdy; 1852 ci.ydgdy += c->shade.dgdy; 1853 ci.ydbdy += c->shade.dbdy; 1854 ci.ydady += c->shade.dady; 1855 ci.ydzdy += c->shade.dzdy; 1856} 1857 1858void step_y__w(context_t* c) 1859{ 1860 iterators_t& ci = c->iterators; 1861 ci.y += 1; 1862 ci.ydzdy += c->shade.dzdy; 1863 ci.ydwdy += c->shade.dwdy; 1864} 1865 1866void step_y__tmu(context_t* c) 1867{ 1868 iterators_t& ci = c->iterators; 1869 ci.y += 1; 1870 ci.ydzdy += c->shade.dzdy; 1871 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1872 if (c->state.texture[i].enable) { 1873 texture_iterators_t& ti = c->state.texture[i].iterators; 1874 ti.ydsdy += ti.dsdy; 1875 ti.ydtdy += ti.dtdy; 1876 } 1877 } 1878} 1879 1880// 
---------------------------------------------------------------------------- 1881#if 0 1882#pragma mark - 1883#endif 1884 1885void scanline_perspective(context_t* c) 1886{ 1887 struct { 1888 union { 1889 struct { 1890 int32_t s, sq; 1891 int32_t t, tq; 1892 } sqtq; 1893 struct { 1894 int32_t v, q; 1895 } st[2]; 1896 }; 1897 } tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16))); 1898 1899 // XXX: we should have a special case when dwdx = 0 1900 1901 // 32 pixels spans works okay. 16 is a lot better, 1902 // but hey, it's a software renderer... 1903 const uint32_t SPAN_BITS = 5; 1904 const uint32_t ys = c->iterators.y; 1905 const uint32_t xs = c->iterators.xl; 1906 const uint32_t x1 = c->iterators.xr; 1907 const uint32_t xc = x1 - xs; 1908 uint32_t remainder = xc & ((1<<SPAN_BITS)-1); 1909 uint32_t numSpans = xc >> SPAN_BITS; 1910 1911 const iterators_t& ci = c->iterators; 1912 int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy; 1913 int32_t q0 = gglRecipQ(w0, 30); 1914 const int iwscale = 32 - gglClz(q0); 1915 1916 const int32_t dwdx = c->shade.dwdx << SPAN_BITS; 1917 int32_t xl = c->iterators.xl; 1918 1919 // We process s & t with a loop to reduce the code size 1920 // (and i-cache pressure). 1921 1922 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1923 const texture_t& tmu = c->state.texture[i]; 1924 if (!tmu.enable) continue; 1925 int32_t s = tmu.shade.is0 + 1926 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) + 1927 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1); 1928 int32_t t = tmu.shade.it0 + 1929 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) + 1930 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1); 1931 tc[i].sqtq.s = s; 1932 tc[i].sqtq.t = t; 1933 tc[i].sqtq.sq = gglMulx(s, q0, iwscale); 1934 tc[i].sqtq.tq = gglMulx(t, q0, iwscale); 1935 } 1936 1937 int32_t span = 0; 1938 do { 1939 int32_t w1; 1940 if (ggl_likely(numSpans)) { 1941 w1 = w0 + dwdx; 1942 } else { 1943 if (remainder) { 1944 // finish off the scanline... 
1945 span = remainder; 1946 w1 = (c->shade.dwdx * span) + w0; 1947 } else { 1948 break; 1949 } 1950 } 1951 int32_t q1 = gglRecipQ(w1, 30); 1952 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1953 texture_t& tmu = c->state.texture[i]; 1954 if (!tmu.enable) continue; 1955 texture_iterators_t& ti = tmu.iterators; 1956 1957 for (int j=0 ; j<2 ; j++) { 1958 int32_t v = tc[i].st[j].v; 1959 if (span) v += (tmu.shade.st[j].dx)*span; 1960 else v += (tmu.shade.st[j].dx)<<SPAN_BITS; 1961 const int32_t v0 = tc[i].st[j].q; 1962 const int32_t v1 = gglMulx(v, q1, iwscale); 1963 int32_t dvdx = v1 - v0; 1964 if (span) dvdx /= span; 1965 else dvdx >>= SPAN_BITS; 1966 tc[i].st[j].v = v; 1967 tc[i].st[j].q = v1; 1968 1969 const int scale = ti.st[j].scale + (iwscale - 30); 1970 if (scale >= 0) { 1971 ti.st[j].ydvdy = v0 << scale; 1972 ti.st[j].dvdx = dvdx << scale; 1973 } else { 1974 ti.st[j].ydvdy = v0 >> -scale; 1975 ti.st[j].dvdx = dvdx >> -scale; 1976 } 1977 } 1978 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 1979 gen.dsdx = ti.st[0].dvdx; 1980 gen.dtdx = ti.st[1].dvdx; 1981 } 1982 c->iterators.xl = xl; 1983 c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS)); 1984 w0 = w1; 1985 q0 = q1; 1986 c->span(c); 1987 } while(numSpans--); 1988} 1989 1990void scanline_perspective_single(context_t* c) 1991{ 1992 // 32 pixels spans works okay. 16 is a lot better, 1993 // but hey, it's a software renderer... 
1994 const uint32_t SPAN_BITS = 5; 1995 const uint32_t ys = c->iterators.y; 1996 const uint32_t xs = c->iterators.xl; 1997 const uint32_t x1 = c->iterators.xr; 1998 const uint32_t xc = x1 - xs; 1999 2000 const iterators_t& ci = c->iterators; 2001 int32_t w = (xs * c->shade.dwdx) + ci.ydwdy; 2002 int32_t iw = gglRecipQ(w, 30); 2003 const int iwscale = 32 - gglClz(iw); 2004 2005 const int i = 31 - gglClz(c->state.enabled_tmu); 2006 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 2007 texture_t& tmu = c->state.texture[i]; 2008 texture_iterators_t& ti = tmu.iterators; 2009 const int sscale = ti.sscale + (iwscale - 30); 2010 const int tscale = ti.tscale + (iwscale - 30); 2011 int32_t s = tmu.shade.is0 + 2012 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) + 2013 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1); 2014 int32_t t = tmu.shade.it0 + 2015 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) + 2016 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1); 2017 int32_t s0 = gglMulx(s, iw, iwscale); 2018 int32_t t0 = gglMulx(t, iw, iwscale); 2019 int32_t xl = c->iterators.xl; 2020 2021 int32_t sq, tq, dsdx, dtdx; 2022 int32_t premainder = xc & ((1<<SPAN_BITS)-1); 2023 uint32_t numSpans = xc >> SPAN_BITS; 2024 if (c->shade.dwdx == 0) { 2025 // XXX: we could choose to do this if the error is small enough 2026 numSpans = 0; 2027 premainder = xc; 2028 goto no_perspective; 2029 } 2030 2031 if (premainder) { 2032 w += c->shade.dwdx * premainder; 2033 iw = gglRecipQ(w, 30); 2034no_perspective: 2035 s += tmu.shade.idsdx * premainder; 2036 t += tmu.shade.idtdx * premainder; 2037 sq = gglMulx(s, iw, iwscale); 2038 tq = gglMulx(t, iw, iwscale); 2039 dsdx = (sq - s0) / premainder; 2040 dtdx = (tq - t0) / premainder; 2041 c->iterators.xl = xl; 2042 c->iterators.xr = xl = xl + premainder; 2043 goto finish; 2044 } 2045 2046 while (numSpans--) { 2047 w += c->shade.dwdx << SPAN_BITS; 2048 s += tmu.shade.idsdx << SPAN_BITS; 2049 t += tmu.shade.idtdx << SPAN_BITS; 2050 iw = gglRecipQ(w, 30); 2051 sq 
= gglMulx(s, iw, iwscale); 2052 tq = gglMulx(t, iw, iwscale); 2053 dsdx = (sq - s0) >> SPAN_BITS; 2054 dtdx = (tq - t0) >> SPAN_BITS; 2055 c->iterators.xl = xl; 2056 c->iterators.xr = xl = xl + (1<<SPAN_BITS); 2057finish: 2058 if (sscale >= 0) { 2059 ti.ydsdy = s0 << sscale; 2060 ti.dsdx = dsdx << sscale; 2061 } else { 2062 ti.ydsdy = s0 >>-sscale; 2063 ti.dsdx = dsdx >>-sscale; 2064 } 2065 if (tscale >= 0) { 2066 ti.ydtdy = t0 << tscale; 2067 ti.dtdx = dtdx << tscale; 2068 } else { 2069 ti.ydtdy = t0 >>-tscale; 2070 ti.dtdx = dtdx >>-tscale; 2071 } 2072 s0 = sq; 2073 t0 = tq; 2074 gen.dsdx = ti.dsdx; 2075 gen.dtdx = ti.dtdx; 2076 c->span(c); 2077 } 2078} 2079 2080// ---------------------------------------------------------------------------- 2081 2082void scanline_col32cb16blend(context_t* c) 2083{ 2084 int32_t x = c->iterators.xl; 2085 size_t ct = c->iterators.xr - x; 2086 int32_t y = c->iterators.y; 2087 surface_t* cb = &(c->state.buffers.color); 2088 union { 2089 uint16_t* dst; 2090 uint32_t* dst32; 2091 }; 2092 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 2093 2094#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) 2095#if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 2096 scanline_col32cb16blend_neon(dst, &(c->packed8888), ct); 2097#else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 2098 scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); 2099#endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 2100#elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__aarch64__)) 2101 scanline_col32cb16blend_arm64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); 2102#else 2103 uint32_t s = GGL_RGBA_TO_HOST(c->packed8888); 2104 int sA = (s>>24); 2105 int f = 0x100 - (sA + (sA>>7)); 2106 while (ct--) { 2107 uint16_t d = *dst; 2108 int dR = (d>>11)&0x1f; 2109 int dG = (d>>5)&0x3f; 2110 int dB = (d)&0x1f; 2111 int sR = (s >> ( 3))&0x1F; 2112 int sG = (s >> ( 8+2))&0x3F; 2113 int sB = (s >> 
(16+3))&0x1F; 2114 sR += (f*dR)>>8; 2115 sG += (f*dG)>>8; 2116 sB += (f*dB)>>8; 2117 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 2118 } 2119#endif 2120 2121} 2122 2123void scanline_t32cb16(context_t* c) 2124{ 2125 int32_t x = c->iterators.xl; 2126 size_t ct = c->iterators.xr - x; 2127 int32_t y = c->iterators.y; 2128 surface_t* cb = &(c->state.buffers.color); 2129 union { 2130 uint16_t* dst; 2131 uint32_t* dst32; 2132 }; 2133 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 2134 2135 surface_t* tex = &(c->state.texture[0].surface); 2136 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 2137 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 2138 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 2139 int sR, sG, sB; 2140 uint32_t s, d; 2141 2142 if (ct==1 || uintptr_t(dst)&2) { 2143last_one: 2144 s = GGL_RGBA_TO_HOST( *src++ ); 2145 *dst++ = convertAbgr8888ToRgb565(s); 2146 ct--; 2147 } 2148 2149 while (ct >= 2) { 2150#if BYTE_ORDER == BIG_ENDIAN 2151 s = GGL_RGBA_TO_HOST( *src++ ); 2152 d = convertAbgr8888ToRgb565_hi16(s); 2153 2154 s = GGL_RGBA_TO_HOST( *src++ ); 2155 d |= convertAbgr8888ToRgb565(s); 2156#else 2157 s = GGL_RGBA_TO_HOST( *src++ ); 2158 d = convertAbgr8888ToRgb565(s); 2159 2160 s = GGL_RGBA_TO_HOST( *src++ ); 2161 d |= convertAbgr8888ToRgb565(s) << 16; 2162#endif 2163 *dst32++ = d; 2164 ct -= 2; 2165 } 2166 2167 if (ct > 0) { 2168 goto last_one; 2169 } 2170} 2171 2172void scanline_t32cb16blend(context_t* c) 2173{ 2174#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips__) || defined(__aarch64__))) 2175 int32_t x = c->iterators.xl; 2176 size_t ct = c->iterators.xr - x; 2177 int32_t y = c->iterators.y; 2178 surface_t* cb = &(c->state.buffers.color); 2179 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 2180 2181 surface_t* tex = &(c->state.texture[0].surface); 2182 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 2183 const 
int32_t v = (c->state.texture[0].shade.it0>>16) + y; 2184 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 2185 2186#ifdef __arm__ 2187 scanline_t32cb16blend_arm(dst, src, ct); 2188#elif defined(__aarch64__) 2189 scanline_t32cb16blend_arm64(dst, src, ct); 2190#elif defined(__mips__) 2191 scanline_t32cb16blend_mips(dst, src, ct); 2192#endif 2193#else 2194 dst_iterator16 di(c); 2195 horz_iterator32 hi(c); 2196 blender_32to16 bl(c); 2197 while (di.count--) { 2198 uint32_t s = hi.get_pixel32(); 2199 bl.write(s, di.dst); 2200 di.dst++; 2201 } 2202#endif 2203} 2204 2205void scanline_t32cb16blend_srca(context_t* c) 2206{ 2207 dst_iterator16 di(c); 2208 horz_iterator32 hi(c); 2209 blender_32to16_srcA blender(c); 2210 2211 while (di.count--) { 2212 uint32_t s = hi.get_pixel32(); 2213 blender.write(s,di.dst); 2214 di.dst++; 2215 } 2216} 2217 2218void scanline_t16cb16blend_clamp_mod(context_t* c) 2219{ 2220 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8); 2221 if (a == 0) { 2222 return; 2223 } 2224 2225 if (a == 255) { 2226 scanline_t16cb16_clamp(c); 2227 return; 2228 } 2229 2230 dst_iterator16 di(c); 2231 blender_16to16_modulate blender(c); 2232 clamp_iterator ci(c); 2233 2234 while (di.count--) { 2235 uint16_t s = ci.get_pixel16(); 2236 blender.write(s, di.dst); 2237 di.dst++; 2238 } 2239} 2240 2241void scanline_memcpy(context_t* c) 2242{ 2243 int32_t x = c->iterators.xl; 2244 size_t ct = c->iterators.xr - x; 2245 int32_t y = c->iterators.y; 2246 surface_t* cb = &(c->state.buffers.color); 2247 const GGLFormat* fp = &(c->formats[cb->format]); 2248 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 2249 (x + (cb->stride * y)) * fp->size; 2250 2251 surface_t* tex = &(c->state.texture[0].surface); 2252 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 2253 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 2254 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 2255 (u + (tex->stride * v)) * fp->size; 2256 2257 const 
size_t size = ct * fp->size; 2258 memcpy(dst, src, size); 2259} 2260 2261void scanline_memset8(context_t* c) 2262{ 2263 int32_t x = c->iterators.xl; 2264 size_t ct = c->iterators.xr - x; 2265 int32_t y = c->iterators.y; 2266 surface_t* cb = &(c->state.buffers.color); 2267 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + (x+(cb->stride*y)); 2268 uint32_t packed = c->packed; 2269 memset(dst, packed, ct); 2270} 2271 2272void scanline_memset16(context_t* c) 2273{ 2274 int32_t x = c->iterators.xl; 2275 size_t ct = c->iterators.xr - x; 2276 int32_t y = c->iterators.y; 2277 surface_t* cb = &(c->state.buffers.color); 2278 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 2279 uint32_t packed = c->packed; 2280 android_memset16(dst, packed, ct*2); 2281} 2282 2283void scanline_memset32(context_t* c) 2284{ 2285 int32_t x = c->iterators.xl; 2286 size_t ct = c->iterators.xr - x; 2287 int32_t y = c->iterators.y; 2288 surface_t* cb = &(c->state.buffers.color); 2289 uint32_t* dst = reinterpret_cast<uint32_t*>(cb->data) + (x+(cb->stride*y)); 2290 uint32_t packed = GGL_HOST_TO_RGBA(c->packed); 2291 android_memset32(dst, packed, ct*4); 2292} 2293 2294void scanline_clear(context_t* c) 2295{ 2296 int32_t x = c->iterators.xl; 2297 size_t ct = c->iterators.xr - x; 2298 int32_t y = c->iterators.y; 2299 surface_t* cb = &(c->state.buffers.color); 2300 const GGLFormat* fp = &(c->formats[cb->format]); 2301 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 2302 (x + (cb->stride * y)) * fp->size; 2303 const size_t size = ct * fp->size; 2304 memset(dst, 0, size); 2305} 2306 2307void scanline_set(context_t* c) 2308{ 2309 int32_t x = c->iterators.xl; 2310 size_t ct = c->iterators.xr - x; 2311 int32_t y = c->iterators.y; 2312 surface_t* cb = &(c->state.buffers.color); 2313 const GGLFormat* fp = &(c->formats[cb->format]); 2314 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 2315 (x + (cb->stride * y)) * fp->size; 2316 const size_t size = ct * fp->size; 2317 
memset(dst, 0xFF, size); 2318} 2319 2320void scanline_noop(context_t* /*c*/) 2321{ 2322} 2323 2324void rect_generic(context_t* c, size_t yc) 2325{ 2326 do { 2327 c->scanline(c); 2328 c->step_y(c); 2329 } while (--yc); 2330} 2331 2332void rect_memcpy(context_t* c, size_t yc) 2333{ 2334 int32_t x = c->iterators.xl; 2335 size_t ct = c->iterators.xr - x; 2336 int32_t y = c->iterators.y; 2337 surface_t* cb = &(c->state.buffers.color); 2338 const GGLFormat* fp = &(c->formats[cb->format]); 2339 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 2340 (x + (cb->stride * y)) * fp->size; 2341 2342 surface_t* tex = &(c->state.texture[0].surface); 2343 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 2344 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 2345 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 2346 (u + (tex->stride * v)) * fp->size; 2347 2348 if (cb->stride == tex->stride && ct == size_t(cb->stride)) { 2349 memcpy(dst, src, ct * fp->size * yc); 2350 } else { 2351 const size_t size = ct * fp->size; 2352 const size_t dbpr = cb->stride * fp->size; 2353 const size_t sbpr = tex->stride * fp->size; 2354 do { 2355 memcpy(dst, src, size); 2356 dst += dbpr; 2357 src += sbpr; 2358 } while (--yc); 2359 } 2360} 2361// ---------------------------------------------------------------------------- 2362}; // namespace android 2363 2364