/* texcompress_fxt1.c revision 3bdc8535fc7be3e0524e9dd728ef9f63532fd789 */
1/* 2 * Mesa 3-D graphics library 3 * Version: 6.1 4 * 5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25 26/** 27 * \file texcompress_fxt1.c 28 * GL_EXT_texture_compression_fxt1 support. 29 */ 30 31 32#include "glheader.h" 33#include "imports.h" 34#include "colormac.h" 35#include "context.h" 36#include "convolve.h" 37#include "image.h" 38#include "texcompress.h" 39#include "texformat.h" 40#include "texstore.h" 41 42 43int 44fxt1_encode (GLcontext *ctx, 45 unsigned int width, unsigned int height, 46 int srcFormat, 47 const void *source, int srcRowStride, 48 void *dest, int destRowStride); 49void 50fxt1_decode_1 (const void *texture, int width, 51 int i, int j, unsigned char *rgba); 52 53 54/** 55 * Called during context initialization. 56 */ 57void 58_mesa_init_texture_fxt1( GLcontext *ctx ) 59{ 60} 61 62 63/** 64 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture. 
65 */ 66static GLboolean 67texstore_rgb_fxt1(STORE_PARAMS) 68{ 69 const GLchan *pixels; 70 GLint srcRowStride; 71 GLubyte *dst; 72 const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */ 73 const GLchan *tempImage = NULL; 74 75 ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1); 76 ASSERT(dstXoffset % 8 == 0); 77 ASSERT(dstYoffset % 4 == 0); 78 ASSERT(dstZoffset == 0); 79 80 if (srcFormat != GL_RGB || 81 srcType != CHAN_TYPE || 82 ctx->_ImageTransferState || 83 srcPacking->SwapBytes) { 84 /* convert image to RGB/GLchan */ 85 tempImage = _mesa_make_temp_chan_image(ctx, dims, 86 baseInternalFormat, 87 dstFormat->BaseFormat, 88 srcWidth, srcHeight, srcDepth, 89 srcFormat, srcType, srcAddr, 90 srcPacking); 91 if (!tempImage) 92 return GL_FALSE; /* out of memory */ 93 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight); 94 pixels = tempImage; 95 srcRowStride = 3 * srcWidth; 96 srcFormat = GL_RGB; 97 } 98 else { 99 pixels = (const GLchan *) srcAddr; 100 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, 101 srcType) / sizeof(GLchan); 102 } 103 104 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0, 105 GL_COMPRESSED_RGB_FXT1_3DFX, 106 texWidth, (GLubyte *) dstAddr); 107 108 fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride, 109 dst, dstRowStride); 110 111 if (tempImage) 112 _mesa_free((void*) tempImage); 113 114 return GL_TRUE; 115} 116 117 118/** 119 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture. 
120 */ 121static GLboolean 122texstore_rgba_fxt1(STORE_PARAMS) 123{ 124 const GLchan *pixels; 125 GLint srcRowStride; 126 GLubyte *dst; 127 GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */ 128 const GLchan *tempImage = NULL; 129 130 ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1); 131 ASSERT(dstXoffset % 8 == 0); 132 ASSERT(dstYoffset % 4 == 0); 133 ASSERT(dstZoffset == 0); 134 135 if (srcFormat != GL_RGBA || 136 srcType != CHAN_TYPE || 137 ctx->_ImageTransferState || 138 srcPacking->SwapBytes) { 139 /* convert image to RGBA/GLchan */ 140 tempImage = _mesa_make_temp_chan_image(ctx, dims, 141 baseInternalFormat, 142 dstFormat->BaseFormat, 143 srcWidth, srcHeight, srcDepth, 144 srcFormat, srcType, srcAddr, 145 srcPacking); 146 if (!tempImage) 147 return GL_FALSE; /* out of memory */ 148 _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight); 149 pixels = tempImage; 150 srcRowStride = 4 * srcWidth; 151 srcFormat = GL_RGBA; 152 } 153 else { 154 pixels = (const GLchan *) srcAddr; 155 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, 156 srcType) / sizeof(GLchan); 157 } 158 159 dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0, 160 GL_COMPRESSED_RGBA_FXT1_3DFX, 161 texWidth, (GLubyte *) dstAddr); 162 163 fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride, 164 dst, dstRowStride); 165 166 if (tempImage) 167 _mesa_free((void*) tempImage); 168 169 return GL_TRUE; 170} 171 172 173static void 174fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage, 175 GLint i, GLint j, GLint k, GLchan *texel ) 176{ 177 fxt1_decode_1(texImage->Data, texImage->Width, i, j, texel); 178} 179 180 181static void 182fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage, 183 GLint i, GLint j, GLint k, GLfloat *texel ) 184{ 185 /* just sample as GLchan and convert to float here */ 186 GLchan rgba[4]; 187 fxt1_decode_1(texImage->Data, texImage->Width, i, j, rgba); 188 texel[RCOMP] = 
CHAN_TO_FLOAT(rgba[RCOMP]); 189 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]); 190 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]); 191 texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]); 192} 193 194 195static void 196fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage, 197 GLint i, GLint j, GLint k, GLchan *texel ) 198{ 199 fxt1_decode_1(texImage->Data, texImage->Width, i, j, texel); 200 texel[ACOMP] = 255; 201} 202 203 204static void 205fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage, 206 GLint i, GLint j, GLint k, GLfloat *texel ) 207{ 208 /* just sample as GLchan and convert to float here */ 209 GLchan rgba[4]; 210 fxt1_decode_1(texImage->Data, texImage->Width, i, j, rgba); 211 texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]); 212 texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]); 213 texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]); 214 texel[ACOMP] = 1.0; 215} 216 217 218 219const struct gl_texture_format _mesa_texformat_rgb_fxt1 = { 220 MESA_FORMAT_RGB_FXT1, /* MesaFormat */ 221 GL_RGB, /* BaseFormat */ 222 GL_UNSIGNED_NORMALIZED_ARB, /* DataType */ 223 4, /*approx*/ /* RedBits */ 224 4, /*approx*/ /* GreenBits */ 225 4, /*approx*/ /* BlueBits */ 226 0, /* AlphaBits */ 227 0, /* LuminanceBits */ 228 0, /* IntensityBits */ 229 0, /* IndexBits */ 230 0, /* DepthBits */ 231 0, /* TexelBytes */ 232 texstore_rgb_fxt1, /* StoreTexImageFunc */ 233 NULL, /*impossible*/ /* FetchTexel1D */ 234 fetch_texel_2d_rgb_fxt1, /* FetchTexel2D */ 235 NULL, /*impossible*/ /* FetchTexel3D */ 236 NULL, /*impossible*/ /* FetchTexel1Df */ 237 fetch_texel_2d_f_rgb_fxt1, /* FetchTexel2Df */ 238 NULL, /*impossible*/ /* FetchTexel3Df */ 239}; 240 241const struct gl_texture_format _mesa_texformat_rgba_fxt1 = { 242 MESA_FORMAT_RGBA_FXT1, /* MesaFormat */ 243 GL_RGBA, /* BaseFormat */ 244 GL_UNSIGNED_NORMALIZED_ARB, /* DataType */ 245 4, /*approx*/ /* RedBits */ 246 4, /*approx*/ /* GreenBits */ 247 4, /*approx*/ /* BlueBits */ 248 1, /*approx*/ /* AlphaBits */ 249 0, /* LuminanceBits */ 250 0, /* 
/*
 * FXT1 encoder
 *
 * The encoder was built by reversing the decoder,
 * and is vaguely based on Texus2 by 3dfx. Note that this code
 * is merely a proof of concept, since it is highly UNoptimized;
 * moreover, it is sub-optimal due to initial conditions passed
 * to Lloyd's algorithm (the interpolation modes are worse).
 */


#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* True when the 4-byte texel at v is transparent black (all bytes zero).
 * The bytes are tested one at a time so the result does not depend on the
 * width of any integer type, nor on the alignment of v.
 */
#define ISTBLACK(v) \
   ((((const unsigned char *)(v))[0] | \
     ((const unsigned char *)(v))[1] | \
     ((const unsigned char *)(v))[2] | \
     ((const unsigned char *)(v))[3]) == 0)


/*
 * Fx64: a 64-bit accumulator used to assemble the high quadword of a block.
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 *   FX64_SHL(a, c)    shift a left by c bits; c must satisfy 0 < c < 64
 *
 * All three expand to a single statement without a trailing semicolon.
 */
#ifdef __GNUC__

#define FX64_NATIVE 1

typedef unsigned long long Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else /* !__GNUC__ */

#define FX64_NATIVE 0

/* two 32-bit halves, low half first */
typedef struct {
   unsigned int lo, hi;
} Fx64;

#define FX64_MOV32(a, b)                                       \
   do {                                                        \
      (a).lo = (b);                                            \
      (a).hi = 0;                                              \
   } while (0)

#define FX64_OR32(a, b) ((a).lo |= (b))

#define FX64_SHL(a, c)                                         \
   do {                                                        \
      if ((c) >= 32) {                                         \
         (a).hi = (a).lo << ((c) - 32);                        \
         (a).lo = 0;                                           \
      } else {                                                 \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));    \
         (a).lo <<= (c);                                       \
      }                                                        \
   } while (0)

#endif /* !__GNUC__ */


/**
 * Find the base vector closest to a texel.
 *
 * \param vec    candidate vectors
 * \param nv     number of candidate vectors
 * \param input  the texel to match
 * \param nc     number of components to compare
 * \return index of the vector with the smallest squared distance,
 *         or -1 if nv <= 0.
 */
static int
fxt1_bestcol (float vec[][MAX_COMP], int nv,
              unsigned char input[MAX_COMP], int nc)
{
   int i, j, best = -1;
   float err = 1e9; /* big enough */

   for (j = 0; j < nv; j++) {
      float e = 0;
      for (i = 0; i < nc; i++) {
         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
      }
      if (e < err) {
         err = e;
         best = j;
      }
   }

   return best;
}


/**
 * Find the texel farthest from a vector.
 *
 * \param vec    reference vector
 * \param input  texels to scan
 * \param nc     number of components to compare
 * \param n      number of texels to scan
 * \return index of the texel with the largest squared distance,
 *         or -1 if n <= 0.
 */
static int
fxt1_worst (float vec[MAX_COMP],
            unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
{
   int i, k, worst = -1;
   float err = -1; /* small enough */

   for (k = 0; k < n; k++) {
      float e = 0;
      for (i = 0; i < nc; i++) {
         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
      }
      if (e > err) {
         err = e;
         worst = k;
      }
   }

   return worst;
}


/**
 * Compute the per-channel variance of a set of texels.
 *
 * \param variance  if non-NULL, receives the variance of each channel
 * \param input     texels to scan
 * \param nc        number of channels
 * \param n         number of texels (must be > 0)
 * \return index of the channel with the largest variance.
 */
static int
fxt1_variance (double variance[MAX_COMP],
               unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
{
   int i, k, best = 0;
   int sx, sx2;
   double var, maxvar = -1; /* small enough */
   double teenth = 1.0 / n;

   for (i = 0; i < nc; i++) {
      sx = sx2 = 0;
      for (k = 0; k < n; k++) {
         int t = input[k][i];
         sx += t;
         sx2 += t * t;
      }
      /* E[x^2] - E[x]^2 */
      var = sx2 * teenth - sx * sx * teenth * teenth;
      if (maxvar < var) {
         maxvar = var;
         best = i;
      }
      if (variance) {
         variance[i] = var;
      }
   }

   return best;
}


/**
 * Pick the initial base vectors for a set of texels.
 *
 * If the texels contain no more than nv distinct colors, those colors are
 * used directly (unused slots repeat the first color) and 0 is returned:
 * the result is exact and needs no refinement.  Otherwise the vectors are
 * spread evenly between the darkest and the brightest texel and non-zero
 * is returned, meaning the caller should refine them.
 * There are probably better algorithms to use (histogram-based).
 *
 * \param vec    receives nv vectors
 * \param nv     number of vectors wanted (at least 2 when there are more
 *               distinct colors than vectors)
 * \param input  texels to scan
 * \param nc     number of components per texel (at most MAX_COMP)
 * \param n      number of texels to scan (at most N_TEXELS); with n <= 0
 *               the vectors are zeroed and 0 is returned
 */
static int
fxt1_choose (float vec[][MAX_COMP], int nv,
             unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
{
   int i, j, k;
   /* must exceed the largest possible component sum (MAX_COMP * 255) */
   int minSum = MAX_COMP * 255 + 1;
   int maxSum = -1; /* small enough */
   int minCol = 0;
   int maxCol = 0;

   struct {
      int flag;            /* slot in use? */
      unsigned int key;    /* components packed 8 bits each */
      int freq;            /* how many texels have this color */
      int idx;             /* first texel having this color */
   } hist[N_TEXELS];
   int lenh = 0;           /* number of distinct colors found */

   memset(hist, 0, sizeof(hist));

   for (k = 0; k < n; k++) {
      int l;
      unsigned int key = 0; /* unsigned: the top byte may have bit 7 set */
      int sum = 0;
      for (i = 0; i < nc; i++) {
         key = (key << 8) | input[k][i];
         sum += input[k][i];
      }
      for (l = 0; l < n; l++) {
         if (!hist[l].flag) {
            /* alloc new slot */
            hist[l].flag = !0;
            hist[l].key = key;
            hist[l].freq = 1;
            hist[l].idx = k;
            lenh = l + 1;
            break;
         } else if (hist[l].key == key) {
            hist[l].freq++;
            break;
         }
      }
      if (minSum > sum) {
         minSum = sum;
         minCol = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxCol = k;
      }
   }

   if (lenh <= nv) {
      /* few enough distinct colors: use them verbatim */
      for (j = 0; j < lenh; j++) {
         for (i = 0; i < nc; i++) {
            vec[j][i] = (float)input[hist[j].idx][i];
         }
      }
      /* pad the remaining slots (with zeros if there were no texels) */
      for (; j < nv; j++) {
         for (i = 0; i < nc; i++) {
            vec[j][i] = (lenh > 0) ? vec[0][i] : 0.0f;
         }
      }
      return 0;
   }

   /* spread the vectors between darkest and brightest, rounding to nearest */
   for (j = 0; j < nv; j++) {
      for (i = 0; i < nc; i++) {
         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (nv - 1);
      }
   }

   return !0;
}
494 * 495 * for each sample color 496 * sort to nearest vector. 497 * 498 * replace each vector with the centroid of it's matching colors. 499 * 500 * repeat until RMS doesn't improve. 501 * 502 * if a color vector has no samples, or becomes the same as another 503 * vector, replace it with the color which is farthest from a sample. 504 * 505 * vec[][MAX_COMP] initial vectors and resulting colors 506 * nv number of resulting colors required 507 * input[N_TEXELS][MAX_COMP] input texels 508 * nc number of components in input / vec 509 * n number of input samples 510 */ 511 512 int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */ 513 int cnt[MAX_VECT]; /* how many times a certain vector was chosen */ 514 float error, lasterror = 1e9; 515 516 int i, j, k, rep; 517 518 /* the quantizer */ 519 for (rep = 0; rep < LL_N_REP; rep++) { 520 /* reset sums & counters */ 521 for (j = 0; j < nv; j++) { 522 for (i = 0; i < nc; i++) { 523 sum[j][i] = 0; 524 } 525 cnt[j] = 0; 526 } 527 error = 0; 528 529 /* scan whole block */ 530 for (k = 0; k < n; k++) { 531#if 1 532 int best = -1; 533 float err = 1e9; /* big enough */ 534 /* determine best vector */ 535 for (j = 0; j < nv; j++) { 536 float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) + 537 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) + 538 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]); 539 if (nc == 4) { 540 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]); 541 } 542 if (e < err) { 543 err = e; 544 best = j; 545 } 546 } 547#else 548 int best = fxt1_bestcol(vec, n_vect, input[k], n_comp, &err); 549#endif 550 /* add in closest color */ 551 for (i = 0; i < nc; i++) { 552 sum[best][i] += input[k][i]; 553 } 554 /* mark this vector as used */ 555 cnt[best]++; 556 /* accumulate error */ 557 error += err; 558 } 559 560 /* check RMS */ 561 if ((error < LL_RMS_E) || 562 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) { 563 return !0; /* good match */ 564 } 565 lasterror = 
error; 566 567 /* move each vector to the barycenter of its closest colors */ 568 for (j = 0; j < nv; j++) { 569 if (cnt[j]) { 570 float div = 1.0 / cnt[j]; 571 for (i = 0; i < nc; i++) { 572 vec[j][i] = div * sum[j][i]; 573 } 574 } else { 575 /* this vec has no samples or is identical with a previous vec */ 576 int worst = fxt1_worst(vec[j], input, nc, n); 577 for (i = 0; i < nc; i++) { 578 vec[j][i] = input[worst][i]; 579 } 580 } 581 } 582 } 583 584 return 0; /* could not converge fast enough */ 585} 586 587 588static void 589fxt1_quantize_CHROMA (unsigned long *cc, 590 unsigned char input[N_TEXELS][MAX_COMP]) 591{ 592 const int n_vect = 4; /* 4 base vectors to find */ 593 const int n_comp = 3; /* 3 components: R, G, B */ 594 float vec[MAX_VECT][MAX_COMP]; 595 int i, j, k; 596 Fx64 hi; /* high quadword */ 597 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */ 598 599 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) { 600 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS); 601 } 602 603 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */ 604 for (j = n_vect - 1; j >= 0; j--) { 605 for (i = 0; i < n_comp; i++) { 606 /* add in colors */ 607 FX64_SHL(hi, 5); 608 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0)); 609 } 610 } 611 ((Fx64 *)cc)[1] = hi; 612 613 lohi = lolo = 0; 614 /* right microtile */ 615 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) { 616 lohi <<= 2; 617 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp); 618 } 619 /* left microtile */ 620 for (; k >= 0; k--) { 621 lolo <<= 2; 622 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp); 623 } 624 cc[1] = lohi; 625 cc[0] = lolo; 626} 627 628 629static void 630fxt1_quantize_ALPHA0 (unsigned long *cc, 631 unsigned char input[N_TEXELS][MAX_COMP], 632 unsigned char reord[N_TEXELS][MAX_COMP], int n) 633{ 634 const int n_vect = 3; /* 3 base vectors to find */ 635 const int n_comp = 4; /* 4 components: R, G, B, A */ 636 float vec[MAX_VECT][MAX_COMP]; 637 int i, j, k; 638 Fx64 hi; /* high 
quadword */ 639 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */ 640 641 /* the last vector indicates zero */ 642 for (i = 0; i < n_comp; i++) { 643 vec[n_vect][i] = 0; 644 } 645 646 /* the first n texels in reord are guaranteed to be non-zero */ 647 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) { 648 fxt1_lloyd(vec, n_vect, reord, n_comp, n); 649 } 650 651 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */ 652 for (j = n_vect - 1; j >= 0; j--) { 653 /* add in alphas */ 654 FX64_SHL(hi, 5); 655 FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0)); 656 } 657 for (j = n_vect - 1; j >= 0; j--) { 658 for (i = 0; i < n_comp - 1; i++) { 659 /* add in colors */ 660 FX64_SHL(hi, 5); 661 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0)); 662 } 663 } 664 ((Fx64 *)cc)[1] = hi; 665 666 lohi = lolo = 0; 667 /* right microtile */ 668 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) { 669 lohi <<= 2; 670 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); 671 } 672 /* left microtile */ 673 for (; k >= 0; k--) { 674 lolo <<= 2; 675 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); 676 } 677 cc[1] = lohi; 678 cc[0] = lolo; 679} 680 681 682static void 683fxt1_quantize_ALPHA1 (unsigned long *cc, 684 unsigned char input[N_TEXELS][MAX_COMP]) 685{ 686 const int n_vect = 3; /* highest vector number in each microtile */ 687 const int n_comp = 4; /* 4 components: R, G, B, A */ 688 float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */ 689 float b, iv[MAX_COMP]; /* interpolation vector */ 690 int i, j, k; 691 Fx64 hi; /* high quadword */ 692 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */ 693 694 int minSum; 695 int maxSum; 696 int minColL = 0, maxColL = 0; 697 int minColR = 0, maxColR = 0; 698 int sumL = 0, sumR = 0; 699 700 /* Our solution here is to find the darkest and brightest colors in 701 * the 4x4 tile and use those as the two representative colors. 702 * There are probably better algorithms to use (histogram-based). 
703 */ 704 minSum = 1000; /* big enough */ 705 maxSum = -1; /* small enough */ 706 for (k = 0; k < N_TEXELS / 2; k++) { 707 int sum = 0; 708 for (i = 0; i < n_comp; i++) { 709 sum += input[k][i]; 710 } 711 if (minSum > sum) { 712 minSum = sum; 713 minColL = k; 714 } 715 if (maxSum < sum) { 716 maxSum = sum; 717 maxColL = k; 718 } 719 sumL += sum; 720 } 721 minSum = 1000; /* big enough */ 722 maxSum = -1; /* small enough */ 723 for (; k < N_TEXELS; k++) { 724 int sum = 0; 725 for (i = 0; i < n_comp; i++) { 726 sum += input[k][i]; 727 } 728 if (minSum > sum) { 729 minSum = sum; 730 minColR = k; 731 } 732 if (maxSum < sum) { 733 maxSum = sum; 734 maxColR = k; 735 } 736 sumR += sum; 737 } 738 739 /* choose the common vector (yuck!) */ 740{ 741 int j1, j2; 742 int v1 = 0, v2 = 0; 743 float err = 1e9; /* big enough */ 744 float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 745 for (i = 0; i < n_comp; i++) { 746 tv[0][i] = input[minColL][i]; 747 tv[1][i] = input[maxColL][i]; 748 tv[2][i] = input[minColR][i]; 749 tv[3][i] = input[maxColR][i]; 750 } 751 for (j1 = 0; j1 < 2; j1++) { 752 for (j2 = 2; j2 < 4; j2++) { 753 float e = 0; 754 for (i = 0; i < n_comp; i++) { 755 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]); 756 } 757 if (e < err) { 758 err = e; 759 v1 = j1; 760 v2 = j2; 761 } 762 } 763 } 764 for (i = 0; i < n_comp; i++) { 765 vec[0][i] = tv[1 - v1][i]; 766 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR); 767 vec[2][i] = tv[5 - v2][i]; 768 } 769} 770 771 /* left microtile */ 772 cc[0] = 0; 773 if (minColL != maxColL) { 774 /* compute interpolation vector */ 775 float d2 = 0; 776 float rd2; 777 778 for (i = 0; i < n_comp; i++) { 779 iv[i] = vec[1][i] - vec[0][i]; 780 d2 += iv[i] * iv[i]; 781 } 782 rd2 = (float)n_vect / d2; 783 b = 0; 784 for (i = 0; i < n_comp; i++) { 785 b -= iv[i] * vec[0][i]; 786 iv[i] *= rd2; 787 } 788 b = b * rd2 + 0.5f; 789 790 /* add in texels */ 791 lolo = 0; 792 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 
793 int texel; 794 /* interpolate color */ 795 float dot = 0; 796 for (i = 0; i < n_comp; i++) { 797 dot += input[k][i] * iv[i]; 798 } 799 texel = (int)(dot + b); 800 if (texel < 0) { 801 texel = 0; 802 } else if (texel > n_vect) { 803 texel = n_vect; 804 } 805 /* add in texel */ 806 lolo <<= 2; 807 lolo |= texel; 808 } 809 810 cc[0] = lolo; 811 } 812 813 /* right microtile */ 814 cc[1] = 0; 815 if (minColR != maxColR) { 816 /* compute interpolation vector */ 817 float d2 = 0; 818 float rd2; 819 820 for (i = 0; i < n_comp; i++) { 821 iv[i] = vec[1][i] - vec[2][i]; 822 d2 += iv[i] * iv[i]; 823 } 824 rd2 = (float)n_vect / d2; 825 b = 0; 826 for (i = 0; i < n_comp; i++) { 827 b -= iv[i] * vec[2][i]; 828 iv[i] *= rd2; 829 } 830 b = b * rd2 + 0.5f; 831 832 /* add in texels */ 833 lohi = 0; 834 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 835 int texel; 836 /* interpolate color */ 837 float dot = 0; 838 for (i = 0; i < n_comp; i++) { 839 dot += input[k][i] * iv[i]; 840 } 841 texel = (int)(dot + b); 842 if (texel < 0) { 843 texel = 0; 844 } else if (texel > n_vect) { 845 texel = n_vect; 846 } 847 /* add in texel */ 848 lohi <<= 2; 849 lohi |= texel; 850 } 851 852 cc[1] = lohi; 853 } 854 855 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */ 856 for (j = n_vect - 1; j >= 0; j--) { 857 /* add in alphas */ 858 FX64_SHL(hi, 5); 859 FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0)); 860 } 861 for (j = n_vect - 1; j >= 0; j--) { 862 for (i = 0; i < n_comp - 1; i++) { 863 /* add in colors */ 864 FX64_SHL(hi, 5); 865 FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0)); 866 } 867 } 868 ((Fx64 *)cc)[1] = hi; 869} 870 871 872static void 873fxt1_quantize_HI (unsigned long *cc, 874 unsigned char input[N_TEXELS][MAX_COMP], 875 unsigned char reord[N_TEXELS][MAX_COMP], int n) 876{ 877 const int n_vect = 6; /* highest vector number */ 878 const int n_comp = 3; /* 3 components: R, G, B */ 879 float b, iv[MAX_COMP]; /* interpolation vector */ 880 int i, k; 881 unsigned long hihi; /* high 
quadword: hi dword */ 882 883 int minSum = 1000; /* big enough */ 884 int maxSum = -1; /* small enough */ 885 int minCol; 886 int maxCol; 887 888 /* Our solution here is to find the darkest and brightest colors in 889 * the 8x4 tile and use those as the two representative colors. 890 * There are probably better algorithms to use (histogram-based). 891 */ 892 for (k = 0; k < n; k++) { 893 int sum = 0; 894 for (i = 0; i < n_comp; i++) { 895 sum += reord[k][i]; 896 } 897 if (minSum > sum) { 898 minSum = sum; 899 minCol = k; 900 } 901 if (maxSum < sum) { 902 maxSum = sum; 903 maxCol = k; 904 } 905 } 906 907 hihi = 0; /* cc-hi = "00" */ 908 for (i = 0; i < n_comp; i++) { 909 /* add in colors */ 910 hihi <<= 5; 911 hihi |= reord[maxCol][i] >> 3; 912 } 913 for (i = 0; i < n_comp; i++) { 914 /* add in colors */ 915 hihi <<= 5; 916 hihi |= reord[minCol][i] >> 3; 917 } 918 cc[3] = hihi; 919 cc[0] = cc[1] = cc[2] = 0; 920 921 /* compute interpolation vector */ 922 if (minCol != maxCol) { 923 float d2 = 0; 924 float rd2; 925 926 for (i = 0; i < n_comp; i++) { 927 iv[i] = reord[maxCol][i] - reord[minCol][i]; 928 d2 += iv[i] * iv[i]; 929 } 930 rd2 = (float)n_vect / d2; 931 b = 0; 932 for (i = 0; i < n_comp; i++) { 933 b -= iv[i] * reord[minCol][i]; 934 iv[i] *= rd2; 935 } 936 b = b * rd2 + 0.5f; 937 } 938 939 /* add in texels */ 940 for (k = N_TEXELS - 1; k >= 0; k--) { 941 int t = k * 3; 942 unsigned long *kk = (unsigned long *)((unsigned long)cc + t / 8); 943 int texel = n_vect + 1; /* transparent black */ 944 945 if (!ISTBLACK(input[k])) { 946 if (minCol != maxCol) { 947 /* interpolate color */ 948 float dot = 0; 949 for (i = 0; i < n_comp; i++) { 950 dot += input[k][i] * iv[i]; 951 } 952 texel = (int)(dot + b); 953 if (texel < 0) { 954 texel = 0; 955 } else if (texel > n_vect) { 956 texel = n_vect; 957 } 958 /* add in texel */ 959 kk[0] |= texel << (t & 7); 960 } 961 } else { 962 /* add in texel */ 963 kk[0] |= texel << (t & 7); 964 } 965 } 966} 967 968 969static void 
970fxt1_quantize_MIXED1 (unsigned long *cc, 971 unsigned char input[N_TEXELS][MAX_COMP]) 972{ 973 const int n_vect = 2; /* highest vector number in each microtile */ 974 const int n_comp = 3; /* 3 components: R, G, B */ 975 unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 976 float b, iv[MAX_COMP]; /* interpolation vector */ 977 int i, j, k; 978 Fx64 hi; /* high quadword */ 979 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */ 980 981 int minSum; 982 int maxSum; 983 int minColL = 0, maxColL = -1; 984 int minColR = 0, maxColR = -1; 985 986 /* Our solution here is to find the darkest and brightest colors in 987 * the 4x4 tile and use those as the two representative colors. 988 * There are probably better algorithms to use (histogram-based). 989 */ 990 minSum = 1000; /* big enough */ 991 maxSum = -1; /* small enough */ 992 for (k = 0; k < N_TEXELS / 2; k++) { 993 if (!ISTBLACK(input[k])) { 994 int sum = 0; 995 for (i = 0; i < n_comp; i++) { 996 sum += input[k][i]; 997 } 998 if (minSum > sum) { 999 minSum = sum; 1000 minColL = k; 1001 } 1002 if (maxSum < sum) { 1003 maxSum = sum; 1004 maxColL = k; 1005 } 1006 } 1007 } 1008 minSum = 1000; /* big enough */ 1009 maxSum = -1; /* small enough */ 1010 for (; k < N_TEXELS; k++) { 1011 if (!ISTBLACK(input[k])) { 1012 int sum = 0; 1013 for (i = 0; i < n_comp; i++) { 1014 sum += input[k][i]; 1015 } 1016 if (minSum > sum) { 1017 minSum = sum; 1018 minColR = k; 1019 } 1020 if (maxSum < sum) { 1021 maxSum = sum; 1022 maxColR = k; 1023 } 1024 } 1025 } 1026 1027 /* left microtile */ 1028 if (maxColL == -1) { 1029 /* all transparent black */ 1030 cc[0] = -1; 1031 for (i = 0; i < n_comp; i++) { 1032 vec[0][i] = 0; 1033 vec[1][i] = 0; 1034 } 1035 } else { 1036 cc[0] = 0; 1037 for (i = 0; i < n_comp; i++) { 1038 vec[0][i] = input[minColL][i]; 1039 vec[1][i] = input[maxColL][i]; 1040 } 1041 if (minColL != maxColL) { 1042 /* compute interpolation vector */ 1043 float d2 = 0; 1044 float rd2; 1045 1046 
for (i = 0; i < n_comp; i++) { 1047 iv[i] = vec[1][i] - vec[0][i]; 1048 d2 += iv[i] * iv[i]; 1049 } 1050 rd2 = (float)n_vect / d2; 1051 b = 0; 1052 for (i = 0; i < n_comp; i++) { 1053 b -= iv[i] * vec[0][i]; 1054 iv[i] *= rd2; 1055 } 1056 b = b * rd2 + 0.5f; 1057 1058 /* add in texels */ 1059 lolo = 0; 1060 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 1061 int texel = n_vect + 1; /* transparent black */ 1062 if (!ISTBLACK(input[k])) { 1063 /* interpolate color */ 1064 float dot = 0; 1065 for (i = 0; i < n_comp; i++) { 1066 dot += input[k][i] * iv[i]; 1067 } 1068 texel = (int)(dot + b); 1069 if (texel < 0) { 1070 texel = 0; 1071 } else if (texel > n_vect) { 1072 texel = n_vect; 1073 } 1074 } 1075 /* add in texel */ 1076 lolo <<= 2; 1077 lolo |= texel; 1078 } 1079 cc[0] = lolo; 1080 } 1081 } 1082 1083 /* right microtile */ 1084 if (maxColR == -1) { 1085 /* all transparent black */ 1086 cc[1] = -1; 1087 for (i = 0; i < n_comp; i++) { 1088 vec[2][i] = 0; 1089 vec[3][i] = 0; 1090 } 1091 } else { 1092 cc[1] = 0; 1093 for (i = 0; i < n_comp; i++) { 1094 vec[2][i] = input[minColR][i]; 1095 vec[3][i] = input[maxColR][i]; 1096 } 1097 if (minColR != maxColR) { 1098 /* compute interpolation vector */ 1099 float d2 = 0; 1100 float rd2; 1101 1102 for (i = 0; i < n_comp; i++) { 1103 iv[i] = vec[3][i] - vec[2][i]; 1104 d2 += iv[i] * iv[i]; 1105 } 1106 rd2 = (float)n_vect / d2; 1107 b = 0; 1108 for (i = 0; i < n_comp; i++) { 1109 b -= iv[i] * vec[2][i]; 1110 iv[i] *= rd2; 1111 } 1112 b = b * rd2 + 0.5f; 1113 1114 /* add in texels */ 1115 lohi = 0; 1116 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 1117 int texel = n_vect + 1; /* transparent black */ 1118 if (!ISTBLACK(input[k])) { 1119 /* interpolate color */ 1120 float dot = 0; 1121 for (i = 0; i < n_comp; i++) { 1122 dot += input[k][i] * iv[i]; 1123 } 1124 texel = (int)(dot + b); 1125 if (texel < 0) { 1126 texel = 0; 1127 } else if (texel > n_vect) { 1128 texel = n_vect; 1129 } 1130 } 1131 /* add in texel */ 1132 lohi <<= 2; 1133 
lohi |= texel; 1134 } 1135 cc[1] = lohi; 1136 } 1137 } 1138 1139 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */ 1140 for (j = 2 * 2 - 1; j >= 0; j--) { 1141 for (i = 0; i < n_comp; i++) { 1142 /* add in colors */ 1143 FX64_SHL(hi, 5); 1144 FX64_OR32(hi, vec[j][i] >> 3); 1145 } 1146 } 1147 ((Fx64 *)cc)[1] = hi; 1148} 1149 1150 1151static void 1152fxt1_quantize_MIXED0 (unsigned long *cc, 1153 unsigned char input[N_TEXELS][MAX_COMP]) 1154{ 1155 const int n_vect = 3; /* highest vector number in each microtile */ 1156 const int n_comp = 3; /* 3 components: R, G, B */ 1157 unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */ 1158 float b, iv[MAX_COMP]; /* interpolation vector */ 1159 int i, j, k; 1160 Fx64 hi; /* high quadword */ 1161 unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */ 1162 1163 int minColL = 0, maxColL = 0; 1164 int minColR = 0, maxColR = 0; 1165#if 0 1166 int minSum; 1167 int maxSum; 1168 1169 /* Our solution here is to find the darkest and brightest colors in 1170 * the 4x4 tile and use those as the two representative colors. 1171 * There are probably better algorithms to use (histogram-based). 
1172 */ 1173 minSum = 1000; /* big enough */ 1174 maxSum = -1; /* small enough */ 1175 for (k = 0; k < N_TEXELS / 2; k++) { 1176 int sum = 0; 1177 for (i = 0; i < n_comp; i++) { 1178 sum += input[k][i]; 1179 } 1180 if (minSum > sum) { 1181 minSum = sum; 1182 minColL = k; 1183 } 1184 if (maxSum < sum) { 1185 maxSum = sum; 1186 maxColL = k; 1187 } 1188 } 1189 minSum = 1000; /* big enough */ 1190 maxSum = -1; /* small enough */ 1191 for (; k < N_TEXELS; k++) { 1192 int sum = 0; 1193 for (i = 0; i < n_comp; i++) { 1194 sum += input[k][i]; 1195 } 1196 if (minSum > sum) { 1197 minSum = sum; 1198 minColR = k; 1199 } 1200 if (maxSum < sum) { 1201 maxSum = sum; 1202 maxColR = k; 1203 } 1204 } 1205#else 1206 int minVal; 1207 int maxVal; 1208 int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2); 1209 int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2); 1210 1211 /* Scan the channel with max variance for lo & hi 1212 * and use those as the two representative colors. 
1213 */ 1214 minVal = 1000; /* big enough */ 1215 maxVal = -1; /* small enough */ 1216 for (k = 0; k < N_TEXELS / 2; k++) { 1217 int t = input[k][maxVarL]; 1218 if (minVal > t) { 1219 minVal = t; 1220 minColL = k; 1221 } 1222 if (maxVal < t) { 1223 maxVal = t; 1224 maxColL = k; 1225 } 1226 } 1227 minVal = 1000; /* big enough */ 1228 maxVal = -1; /* small enough */ 1229 for (; k < N_TEXELS; k++) { 1230 int t = input[k][maxVarR]; 1231 if (minVal > t) { 1232 minVal = t; 1233 minColR = k; 1234 } 1235 if (maxVal < t) { 1236 maxVal = t; 1237 maxColR = k; 1238 } 1239 } 1240#endif 1241 1242 /* left microtile */ 1243 cc[0] = 0; 1244 for (i = 0; i < n_comp; i++) { 1245 vec[0][i] = input[minColL][i]; 1246 vec[1][i] = input[maxColL][i]; 1247 } 1248 if (minColL != maxColL) { 1249 /* compute interpolation vector */ 1250 float d2 = 0; 1251 float rd2; 1252 1253 for (i = 0; i < n_comp; i++) { 1254 iv[i] = vec[1][i] - vec[0][i]; 1255 d2 += iv[i] * iv[i]; 1256 } 1257 rd2 = (float)n_vect / d2; 1258 b = 0; 1259 for (i = 0; i < n_comp; i++) { 1260 b -= iv[i] * vec[0][i]; 1261 iv[i] *= rd2; 1262 } 1263 b = b * rd2 + 0.5f; 1264 1265 /* add in texels */ 1266 lolo = 0; 1267 for (k = N_TEXELS / 2 - 1; k >= 0; k--) { 1268 int texel; 1269 /* interpolate color */ 1270 float dot = 0; 1271 for (i = 0; i < n_comp; i++) { 1272 dot += input[k][i] * iv[i]; 1273 } 1274 texel = (int)(dot + b); 1275 if (texel < 0) { 1276 texel = 0; 1277 } else if (texel > n_vect) { 1278 texel = n_vect; 1279 } 1280 /* add in texel */ 1281 lolo <<= 2; 1282 lolo |= texel; 1283 } 1284 1285 /* funky encoding for LSB of green */ 1286 if (((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) { 1287 for (i = 0; i < n_comp; i++) { 1288 vec[1][i] = input[minColL][i]; 1289 vec[0][i] = input[maxColL][i]; 1290 } 1291 lolo = ~lolo; 1292 } 1293 1294 cc[0] = lolo; 1295 } 1296 1297 /* right microtile */ 1298 cc[1] = 0; 1299 for (i = 0; i < n_comp; i++) { 1300 vec[2][i] = input[minColR][i]; 1301 vec[3][i] = 
input[maxColR][i]; 1302 } 1303 if (minColR != maxColR) { 1304 /* compute interpolation vector */ 1305 float d2 = 0; 1306 float rd2; 1307 1308 for (i = 0; i < n_comp; i++) { 1309 iv[i] = vec[3][i] - vec[2][i]; 1310 d2 += iv[i] * iv[i]; 1311 } 1312 rd2 = (float)n_vect / d2; 1313 b = 0; 1314 for (i = 0; i < n_comp; i++) { 1315 b -= iv[i] * vec[2][i]; 1316 iv[i] *= rd2; 1317 } 1318 b = b * rd2 + 0.5f; 1319 1320 /* add in texels */ 1321 lohi = 0; 1322 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { 1323 int texel; 1324 /* interpolate color */ 1325 float dot = 0; 1326 for (i = 0; i < n_comp; i++) { 1327 dot += input[k][i] * iv[i]; 1328 } 1329 texel = (int)(dot + b); 1330 if (texel < 0) { 1331 texel = 0; 1332 } else if (texel > n_vect) { 1333 texel = n_vect; 1334 } 1335 /* add in texel */ 1336 lohi <<= 2; 1337 lohi |= texel; 1338 } 1339 1340 /* funky encoding for LSB of green */ 1341 if (((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) { 1342 for (i = 0; i < n_comp; i++) { 1343 vec[3][i] = input[minColR][i]; 1344 vec[2][i] = input[maxColR][i]; 1345 } 1346 lohi = ~lohi; 1347 } 1348 1349 cc[1] = lohi; 1350 } 1351 1352 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */ 1353 for (j = 2 * 2 - 1; j >= 0; j--) { 1354 for (i = 0; i < n_comp; i++) { 1355 /* add in colors */ 1356 FX64_SHL(hi, 5); 1357 FX64_OR32(hi, vec[j][i] >> 3); 1358 } 1359 } 1360 ((Fx64 *)cc)[1] = hi; 1361} 1362 1363 1364static void 1365fxt1_quantize (unsigned long *cc, const unsigned char *lines[], int comps) 1366{ 1367 int trualpha; 1368 unsigned char reord[N_TEXELS][MAX_COMP]; 1369 1370 unsigned char input[N_TEXELS][MAX_COMP]; 1371 int i, k, l; 1372 1373 memset(input, -1, sizeof(input)); 1374 1375 /* 8 texels each line */ 1376 for (l = 0; l < 4; l++) { 1377 for (k = 0; k < 4; k++) { 1378 for (i = 0; i < comps; i++) { 1379 input[k + l * 4][i] = *lines[l]++; 1380 } 1381 } 1382 for (; k < 8; k++) { 1383 for (i = 0; i < comps; i++) { 1384 input[k + l * 4 + 
12][i] = *lines[l]++; 1385 } 1386 } 1387 } 1388 1389 /* block looks like this: 1390 * 00, 01, 02, 03, 08, 09, 0a, 0b 1391 * 10, 11, 12, 13, 18, 19, 1a, 1b 1392 * 04, 05, 06, 07, 0c, 0d, 0e, 0f 1393 * 14, 15, 16, 17, 1c, 1d, 1e, 1f 1394 */ 1395 1396 /* [dBorca] 1397 * stupidity flows forth from this 1398 */ 1399 l = N_TEXELS; 1400 trualpha = 0; 1401 if (comps == 4) { 1402 /* skip all transparent black texels */ 1403 l = 0; 1404 for (k = 0; k < N_TEXELS; k++) { 1405 /* test all components against 0 */ 1406 if (!ISTBLACK(input[k])) { 1407 /* texel is not transparent black */ 1408 COPY_4UBV(reord[l], input[k]); 1409 if (reord[l][ACOMP] < (255 - ALPHA_TS)) { 1410 /* non-opaque texel */ 1411 trualpha = !0; 1412 } 1413 l++; 1414 } 1415 } 1416 } 1417 1418#if 0 1419 if (trualpha) { 1420 fxt1_quantize_ALPHA0(cc, input, reord, l); 1421 } else if (l == 0) { 1422 cc[0] = cc[1] = cc[2] = -1; 1423 cc[3] = 0; 1424 } else if (l < N_TEXELS) { 1425 fxt1_quantize_HI(cc, input, reord, l); 1426 } else { 1427 fxt1_quantize_CHROMA(cc, input); 1428 } 1429#else 1430 if (trualpha) { 1431 fxt1_quantize_ALPHA1(cc, input); 1432 } else if (l == 0) { 1433 cc[0] = cc[1] = cc[2] = -1; 1434 cc[3] = 0; 1435 } else if (l < N_TEXELS) { 1436 fxt1_quantize_MIXED1(cc, input); 1437 } else { 1438 fxt1_quantize_MIXED0(cc, input); 1439 } 1440#endif 1441} 1442 1443 1444int 1445fxt1_encode (GLcontext *ctx, 1446 unsigned int width, unsigned int height, 1447 int srcFormat, 1448 const void *source, int srcRowStride, 1449 void *dest, int destRowStride) 1450{ 1451 const int comps = (srcFormat == GL_RGB) ? 3 : 4; 1452 unsigned int x, y; 1453 const unsigned char *data; 1454 unsigned long *encoded = dest; 1455 GLubyte *newSource = NULL; 1456 1457 /* 1458 * Rescale image if width is less than 8 or height is less than 4. 
1459 */ 1460 if (width < 8 || height < 4) { 1461 GLint newWidth = (width + 7) & ~7; 1462 GLint newHeight = (height + 3) & ~3; 1463 newSource = MALLOC(comps * newWidth * newHeight * sizeof(GLchan)); 1464 _mesa_upscale_teximage2d(width, height, newWidth, newHeight, 1465 comps, source, srcRowStride, newSource); 1466 source = newSource; 1467 width = newWidth; 1468 height = newHeight; 1469 srcRowStride = comps * newWidth; 1470 } 1471 1472 data = source; 1473 destRowStride = (destRowStride - width * 2) / 4; 1474 for (y = 0; y < height; y += 4) { 1475 unsigned int offs = 0 + (y + 0) * srcRowStride; 1476 for (x = 0; x < width; x += 8) { 1477 const unsigned char *lines[4]; 1478 lines[0] = &data[offs]; 1479 lines[1] = lines[0] + srcRowStride; 1480 lines[2] = lines[1] + srcRowStride; 1481 lines[3] = lines[2] + srcRowStride; 1482 offs += 8 * comps; 1483 fxt1_quantize(encoded, lines, comps); 1484 /* 128 bits per 8x4 block = 4bpp */ 1485 encoded += 4; 1486 } 1487 encoded += destRowStride; 1488 } 1489 1490 if (newSource != NULL) { 1491 FREE(newSource); 1492 } 1493 1494 return 0; 1495} 1496 1497 1498/***************************************************************************\ 1499 * FXT1 decoder 1500 * 1501 * The decoder is based on GL_3DFX_texture_compression_FXT1 1502 * specification and serves as a concept for the encoder. 
1503\***************************************************************************/ 1504 1505 1506/* lookup table for scaling 5 bit colors up to 8 bits */ 1507static unsigned char _rgb_scale_5[] = { 1508 0, 8, 16, 25, 33, 41, 49, 58, 1509 66, 74, 82, 90, 99, 107, 115, 123, 1510 132, 140, 148, 156, 165, 173, 181, 189, 1511 197, 206, 214, 222, 230, 239, 247, 255 1512}; 1513 1514/* lookup table for scaling 6 bit colors up to 8 bits */ 1515static unsigned char _rgb_scale_6[] = { 1516 0, 4, 8, 12, 16, 20, 24, 28, 1517 32, 36, 40, 45, 49, 53, 57, 61, 1518 65, 69, 73, 77, 81, 85, 89, 93, 1519 97, 101, 105, 109, 113, 117, 121, 125, 1520 130, 134, 138, 142, 146, 150, 154, 158, 1521 162, 166, 170, 174, 178, 182, 186, 190, 1522 194, 198, 202, 206, 210, 215, 219, 223, 1523 227, 231, 235, 239, 243, 247, 251, 255 1524}; 1525 1526 1527#define CC_SEL(cc, which) ((cc)[(which) / 32] >> ((which) & 31)) 1528#define UP5(c) _rgb_scale_5[(c) & 31] 1529#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)] 1530#define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n) 1531#define ZERO_4UBV(v) *((unsigned long *)(v)) = 0 1532 1533 1534static void 1535fxt1_decode_1HI (unsigned long code, int t, unsigned char *rgba) 1536{ 1537 const unsigned long *cc; 1538 1539 t *= 3; 1540 cc = (unsigned long *)(code + t / 8); 1541 t = (cc[0] >> (t & 7)) & 7; 1542 1543 if (t == 7) { 1544 ZERO_4UBV(rgba); 1545 } else { 1546 cc = (unsigned long *)(code + 12); 1547 if (t == 0) { 1548 rgba[BCOMP] = UP5(CC_SEL(cc, 0)); 1549 rgba[GCOMP] = UP5(CC_SEL(cc, 5)); 1550 rgba[RCOMP] = UP5(CC_SEL(cc, 10)); 1551 } else if (t == 6) { 1552 rgba[BCOMP] = UP5(CC_SEL(cc, 15)); 1553 rgba[GCOMP] = UP5(CC_SEL(cc, 20)); 1554 rgba[RCOMP] = UP5(CC_SEL(cc, 25)); 1555 } else { 1556 rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15))); 1557 rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20))); 1558 rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25))); 1559 } 1560 rgba[ACOMP] 
= 255; 1561 } 1562} 1563 1564 1565static void 1566fxt1_decode_1CHROMA (unsigned long code, int t, unsigned char *rgba) 1567{ 1568 const unsigned long *cc; 1569 unsigned long kk; 1570 1571 cc = (unsigned long *)code; 1572 if (t & 16) { 1573 cc++; 1574 t &= 15; 1575 } 1576 t = (cc[0] >> (t * 2)) & 3; 1577 1578 t *= 15; 1579 cc = (unsigned long *)(code + 8 + t / 8); 1580 kk = cc[0] >> (t & 7); 1581 rgba[BCOMP] = UP5(kk); 1582 rgba[GCOMP] = UP5(kk >> 5); 1583 rgba[RCOMP] = UP5(kk >> 10); 1584 rgba[ACOMP] = 255; 1585} 1586 1587 1588static void 1589fxt1_decode_1MIXED (unsigned long code, int t, unsigned char *rgba) 1590{ 1591 const unsigned long *cc; 1592 unsigned int col[2][3]; 1593 int glsb, selb; 1594 1595 cc = (unsigned long *)code; 1596 if (t & 16) { 1597 t &= 15; 1598 t = (cc[1] >> (t * 2)) & 3; 1599 /* col 2 */ 1600 col[0][BCOMP] = (*(unsigned long *)(code + 11)) >> 6; 1601 col[0][GCOMP] = CC_SEL(cc, 99); 1602 col[0][RCOMP] = CC_SEL(cc, 104); 1603 /* col 3 */ 1604 col[1][BCOMP] = CC_SEL(cc, 109); 1605 col[1][GCOMP] = CC_SEL(cc, 114); 1606 col[1][RCOMP] = CC_SEL(cc, 119); 1607 glsb = CC_SEL(cc, 126); 1608 selb = CC_SEL(cc, 33); 1609 } else { 1610 t = (cc[0] >> (t * 2)) & 3; 1611 /* col 0 */ 1612 col[0][BCOMP] = CC_SEL(cc, 64); 1613 col[0][GCOMP] = CC_SEL(cc, 69); 1614 col[0][RCOMP] = CC_SEL(cc, 74); 1615 /* col 1 */ 1616 col[1][BCOMP] = CC_SEL(cc, 79); 1617 col[1][GCOMP] = CC_SEL(cc, 84); 1618 col[1][RCOMP] = CC_SEL(cc, 89); 1619 glsb = CC_SEL(cc, 125); 1620 selb = CC_SEL(cc, 1); 1621 } 1622 1623 if (CC_SEL(cc, 124) & 1) { 1624 /* alpha[0] == 1 */ 1625 1626 if (t == 3) { 1627 ZERO_4UBV(rgba); 1628 } else { 1629 if (t == 0) { 1630 rgba[BCOMP] = UP5(col[0][BCOMP]); 1631 rgba[GCOMP] = UP5(col[0][GCOMP]); 1632 rgba[RCOMP] = UP5(col[0][RCOMP]); 1633 } else if (t == 2) { 1634 rgba[BCOMP] = UP5(col[1][BCOMP]); 1635 rgba[GCOMP] = UP6(col[1][GCOMP], glsb); 1636 rgba[RCOMP] = UP5(col[1][RCOMP]); 1637 } else { 1638 rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2; 
1639 rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2; 1640 rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2; 1641 } 1642 rgba[ACOMP] = 255; 1643 } 1644 } else { 1645 /* alpha[0] == 0 */ 1646 1647 if (t == 0) { 1648 rgba[BCOMP] = UP5(col[0][BCOMP]); 1649 rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb); 1650 rgba[RCOMP] = UP5(col[0][RCOMP]); 1651 } else if (t == 3) { 1652 rgba[BCOMP] = UP5(col[1][BCOMP]); 1653 rgba[GCOMP] = UP6(col[1][GCOMP], glsb); 1654 rgba[RCOMP] = UP5(col[1][RCOMP]); 1655 } else { 1656 rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP])); 1657 rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb), 1658 UP6(col[1][GCOMP], glsb)); 1659 rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP])); 1660 } 1661 rgba[ACOMP] = 255; 1662 } 1663} 1664 1665 1666static void 1667fxt1_decode_1ALPHA (unsigned long code, int t, unsigned char *rgba) 1668{ 1669 const unsigned long *cc; 1670 1671 cc = (unsigned long *)code; 1672 if (CC_SEL(cc, 124) & 1) { 1673 /* lerp == 1 */ 1674 unsigned int col0[4]; 1675 1676 if (t & 16) { 1677 t &= 15; 1678 t = (cc[1] >> (t * 2)) & 3; 1679 /* col 2 */ 1680 col0[BCOMP] = (*(unsigned long *)(code + 11)) >> 6; 1681 col0[GCOMP] = CC_SEL(cc, 99); 1682 col0[RCOMP] = CC_SEL(cc, 104); 1683 col0[ACOMP] = CC_SEL(cc, 119); 1684 } else { 1685 t = (cc[0] >> (t * 2)) & 3; 1686 /* col 0 */ 1687 col0[BCOMP] = CC_SEL(cc, 64); 1688 col0[GCOMP] = CC_SEL(cc, 69); 1689 col0[RCOMP] = CC_SEL(cc, 74); 1690 col0[ACOMP] = CC_SEL(cc, 109); 1691 } 1692 1693 if (t == 0) { 1694 rgba[BCOMP] = UP5(col0[BCOMP]); 1695 rgba[GCOMP] = UP5(col0[GCOMP]); 1696 rgba[RCOMP] = UP5(col0[RCOMP]); 1697 rgba[ACOMP] = UP5(col0[ACOMP]); 1698 } else if (t == 3) { 1699 rgba[BCOMP] = UP5(CC_SEL(cc, 79)); 1700 rgba[GCOMP] = UP5(CC_SEL(cc, 84)); 1701 rgba[RCOMP] = UP5(CC_SEL(cc, 89)); 1702 rgba[ACOMP] = UP5(CC_SEL(cc, 114)); 1703 } else { 1704 rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79))); 1705 rgba[GCOMP] = LERP(3, 
t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84))); 1706 rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89))); 1707 rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114))); 1708 } 1709 } else { 1710 /* lerp == 0 */ 1711 1712 if (t & 16) { 1713 cc++; 1714 t &= 15; 1715 } 1716 t = (cc[0] >> (t * 2)) & 3; 1717 1718 if (t == 3) { 1719 ZERO_4UBV(rgba); 1720 } else { 1721 unsigned long kk; 1722 cc = (unsigned long *)code; 1723 rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13)); 1724 t *= 15; 1725 cc = (unsigned long *)(code + 8 + t / 8); 1726 kk = cc[0] >> (t & 7); 1727 rgba[BCOMP] = UP5(kk); 1728 rgba[GCOMP] = UP5(kk >> 5); 1729 rgba[RCOMP] = UP5(kk >> 10); 1730 } 1731 } 1732} 1733 1734 1735void 1736fxt1_decode_1 (const void *texture, int width, 1737 int i, int j, unsigned char *rgba) 1738{ 1739 static void (*decode_1[]) (unsigned long, int, unsigned char *) = { 1740 fxt1_decode_1HI, /* cc-high = "00?" */ 1741 fxt1_decode_1HI, /* cc-high = "00?" */ 1742 fxt1_decode_1CHROMA, /* cc-chroma = "010" */ 1743 fxt1_decode_1ALPHA, /* alpha = "011" */ 1744 fxt1_decode_1MIXED, /* mixed = "1??" */ 1745 fxt1_decode_1MIXED, /* mixed = "1??" */ 1746 fxt1_decode_1MIXED, /* mixed = "1??" */ 1747 fxt1_decode_1MIXED /* mixed = "1??" */ 1748 }; 1749 1750 unsigned long code = (unsigned long)texture + 1751 ((j / 4) * (width / 8) + (i / 8)) * 16; 1752 int mode = CC_SEL((unsigned long *)code, 125); 1753 int t = i & 7; 1754 1755 if (t & 4) { 1756 t += 12; 1757 } 1758 t += (j & 3) * 4; 1759 1760 decode_1[mode](code, t, rgba); 1761} 1762