texcompress_fxt1.c revision 3bdc8535fc7be3e0524e9dd728ef9f63532fd789
1/*
2 * Mesa 3-D graphics library
3 * Version:  6.1
4 *
5 * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26/**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32#include "glheader.h"
33#include "imports.h"
34#include "colormac.h"
35#include "context.h"
36#include "convolve.h"
37#include "image.h"
38#include "texcompress.h"
39#include "texformat.h"
40#include "texstore.h"
41
42
43int
44fxt1_encode (GLcontext *ctx,
45             unsigned int width, unsigned int height,
46             int srcFormat,
47             const void *source, int srcRowStride,
48             void *dest, int destRowStride);
49void
50fxt1_decode_1 (const void *texture, int width,
51               int i, int j, unsigned char *rgba);
52
53
54/**
55 * Called during context initialization.
56 */
57void
58_mesa_init_texture_fxt1( GLcontext *ctx )
59{
60}
61
62
63/**
64 * Called via TexFormat->StoreImage to store an RGB_FXT1 texture.
65 */
66static GLboolean
67texstore_rgb_fxt1(STORE_PARAMS)
68{
69   const GLchan *pixels;
70   GLint srcRowStride;
71   GLubyte *dst;
72   const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
73   const GLchan *tempImage = NULL;
74
75   ASSERT(dstFormat == &_mesa_texformat_rgb_fxt1);
76   ASSERT(dstXoffset % 8 == 0);
77   ASSERT(dstYoffset % 4 == 0);
78   ASSERT(dstZoffset     == 0);
79
80   if (srcFormat != GL_RGB ||
81       srcType != CHAN_TYPE ||
82       ctx->_ImageTransferState ||
83       srcPacking->SwapBytes) {
84      /* convert image to RGB/GLchan */
85      tempImage = _mesa_make_temp_chan_image(ctx, dims,
86                                             baseInternalFormat,
87                                             dstFormat->BaseFormat,
88                                             srcWidth, srcHeight, srcDepth,
89                                             srcFormat, srcType, srcAddr,
90                                             srcPacking);
91      if (!tempImage)
92         return GL_FALSE; /* out of memory */
93      _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
94      pixels = tempImage;
95      srcRowStride = 3 * srcWidth;
96      srcFormat = GL_RGB;
97   }
98   else {
99      pixels = (const GLchan *) srcAddr;
100      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
101                                            srcType) / sizeof(GLchan);
102   }
103
104   dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
105                                        GL_COMPRESSED_RGB_FXT1_3DFX,
106                                        texWidth, (GLubyte *) dstAddr);
107
108   fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
109               dst, dstRowStride);
110
111   if (tempImage)
112      _mesa_free((void*) tempImage);
113
114   return GL_TRUE;
115}
116
117
118/**
119 * Called via TexFormat->StoreImage to store an RGBA_FXT1 texture.
120 */
121static GLboolean
122texstore_rgba_fxt1(STORE_PARAMS)
123{
124   const GLchan *pixels;
125   GLint srcRowStride;
126   GLubyte *dst;
127   GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
128   const GLchan *tempImage = NULL;
129
130   ASSERT(dstFormat == &_mesa_texformat_rgba_fxt1);
131   ASSERT(dstXoffset % 8 == 0);
132   ASSERT(dstYoffset % 4 == 0);
133   ASSERT(dstZoffset     == 0);
134
135   if (srcFormat != GL_RGBA ||
136       srcType != CHAN_TYPE ||
137       ctx->_ImageTransferState ||
138       srcPacking->SwapBytes) {
139      /* convert image to RGBA/GLchan */
140      tempImage = _mesa_make_temp_chan_image(ctx, dims,
141                                             baseInternalFormat,
142                                             dstFormat->BaseFormat,
143                                             srcWidth, srcHeight, srcDepth,
144                                             srcFormat, srcType, srcAddr,
145                                             srcPacking);
146      if (!tempImage)
147         return GL_FALSE; /* out of memory */
148      _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
149      pixels = tempImage;
150      srcRowStride = 4 * srcWidth;
151      srcFormat = GL_RGBA;
152   }
153   else {
154      pixels = (const GLchan *) srcAddr;
155      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
156                                            srcType) / sizeof(GLchan);
157   }
158
159   dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
160                                        GL_COMPRESSED_RGBA_FXT1_3DFX,
161                                        texWidth, (GLubyte *) dstAddr);
162
163   fxt1_encode(ctx, srcWidth, srcHeight, srcFormat, pixels, srcRowStride,
164               dst, dstRowStride);
165
166   if (tempImage)
167      _mesa_free((void*) tempImage);
168
169   return GL_TRUE;
170}
171
172
173static void
174fetch_texel_2d_rgba_fxt1( const struct gl_texture_image *texImage,
175                          GLint i, GLint j, GLint k, GLchan *texel )
176{
177   fxt1_decode_1(texImage->Data, texImage->Width, i, j, texel);
178}
179
180
181static void
182fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
183                            GLint i, GLint j, GLint k, GLfloat *texel )
184{
185   /* just sample as GLchan and convert to float here */
186   GLchan rgba[4];
187   fxt1_decode_1(texImage->Data, texImage->Width, i, j, rgba);
188   texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
189   texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
190   texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
191   texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
192}
193
194
195static void
196fetch_texel_2d_rgb_fxt1( const struct gl_texture_image *texImage,
197                         GLint i, GLint j, GLint k, GLchan *texel )
198{
199   fxt1_decode_1(texImage->Data, texImage->Width, i, j, texel);
200   texel[ACOMP] = 255;
201}
202
203
204static void
205fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
206                           GLint i, GLint j, GLint k, GLfloat *texel )
207{
208   /* just sample as GLchan and convert to float here */
209   GLchan rgba[4];
210   fxt1_decode_1(texImage->Data, texImage->Width, i, j, rgba);
211   texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
212   texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
213   texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
214   texel[ACOMP] = 1.0;
215}
216
217
218
219const struct gl_texture_format _mesa_texformat_rgb_fxt1 = {
220   MESA_FORMAT_RGB_FXT1,		/* MesaFormat */
221   GL_RGB,				/* BaseFormat */
222   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
223   4, /*approx*/			/* RedBits */
224   4, /*approx*/			/* GreenBits */
225   4, /*approx*/			/* BlueBits */
226   0,					/* AlphaBits */
227   0,					/* LuminanceBits */
228   0,					/* IntensityBits */
229   0,					/* IndexBits */
230   0,					/* DepthBits */
231   0,					/* TexelBytes */
232   texstore_rgb_fxt1,			/* StoreTexImageFunc */
233   NULL, /*impossible*/ 		/* FetchTexel1D */
234   fetch_texel_2d_rgb_fxt1, 		/* FetchTexel2D */
235   NULL, /*impossible*/ 		/* FetchTexel3D */
236   NULL, /*impossible*/ 		/* FetchTexel1Df */
237   fetch_texel_2d_f_rgb_fxt1, 		/* FetchTexel2Df */
238   NULL, /*impossible*/ 		/* FetchTexel3Df */
239};
240
241const struct gl_texture_format _mesa_texformat_rgba_fxt1 = {
242   MESA_FORMAT_RGBA_FXT1,		/* MesaFormat */
243   GL_RGBA,				/* BaseFormat */
244   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
245   4, /*approx*/			/* RedBits */
246   4, /*approx*/			/* GreenBits */
247   4, /*approx*/			/* BlueBits */
248   1, /*approx*/			/* AlphaBits */
249   0,					/* LuminanceBits */
250   0,					/* IntensityBits */
251   0,					/* IndexBits */
252   0,					/* DepthBits */
253   0,					/* TexelBytes */
254   texstore_rgba_fxt1,			/* StoreTexImageFunc */
255   NULL, /*impossible*/ 		/* FetchTexel1D */
256   fetch_texel_2d_rgba_fxt1, 		/* FetchTexel2D */
257   NULL, /*impossible*/ 		/* FetchTexel3D */
258   NULL, /*impossible*/ 		/* FetchTexel1Df */
259   fetch_texel_2d_f_rgba_fxt1, 		/* FetchTexel2Df */
260   NULL, /*impossible*/ 		/* FetchTexel3Df */
261};
262
263
/***************************************************************************\
 * FXT1 encoder
 *
 * The encoder was built by reversing the decoder,
 * and is vaguely based on Texus2 by 3dfx. Note that this code
 * is merely a proof of concept, since it is highly UNoptimized;
 * moreover, it is sub-optimal due to the initial conditions passed
 * to Lloyd's algorithm (the interpolation modes are worse).
\***************************************************************************/
273
274
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* True when all four bytes of texel `v` are zero ("transparent black").
 *
 * The bytes are tested individually: reading the texel through an
 * `unsigned long *` would fetch 8 bytes where long is 64 bits wide, which
 * both inspects the neighbouring texel and reads past the end of the last
 * texel in an array.  Byte access also avoids alignment assumptions.
 *
 * NOTE: `v` is evaluated more than once; pass an expression without side
 * effects.
 */
#define ISTBLACK(v) (((v)[0] | (v)[1] | (v)[2] | (v)[3]) == 0)
283
284
/*
 * Fx64: a 64-bit accumulator used to assemble the high quadword of a block.
 *
 *   FX64_MOV32(a, b)  set a to the 32-bit value b (upper half cleared)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 *   FX64_SHL(a, c)    shift a left by c bits (0 <= c < 64)
 *
 * With GNU C a native 64-bit integer is used; otherwise the value is kept
 * as two 32-bit halves.  Every macro expands to a single statement without
 * a trailing semicolon, so both variants can be used the same way
 * (including in unbraced if/else).
 */
#ifdef __GNUC__

#define FX64_NATIVE 1

typedef unsigned long long Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else  /* !__GNUC__ */

#define FX64_NATIVE 0

/* The shift logic below assumes each half is exactly 32 bits wide, so the
 * halves are `unsigned int` rather than `unsigned long` (which is 64 bits
 * on LP64 systems).
 */
typedef struct {
        unsigned int lo, hi;
} Fx64;

/* clear .hi as well, so that a later FX64_SHL never reads garbage */
#define FX64_MOV32(a, b)                               \
   do {                                                \
       (a).lo = (b);                                   \
       (a).hi = 0;                                     \
   } while (0)

#define FX64_OR32(a, b)  ((a).lo |= (b))

#define FX64_SHL(a, c)                                 \
   do {                                                \
       if ((c) >= 32) {                                \
          (a).hi = (a).lo << ((c) - 32);               \
          (a).lo = 0;                                  \
       } else if ((c) > 0) {                           \
          /* c == 0 is skipped: lo >> 32 is undefined */ \
          (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
          (a).lo <<= (c);                              \
       }                                               \
   } while (0)

#endif /* !__GNUC__ */
318
319
320static int
321fxt1_bestcol (float vec[][MAX_COMP], int nv,
322              unsigned char input[MAX_COMP], int nc)
323{
324   int i, j, best = -1;
325   float err = 1e9; /* big enough */
326
327   for (j = 0; j < nv; j++) {
328      float e = 0;
329      for (i = 0; i < nc; i++) {
330         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
331      }
332      if (e < err) {
333         err = e;
334         best = j;
335      }
336   }
337
338   return best;
339}
340
341
342static int
343fxt1_worst (float vec[MAX_COMP],
344            unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
345{
346   int i, k, worst = -1;
347   float err = -1; /* small enough */
348
349   for (k = 0; k < n; k++) {
350      float e = 0;
351      for (i = 0; i < nc; i++) {
352         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
353      }
354      if (e > err) {
355         err = e;
356         worst = k;
357      }
358   }
359
360   return worst;
361}
362
363
364static int
365fxt1_variance (double variance[MAX_COMP],
366               unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
367{
368   int i, k, best = 0;
369   int sx, sx2;
370   double var, maxvar = -1; /* small enough */
371   double teenth = 1.0 / n;
372
373   for (i = 0; i < nc; i++) {
374      sx = sx2 = 0;
375      for (k = 0; k < n; k++) {
376         int t = input[k][i];
377         sx += t;
378         sx2 += t * t;
379      }
380      var = sx2 * teenth - sx * sx * teenth * teenth;
381      if (maxvar < var) {
382         maxvar = var;
383         best = i;
384      }
385      if (variance) {
386         variance[i] = var;
387      }
388   }
389
390   return best;
391}
392
393
394static int
395fxt1_choose (float vec[][MAX_COMP], int nv,
396            unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
397{
398#if 0
399   /* Choose colors from a grid.
400    */
401   int i, j;
402
403   for (j = 0; j < nv; j++) {
404      int m = j * (n - 1) / (nv - 1);
405      for (i = 0; i < nc; i++) {
406         vec[j][i] = input[m][i];
407      }
408   }
409#else
410   /* Our solution here is to find the darkest and brightest colors in
411    * the 8x4 tile and use those as the two representative colors.
412    * There are probably better algorithms to use (histogram-based).
413    */
414   int i, j, k;
415   int minSum = 1000; /* big enough */
416   int maxSum = -1; /* small enough */
417   int minCol;
418   int maxCol;
419
420   struct {
421      int flag;
422      int key;
423      int freq;
424      int idx;
425   } hist[N_TEXELS];
426   int lenh = 0;
427
428   memset(hist, 0, sizeof(hist));
429
430   for (k = 0; k < n; k++) {
431      int l;
432      int key = 0;
433      int sum = 0;
434      for (i = 0; i < nc; i++) {
435         key <<= 8;
436         key |= input[k][i];
437         sum += input[k][i];
438      }
439      for (l = 0; l < n; l++) {
440         if (!hist[l].flag) {
441            /* alloc new slot */
442            hist[l].flag = !0;
443            hist[l].key = key;
444            hist[l].freq = 1;
445            hist[l].idx = k;
446            lenh = l + 1;
447            break;
448         } else if (hist[l].key == key) {
449            hist[l].freq++;
450            break;
451         }
452      }
453      if (minSum > sum) {
454         minSum = sum;
455         minCol = k;
456      }
457      if (maxSum < sum) {
458         maxSum = sum;
459         maxCol = k;
460      }
461   }
462
463   if (lenh <= nv) {
464      for (j = 0; j < lenh; j++) {
465         for (i = 0; i < nc; i++) {
466            vec[j][i] = (float)input[hist[j].idx][i];
467         }
468      }
469      for (; j < nv; j++) {
470         for (i = 0; i < nc; i++) {
471            vec[j][i] = vec[0][i];
472         }
473      }
474      return 0;
475   }
476
477   for (j = 0; j < nv; j++) {
478      for (i = 0; i < nc; i++) {
479         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (nv - 1);
480      }
481   }
482#endif
483
484   return !0;
485}
486
487
488static int
489fxt1_lloyd (float vec[][MAX_COMP], int nv,
490            unsigned char input[N_TEXELS][MAX_COMP], int nc, int n)
491{
492   /* Use the generalized lloyd's algorithm for VQ:
493    *     find 4 color vectors.
494    *
495    *     for each sample color
496    *         sort to nearest vector.
497    *
498    *     replace each vector with the centroid of it's matching colors.
499    *
500    *     repeat until RMS doesn't improve.
501    *
502    *     if a color vector has no samples, or becomes the same as another
503    *     vector, replace it with the color which is farthest from a sample.
504    *
505    * vec[][MAX_COMP]           initial vectors and resulting colors
506    * nv                        number of resulting colors required
507    * input[N_TEXELS][MAX_COMP] input texels
508    * nc                        number of components in input / vec
509    * n                         number of input samples
510    */
511
512   int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
513   int cnt[MAX_VECT]; /* how many times a certain vector was chosen */
514   float error, lasterror = 1e9;
515
516   int i, j, k, rep;
517
518   /* the quantizer */
519   for (rep = 0; rep < LL_N_REP; rep++) {
520      /* reset sums & counters */
521      for (j = 0; j < nv; j++) {
522         for (i = 0; i < nc; i++) {
523            sum[j][i] = 0;
524         }
525         cnt[j] = 0;
526      }
527      error = 0;
528
529      /* scan whole block */
530      for (k = 0; k < n; k++) {
531#if 1
532         int best = -1;
533         float err = 1e9; /* big enough */
534         /* determine best vector */
535         for (j = 0; j < nv; j++) {
536            float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
537                      (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
538                      (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
539            if (nc == 4) {
540               e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
541            }
542            if (e < err) {
543               err = e;
544               best = j;
545            }
546         }
547#else
548         int best = fxt1_bestcol(vec, n_vect, input[k], n_comp, &err);
549#endif
550         /* add in closest color */
551         for (i = 0; i < nc; i++) {
552            sum[best][i] += input[k][i];
553         }
554         /* mark this vector as used */
555         cnt[best]++;
556         /* accumulate error */
557         error += err;
558      }
559
560      /* check RMS */
561      if ((error < LL_RMS_E) ||
562          ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
563         return !0; /* good match */
564      }
565      lasterror = error;
566
567      /* move each vector to the barycenter of its closest colors */
568      for (j = 0; j < nv; j++) {
569         if (cnt[j]) {
570            float div = 1.0 / cnt[j];
571            for (i = 0; i < nc; i++) {
572               vec[j][i] = div * sum[j][i];
573            }
574         } else {
575            /* this vec has no samples or is identical with a previous vec */
576            int worst = fxt1_worst(vec[j], input, nc, n);
577            for (i = 0; i < nc; i++) {
578               vec[j][i] = input[worst][i];
579            }
580         }
581      }
582   }
583
584   return 0; /* could not converge fast enough */
585}
586
587
588static void
589fxt1_quantize_CHROMA (unsigned long *cc,
590                      unsigned char input[N_TEXELS][MAX_COMP])
591{
592   const int n_vect = 4; /* 4 base vectors to find */
593   const int n_comp = 3; /* 3 components: R, G, B */
594   float vec[MAX_VECT][MAX_COMP];
595   int i, j, k;
596   Fx64 hi; /* high quadword */
597   unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
598
599   if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
600      fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
601   }
602
603   FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
604   for (j = n_vect - 1; j >= 0; j--) {
605      for (i = 0; i < n_comp; i++) {
606         /* add in colors */
607         FX64_SHL(hi, 5);
608         FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
609      }
610   }
611   ((Fx64 *)cc)[1] = hi;
612
613   lohi = lolo = 0;
614   /* right microtile */
615   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
616      lohi <<= 2;
617      lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
618   }
619   /* left microtile */
620   for (; k >= 0; k--) {
621      lolo <<= 2;
622      lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
623   }
624   cc[1] = lohi;
625   cc[0] = lolo;
626}
627
628
629static void
630fxt1_quantize_ALPHA0 (unsigned long *cc,
631                      unsigned char input[N_TEXELS][MAX_COMP],
632                      unsigned char reord[N_TEXELS][MAX_COMP], int n)
633{
634   const int n_vect = 3; /* 3 base vectors to find */
635   const int n_comp = 4; /* 4 components: R, G, B, A */
636   float vec[MAX_VECT][MAX_COMP];
637   int i, j, k;
638   Fx64 hi; /* high quadword */
639   unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
640
641   /* the last vector indicates zero */
642   for (i = 0; i < n_comp; i++) {
643      vec[n_vect][i] = 0;
644   }
645
646   /* the first n texels in reord are guaranteed to be non-zero */
647   if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
648      fxt1_lloyd(vec, n_vect, reord, n_comp, n);
649   }
650
651   FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
652   for (j = n_vect - 1; j >= 0; j--) {
653      /* add in alphas */
654      FX64_SHL(hi, 5);
655      FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
656   }
657   for (j = n_vect - 1; j >= 0; j--) {
658      for (i = 0; i < n_comp - 1; i++) {
659         /* add in colors */
660         FX64_SHL(hi, 5);
661         FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
662      }
663   }
664   ((Fx64 *)cc)[1] = hi;
665
666   lohi = lolo = 0;
667   /* right microtile */
668   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
669      lohi <<= 2;
670      lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
671   }
672   /* left microtile */
673   for (; k >= 0; k--) {
674      lolo <<= 2;
675      lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
676   }
677   cc[1] = lohi;
678   cc[0] = lolo;
679}
680
681
682static void
683fxt1_quantize_ALPHA1 (unsigned long *cc,
684                      unsigned char input[N_TEXELS][MAX_COMP])
685{
686   const int n_vect = 3; /* highest vector number in each microtile */
687   const int n_comp = 4; /* 4 components: R, G, B, A */
688   float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
689   float b, iv[MAX_COMP]; /* interpolation vector */
690   int i, j, k;
691   Fx64 hi; /* high quadword */
692   unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
693
694   int minSum;
695   int maxSum;
696   int minColL = 0, maxColL = 0;
697   int minColR = 0, maxColR = 0;
698   int sumL = 0, sumR = 0;
699
700   /* Our solution here is to find the darkest and brightest colors in
701    * the 4x4 tile and use those as the two representative colors.
702    * There are probably better algorithms to use (histogram-based).
703    */
704   minSum = 1000; /* big enough */
705   maxSum = -1; /* small enough */
706   for (k = 0; k < N_TEXELS / 2; k++) {
707      int sum = 0;
708      for (i = 0; i < n_comp; i++) {
709         sum += input[k][i];
710      }
711      if (minSum > sum) {
712         minSum = sum;
713         minColL = k;
714      }
715      if (maxSum < sum) {
716         maxSum = sum;
717         maxColL = k;
718      }
719      sumL += sum;
720   }
721   minSum = 1000; /* big enough */
722   maxSum = -1; /* small enough */
723   for (; k < N_TEXELS; k++) {
724      int sum = 0;
725      for (i = 0; i < n_comp; i++) {
726         sum += input[k][i];
727      }
728      if (minSum > sum) {
729         minSum = sum;
730         minColR = k;
731      }
732      if (maxSum < sum) {
733         maxSum = sum;
734         maxColR = k;
735      }
736      sumR += sum;
737   }
738
739   /* choose the common vector (yuck!) */
740{
741   int j1, j2;
742   int v1 = 0, v2 = 0;
743   float err = 1e9; /* big enough */
744   float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
745   for (i = 0; i < n_comp; i++) {
746      tv[0][i] = input[minColL][i];
747      tv[1][i] = input[maxColL][i];
748      tv[2][i] = input[minColR][i];
749      tv[3][i] = input[maxColR][i];
750   }
751   for (j1 = 0; j1 < 2; j1++) {
752      for (j2 = 2; j2 < 4; j2++) {
753          float e = 0;
754          for (i = 0; i < n_comp; i++) {
755             e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
756          }
757          if (e < err) {
758             err = e;
759             v1 = j1;
760             v2 = j2;
761          }
762      }
763   }
764   for (i = 0; i < n_comp; i++) {
765      vec[0][i] = tv[1 - v1][i];
766      vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
767      vec[2][i] = tv[5 - v2][i];
768   }
769}
770
771   /* left microtile */
772   cc[0] = 0;
773   if (minColL != maxColL) {
774      /* compute interpolation vector */
775      float d2 = 0;
776      float rd2;
777
778      for (i = 0; i < n_comp; i++) {
779         iv[i] = vec[1][i] - vec[0][i];
780         d2 += iv[i] * iv[i];
781      }
782      rd2 = (float)n_vect / d2;
783      b = 0;
784      for (i = 0; i < n_comp; i++) {
785         b -= iv[i] * vec[0][i];
786         iv[i] *= rd2;
787      }
788      b = b * rd2 + 0.5f;
789
790      /* add in texels */
791      lolo = 0;
792      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
793         int texel;
794         /* interpolate color */
795         float dot = 0;
796         for (i = 0; i < n_comp; i++) {
797            dot += input[k][i] * iv[i];
798         }
799         texel = (int)(dot + b);
800         if (texel < 0) {
801            texel = 0;
802         } else if (texel > n_vect) {
803            texel = n_vect;
804         }
805         /* add in texel */
806         lolo <<= 2;
807         lolo |= texel;
808      }
809
810      cc[0] = lolo;
811   }
812
813   /* right microtile */
814   cc[1] = 0;
815   if (minColR != maxColR) {
816      /* compute interpolation vector */
817      float d2 = 0;
818      float rd2;
819
820      for (i = 0; i < n_comp; i++) {
821         iv[i] = vec[1][i] - vec[2][i];
822         d2 += iv[i] * iv[i];
823      }
824      rd2 = (float)n_vect / d2;
825      b = 0;
826      for (i = 0; i < n_comp; i++) {
827         b -= iv[i] * vec[2][i];
828         iv[i] *= rd2;
829      }
830      b = b * rd2 + 0.5f;
831
832      /* add in texels */
833      lohi = 0;
834      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
835         int texel;
836         /* interpolate color */
837         float dot = 0;
838         for (i = 0; i < n_comp; i++) {
839            dot += input[k][i] * iv[i];
840         }
841         texel = (int)(dot + b);
842         if (texel < 0) {
843            texel = 0;
844         } else if (texel > n_vect) {
845            texel = n_vect;
846         }
847         /* add in texel */
848         lohi <<= 2;
849         lohi |= texel;
850      }
851
852      cc[1] = lohi;
853   }
854
855   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
856   for (j = n_vect - 1; j >= 0; j--) {
857      /* add in alphas */
858      FX64_SHL(hi, 5);
859      FX64_OR32(hi, (unsigned int)(vec[j][ACOMP] / 8.0));
860   }
861   for (j = n_vect - 1; j >= 0; j--) {
862      for (i = 0; i < n_comp - 1; i++) {
863         /* add in colors */
864         FX64_SHL(hi, 5);
865         FX64_OR32(hi, (unsigned int)(vec[j][i] / 8.0));
866      }
867   }
868   ((Fx64 *)cc)[1] = hi;
869}
870
871
872static void
873fxt1_quantize_HI (unsigned long *cc,
874                  unsigned char input[N_TEXELS][MAX_COMP],
875                  unsigned char reord[N_TEXELS][MAX_COMP], int n)
876{
877   const int n_vect = 6; /* highest vector number */
878   const int n_comp = 3; /* 3 components: R, G, B */
879   float b, iv[MAX_COMP]; /* interpolation vector */
880   int i, k;
881   unsigned long hihi; /* high quadword: hi dword */
882
883   int minSum = 1000; /* big enough */
884   int maxSum = -1; /* small enough */
885   int minCol;
886   int maxCol;
887
888   /* Our solution here is to find the darkest and brightest colors in
889    * the 8x4 tile and use those as the two representative colors.
890    * There are probably better algorithms to use (histogram-based).
891    */
892   for (k = 0; k < n; k++) {
893      int sum = 0;
894      for (i = 0; i < n_comp; i++) {
895         sum += reord[k][i];
896      }
897      if (minSum > sum) {
898         minSum = sum;
899         minCol = k;
900      }
901      if (maxSum < sum) {
902         maxSum = sum;
903         maxCol = k;
904      }
905   }
906
907   hihi = 0; /* cc-hi = "00" */
908   for (i = 0; i < n_comp; i++) {
909      /* add in colors */
910      hihi <<= 5;
911      hihi |= reord[maxCol][i] >> 3;
912   }
913   for (i = 0; i < n_comp; i++) {
914      /* add in colors */
915      hihi <<= 5;
916      hihi |= reord[minCol][i] >> 3;
917   }
918   cc[3] = hihi;
919   cc[0] = cc[1] = cc[2] = 0;
920
921   /* compute interpolation vector */
922   if (minCol != maxCol) {
923      float d2 = 0;
924      float rd2;
925
926      for (i = 0; i < n_comp; i++) {
927         iv[i] = reord[maxCol][i] - reord[minCol][i];
928         d2 += iv[i] * iv[i];
929      }
930      rd2 = (float)n_vect / d2;
931      b = 0;
932      for (i = 0; i < n_comp; i++) {
933         b -= iv[i] * reord[minCol][i];
934         iv[i] *= rd2;
935      }
936      b = b * rd2 + 0.5f;
937   }
938
939   /* add in texels */
940   for (k = N_TEXELS - 1; k >= 0; k--) {
941      int t = k * 3;
942      unsigned long *kk = (unsigned long *)((unsigned long)cc + t / 8);
943      int texel = n_vect + 1; /* transparent black */
944
945      if (!ISTBLACK(input[k])) {
946         if (minCol != maxCol) {
947            /* interpolate color */
948            float dot = 0;
949            for (i = 0; i < n_comp; i++) {
950               dot += input[k][i] * iv[i];
951            }
952            texel = (int)(dot + b);
953            if (texel < 0) {
954               texel = 0;
955            } else if (texel > n_vect) {
956               texel = n_vect;
957            }
958            /* add in texel */
959            kk[0] |= texel << (t & 7);
960         }
961      } else {
962         /* add in texel */
963         kk[0] |= texel << (t & 7);
964      }
965   }
966}
967
968
969static void
970fxt1_quantize_MIXED1 (unsigned long *cc,
971                      unsigned char input[N_TEXELS][MAX_COMP])
972{
973   const int n_vect = 2; /* highest vector number in each microtile */
974   const int n_comp = 3; /* 3 components: R, G, B */
975   unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
976   float b, iv[MAX_COMP]; /* interpolation vector */
977   int i, j, k;
978   Fx64 hi; /* high quadword */
979   unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
980
981   int minSum;
982   int maxSum;
983   int minColL = 0, maxColL = -1;
984   int minColR = 0, maxColR = -1;
985
986   /* Our solution here is to find the darkest and brightest colors in
987    * the 4x4 tile and use those as the two representative colors.
988    * There are probably better algorithms to use (histogram-based).
989    */
990   minSum = 1000; /* big enough */
991   maxSum = -1; /* small enough */
992   for (k = 0; k < N_TEXELS / 2; k++) {
993      if (!ISTBLACK(input[k])) {
994         int sum = 0;
995         for (i = 0; i < n_comp; i++) {
996            sum += input[k][i];
997         }
998         if (minSum > sum) {
999            minSum = sum;
1000            minColL = k;
1001         }
1002         if (maxSum < sum) {
1003            maxSum = sum;
1004            maxColL = k;
1005         }
1006      }
1007   }
1008   minSum = 1000; /* big enough */
1009   maxSum = -1; /* small enough */
1010   for (; k < N_TEXELS; k++) {
1011      if (!ISTBLACK(input[k])) {
1012         int sum = 0;
1013         for (i = 0; i < n_comp; i++) {
1014            sum += input[k][i];
1015         }
1016         if (minSum > sum) {
1017            minSum = sum;
1018            minColR = k;
1019         }
1020         if (maxSum < sum) {
1021            maxSum = sum;
1022            maxColR = k;
1023         }
1024      }
1025   }
1026
1027   /* left microtile */
1028   if (maxColL == -1) {
1029      /* all transparent black */
1030      cc[0] = -1;
1031      for (i = 0; i < n_comp; i++) {
1032         vec[0][i] = 0;
1033         vec[1][i] = 0;
1034      }
1035   } else {
1036      cc[0] = 0;
1037      for (i = 0; i < n_comp; i++) {
1038         vec[0][i] = input[minColL][i];
1039         vec[1][i] = input[maxColL][i];
1040      }
1041      if (minColL != maxColL) {
1042         /* compute interpolation vector */
1043         float d2 = 0;
1044         float rd2;
1045
1046         for (i = 0; i < n_comp; i++) {
1047            iv[i] = vec[1][i] - vec[0][i];
1048            d2 += iv[i] * iv[i];
1049         }
1050         rd2 = (float)n_vect / d2;
1051         b = 0;
1052         for (i = 0; i < n_comp; i++) {
1053            b -= iv[i] * vec[0][i];
1054            iv[i] *= rd2;
1055         }
1056         b = b * rd2 + 0.5f;
1057
1058         /* add in texels */
1059         lolo = 0;
1060         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1061            int texel = n_vect + 1; /* transparent black */
1062            if (!ISTBLACK(input[k])) {
1063               /* interpolate color */
1064               float dot = 0;
1065               for (i = 0; i < n_comp; i++) {
1066                  dot += input[k][i] * iv[i];
1067               }
1068               texel = (int)(dot + b);
1069               if (texel < 0) {
1070                  texel = 0;
1071               } else if (texel > n_vect) {
1072                  texel = n_vect;
1073               }
1074            }
1075            /* add in texel */
1076            lolo <<= 2;
1077            lolo |= texel;
1078         }
1079         cc[0] = lolo;
1080      }
1081   }
1082
1083   /* right microtile */
1084   if (maxColR == -1) {
1085      /* all transparent black */
1086      cc[1] = -1;
1087      for (i = 0; i < n_comp; i++) {
1088         vec[2][i] = 0;
1089         vec[3][i] = 0;
1090      }
1091   } else {
1092      cc[1] = 0;
1093      for (i = 0; i < n_comp; i++) {
1094         vec[2][i] = input[minColR][i];
1095         vec[3][i] = input[maxColR][i];
1096      }
1097      if (minColR != maxColR) {
1098         /* compute interpolation vector */
1099         float d2 = 0;
1100         float rd2;
1101
1102         for (i = 0; i < n_comp; i++) {
1103            iv[i] = vec[3][i] - vec[2][i];
1104            d2 += iv[i] * iv[i];
1105         }
1106         rd2 = (float)n_vect / d2;
1107         b = 0;
1108         for (i = 0; i < n_comp; i++) {
1109            b -= iv[i] * vec[2][i];
1110            iv[i] *= rd2;
1111         }
1112         b = b * rd2 + 0.5f;
1113
1114         /* add in texels */
1115         lohi = 0;
1116         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1117            int texel = n_vect + 1; /* transparent black */
1118            if (!ISTBLACK(input[k])) {
1119               /* interpolate color */
1120               float dot = 0;
1121               for (i = 0; i < n_comp; i++) {
1122                  dot += input[k][i] * iv[i];
1123               }
1124               texel = (int)(dot + b);
1125               if (texel < 0) {
1126                  texel = 0;
1127               } else if (texel > n_vect) {
1128                  texel = n_vect;
1129               }
1130            }
1131            /* add in texel */
1132            lohi <<= 2;
1133            lohi |= texel;
1134         }
1135         cc[1] = lohi;
1136      }
1137   }
1138
1139   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1140   for (j = 2 * 2 - 1; j >= 0; j--) {
1141      for (i = 0; i < n_comp; i++) {
1142         /* add in colors */
1143         FX64_SHL(hi, 5);
1144         FX64_OR32(hi, vec[j][i] >> 3);
1145      }
1146   }
1147   ((Fx64 *)cc)[1] = hi;
1148}
1149
1150
1151static void
1152fxt1_quantize_MIXED0 (unsigned long *cc,
1153                      unsigned char input[N_TEXELS][MAX_COMP])
1154{
1155   const int n_vect = 3; /* highest vector number in each microtile */
1156   const int n_comp = 3; /* 3 components: R, G, B */
1157   unsigned char vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1158   float b, iv[MAX_COMP]; /* interpolation vector */
1159   int i, j, k;
1160   Fx64 hi; /* high quadword */
1161   unsigned long lohi, lolo; /* low quadword: hi dword, lo dword */
1162
1163   int minColL = 0, maxColL = 0;
1164   int minColR = 0, maxColR = 0;
1165#if 0
1166   int minSum;
1167   int maxSum;
1168
1169   /* Our solution here is to find the darkest and brightest colors in
1170    * the 4x4 tile and use those as the two representative colors.
1171    * There are probably better algorithms to use (histogram-based).
1172    */
1173   minSum = 1000; /* big enough */
1174   maxSum = -1; /* small enough */
1175   for (k = 0; k < N_TEXELS / 2; k++) {
1176      int sum = 0;
1177      for (i = 0; i < n_comp; i++) {
1178         sum += input[k][i];
1179      }
1180      if (minSum > sum) {
1181         minSum = sum;
1182         minColL = k;
1183      }
1184      if (maxSum < sum) {
1185         maxSum = sum;
1186         maxColL = k;
1187      }
1188   }
1189   minSum = 1000; /* big enough */
1190   maxSum = -1; /* small enough */
1191   for (; k < N_TEXELS; k++) {
1192      int sum = 0;
1193      for (i = 0; i < n_comp; i++) {
1194         sum += input[k][i];
1195      }
1196      if (minSum > sum) {
1197         minSum = sum;
1198         minColR = k;
1199      }
1200      if (maxSum < sum) {
1201         maxSum = sum;
1202         maxColR = k;
1203      }
1204   }
1205#else
1206   int minVal;
1207   int maxVal;
1208   int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1209   int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1210
1211   /* Scan the channel with max variance for lo & hi
1212    * and use those as the two representative colors.
1213    */
1214   minVal = 1000; /* big enough */
1215   maxVal = -1; /* small enough */
1216   for (k = 0; k < N_TEXELS / 2; k++) {
1217      int t = input[k][maxVarL];
1218      if (minVal > t) {
1219         minVal = t;
1220         minColL = k;
1221      }
1222      if (maxVal < t) {
1223         maxVal = t;
1224         maxColL = k;
1225      }
1226   }
1227   minVal = 1000; /* big enough */
1228   maxVal = -1; /* small enough */
1229   for (; k < N_TEXELS; k++) {
1230      int t = input[k][maxVarR];
1231      if (minVal > t) {
1232         minVal = t;
1233         minColR = k;
1234      }
1235      if (maxVal < t) {
1236         maxVal = t;
1237         maxColR = k;
1238      }
1239   }
1240#endif
1241
1242   /* left microtile */
1243   cc[0] = 0;
1244   for (i = 0; i < n_comp; i++) {
1245      vec[0][i] = input[minColL][i];
1246      vec[1][i] = input[maxColL][i];
1247   }
1248   if (minColL != maxColL) {
1249      /* compute interpolation vector */
1250      float d2 = 0;
1251      float rd2;
1252
1253      for (i = 0; i < n_comp; i++) {
1254         iv[i] = vec[1][i] - vec[0][i];
1255         d2 += iv[i] * iv[i];
1256      }
1257      rd2 = (float)n_vect / d2;
1258      b = 0;
1259      for (i = 0; i < n_comp; i++) {
1260         b -= iv[i] * vec[0][i];
1261         iv[i] *= rd2;
1262      }
1263      b = b * rd2 + 0.5f;
1264
1265      /* add in texels */
1266      lolo = 0;
1267      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1268         int texel;
1269         /* interpolate color */
1270         float dot = 0;
1271         for (i = 0; i < n_comp; i++) {
1272            dot += input[k][i] * iv[i];
1273         }
1274         texel = (int)(dot + b);
1275         if (texel < 0) {
1276            texel = 0;
1277         } else if (texel > n_vect) {
1278            texel = n_vect;
1279         }
1280         /* add in texel */
1281         lolo <<= 2;
1282         lolo |= texel;
1283      }
1284
1285      /* funky encoding for LSB of green */
1286      if (((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1287         for (i = 0; i < n_comp; i++) {
1288            vec[1][i] = input[minColL][i];
1289            vec[0][i] = input[maxColL][i];
1290         }
1291         lolo = ~lolo;
1292      }
1293
1294      cc[0] = lolo;
1295   }
1296
1297   /* right microtile */
1298   cc[1] = 0;
1299   for (i = 0; i < n_comp; i++) {
1300      vec[2][i] = input[minColR][i];
1301      vec[3][i] = input[maxColR][i];
1302   }
1303   if (minColR != maxColR) {
1304      /* compute interpolation vector */
1305      float d2 = 0;
1306      float rd2;
1307
1308      for (i = 0; i < n_comp; i++) {
1309         iv[i] = vec[3][i] - vec[2][i];
1310         d2 += iv[i] * iv[i];
1311      }
1312      rd2 = (float)n_vect / d2;
1313      b = 0;
1314      for (i = 0; i < n_comp; i++) {
1315         b -= iv[i] * vec[2][i];
1316         iv[i] *= rd2;
1317      }
1318      b = b * rd2 + 0.5f;
1319
1320      /* add in texels */
1321      lohi = 0;
1322      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1323         int texel;
1324         /* interpolate color */
1325         float dot = 0;
1326         for (i = 0; i < n_comp; i++) {
1327            dot += input[k][i] * iv[i];
1328         }
1329         texel = (int)(dot + b);
1330         if (texel < 0) {
1331            texel = 0;
1332         } else if (texel > n_vect) {
1333            texel = n_vect;
1334         }
1335         /* add in texel */
1336         lohi <<= 2;
1337         lohi |= texel;
1338      }
1339
1340      /* funky encoding for LSB of green */
1341      if (((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1342         for (i = 0; i < n_comp; i++) {
1343            vec[3][i] = input[minColR][i];
1344            vec[2][i] = input[maxColR][i];
1345         }
1346         lohi = ~lohi;
1347      }
1348
1349      cc[1] = lohi;
1350   }
1351
1352   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1353   for (j = 2 * 2 - 1; j >= 0; j--) {
1354      for (i = 0; i < n_comp; i++) {
1355         /* add in colors */
1356         FX64_SHL(hi, 5);
1357         FX64_OR32(hi, vec[j][i] >> 3);
1358      }
1359   }
1360   ((Fx64 *)cc)[1] = hi;
1361}
1362
1363
1364static void
1365fxt1_quantize (unsigned long *cc, const unsigned char *lines[], int comps)
1366{
1367   int trualpha;
1368   unsigned char reord[N_TEXELS][MAX_COMP];
1369
1370   unsigned char input[N_TEXELS][MAX_COMP];
1371   int i, k, l;
1372
1373   memset(input, -1, sizeof(input));
1374
1375   /* 8 texels each line */
1376   for (l = 0; l < 4; l++) {
1377      for (k = 0; k < 4; k++) {
1378         for (i = 0; i < comps; i++) {
1379            input[k + l * 4][i] = *lines[l]++;
1380         }
1381      }
1382      for (; k < 8; k++) {
1383         for (i = 0; i < comps; i++) {
1384            input[k + l * 4 + 12][i] = *lines[l]++;
1385         }
1386      }
1387   }
1388
1389   /* block looks like this:
1390    * 00, 01, 02, 03, 08, 09, 0a, 0b
1391    * 10, 11, 12, 13, 18, 19, 1a, 1b
1392    * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1393    * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1394    */
1395
1396   /* [dBorca]
1397    * stupidity flows forth from this
1398    */
1399   l = N_TEXELS;
1400   trualpha = 0;
1401   if (comps == 4) {
1402      /* skip all transparent black texels */
1403      l = 0;
1404      for (k = 0; k < N_TEXELS; k++) {
1405         /* test all components against 0 */
1406         if (!ISTBLACK(input[k])) {
1407            /* texel is not transparent black */
1408            COPY_4UBV(reord[l], input[k]);
1409            if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1410               /* non-opaque texel */
1411               trualpha = !0;
1412            }
1413            l++;
1414         }
1415      }
1416   }
1417
1418#if 0
1419   if (trualpha) {
1420      fxt1_quantize_ALPHA0(cc, input, reord, l);
1421   } else if (l == 0) {
1422      cc[0] = cc[1] = cc[2] = -1;
1423      cc[3] = 0;
1424   } else if (l < N_TEXELS) {
1425      fxt1_quantize_HI(cc, input, reord, l);
1426   } else {
1427      fxt1_quantize_CHROMA(cc, input);
1428   }
1429#else
1430   if (trualpha) {
1431      fxt1_quantize_ALPHA1(cc, input);
1432   } else if (l == 0) {
1433      cc[0] = cc[1] = cc[2] = -1;
1434      cc[3] = 0;
1435   } else if (l < N_TEXELS) {
1436      fxt1_quantize_MIXED1(cc, input);
1437   } else {
1438      fxt1_quantize_MIXED0(cc, input);
1439   }
1440#endif
1441}
1442
1443
1444int
1445fxt1_encode (GLcontext *ctx,
1446             unsigned int width, unsigned int height,
1447             int srcFormat,
1448             const void *source, int srcRowStride,
1449             void *dest, int destRowStride)
1450{
1451   const int comps = (srcFormat == GL_RGB) ? 3 : 4;
1452   unsigned int x, y;
1453   const unsigned char *data;
1454   unsigned long *encoded = dest;
1455   GLubyte *newSource = NULL;
1456
1457   /*
1458    * Rescale image if width is less than 8 or height is less than 4.
1459    */
1460   if (width < 8 || height < 4) {
1461      GLint newWidth = (width + 7) & ~7;
1462      GLint newHeight = (height + 3) & ~3;
1463      newSource = MALLOC(comps * newWidth * newHeight * sizeof(GLchan));
1464      _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1465                               comps, source, srcRowStride, newSource);
1466      source = newSource;
1467      width = newWidth;
1468      height = newHeight;
1469      srcRowStride = comps * newWidth;
1470   }
1471
1472   data = source;
1473   destRowStride = (destRowStride - width * 2) / 4;
1474   for (y = 0; y < height; y += 4) {
1475      unsigned int offs = 0 + (y + 0) * srcRowStride;
1476      for (x = 0; x < width; x += 8) {
1477         const unsigned char *lines[4];
1478         lines[0] = &data[offs];
1479         lines[1] = lines[0] + srcRowStride;
1480         lines[2] = lines[1] + srcRowStride;
1481         lines[3] = lines[2] + srcRowStride;
1482         offs += 8 * comps;
1483         fxt1_quantize(encoded, lines, comps);
1484         /* 128 bits per 8x4 block = 4bpp */
1485         encoded += 4;
1486      }
1487      encoded += destRowStride;
1488   }
1489
1490   if (newSource != NULL) {
1491      FREE(newSource);
1492   }
1493
1494   return 0;
1495}
1496
1497
1498/***************************************************************************\
1499 * FXT1 decoder
1500 *
1501 * The decoder is based on GL_3DFX_texture_compression_FXT1
1502 * specification and serves as a concept for the encoder.
1503\***************************************************************************/
1504
1505
/* Lookup table for scaling 5 bit colors up to 8 bits.
 * Entry i is i * 255 / 31 rounded to nearest; the table is read-only.
 */
static const unsigned char _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};
1513
/* Lookup table for scaling 6 bit colors up to 8 bits.
 * Entry i is i * 255 / 63 rounded to nearest; the table is read-only.
 */
static const unsigned char _rgb_scale_6[] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1525
1526
/* CC_SEL: word `cc[which / 32]' shifted right so that bit `which' of the
 * block ends up in bit 0.  The result is NOT masked, and the element type
 * of `cc' must be 32 bits wide for the bit numbering to be right.
 */
#define CC_SEL(cc, which) ((cc)[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of `c' to 8 bits. */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand the low 5 bits of `c' plus one extra low bit `b' to 8 bits. */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer blend, c0 at t == 0 and c1 at t == n.  The whole
 * expansion is parenthesized so it can be used inside larger expressions.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
/* ZERO_4UBV: clear exactly four bytes.  A store through an unsigned long
 * pointer would write 8 bytes where long is 64 bits wide.
 */
#define ZERO_4UBV(v) memset((v), 0, 4)
1532
1533
1534static void
1535fxt1_decode_1HI (unsigned long code, int t, unsigned char *rgba)
1536{
1537   const unsigned long *cc;
1538
1539   t *= 3;
1540   cc = (unsigned long *)(code + t / 8);
1541   t = (cc[0] >> (t & 7)) & 7;
1542
1543   if (t == 7) {
1544      ZERO_4UBV(rgba);
1545   } else {
1546      cc = (unsigned long *)(code + 12);
1547      if (t == 0) {
1548         rgba[BCOMP] = UP5(CC_SEL(cc, 0));
1549         rgba[GCOMP] = UP5(CC_SEL(cc, 5));
1550         rgba[RCOMP] = UP5(CC_SEL(cc, 10));
1551      } else if (t == 6) {
1552         rgba[BCOMP] = UP5(CC_SEL(cc, 15));
1553         rgba[GCOMP] = UP5(CC_SEL(cc, 20));
1554         rgba[RCOMP] = UP5(CC_SEL(cc, 25));
1555      } else {
1556         rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1557         rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1558         rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1559      }
1560      rgba[ACOMP] = 255;
1561   }
1562}
1563
1564
1565static void
1566fxt1_decode_1CHROMA (unsigned long code, int t, unsigned char *rgba)
1567{
1568   const unsigned long *cc;
1569   unsigned long kk;
1570
1571   cc = (unsigned long *)code;
1572   if (t & 16) {
1573      cc++;
1574      t &= 15;
1575   }
1576   t = (cc[0] >> (t * 2)) & 3;
1577
1578   t *= 15;
1579   cc = (unsigned long *)(code + 8 + t / 8);
1580   kk = cc[0] >> (t & 7);
1581   rgba[BCOMP] = UP5(kk);
1582   rgba[GCOMP] = UP5(kk >> 5);
1583   rgba[RCOMP] = UP5(kk >> 10);
1584   rgba[ACOMP] = 255;
1585}
1586
1587
1588static void
1589fxt1_decode_1MIXED (unsigned long code, int t, unsigned char *rgba)
1590{
1591   const unsigned long *cc;
1592   unsigned int col[2][3];
1593   int glsb, selb;
1594
1595   cc = (unsigned long *)code;
1596   if (t & 16) {
1597      t &= 15;
1598      t = (cc[1] >> (t * 2)) & 3;
1599      /* col 2 */
1600      col[0][BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1601      col[0][GCOMP] = CC_SEL(cc, 99);
1602      col[0][RCOMP] = CC_SEL(cc, 104);
1603      /* col 3 */
1604      col[1][BCOMP] = CC_SEL(cc, 109);
1605      col[1][GCOMP] = CC_SEL(cc, 114);
1606      col[1][RCOMP] = CC_SEL(cc, 119);
1607      glsb = CC_SEL(cc, 126);
1608      selb = CC_SEL(cc, 33);
1609   } else {
1610      t = (cc[0] >> (t * 2)) & 3;
1611      /* col 0 */
1612      col[0][BCOMP] = CC_SEL(cc, 64);
1613      col[0][GCOMP] = CC_SEL(cc, 69);
1614      col[0][RCOMP] = CC_SEL(cc, 74);
1615      /* col 1 */
1616      col[1][BCOMP] = CC_SEL(cc, 79);
1617      col[1][GCOMP] = CC_SEL(cc, 84);
1618      col[1][RCOMP] = CC_SEL(cc, 89);
1619      glsb = CC_SEL(cc, 125);
1620      selb = CC_SEL(cc, 1);
1621   }
1622
1623   if (CC_SEL(cc, 124) & 1) {
1624      /* alpha[0] == 1 */
1625
1626      if (t == 3) {
1627         ZERO_4UBV(rgba);
1628      } else {
1629         if (t == 0) {
1630            rgba[BCOMP] = UP5(col[0][BCOMP]);
1631            rgba[GCOMP] = UP5(col[0][GCOMP]);
1632            rgba[RCOMP] = UP5(col[0][RCOMP]);
1633         } else if (t == 2) {
1634            rgba[BCOMP] = UP5(col[1][BCOMP]);
1635            rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1636            rgba[RCOMP] = UP5(col[1][RCOMP]);
1637         } else {
1638            rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1639            rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1640            rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1641         }
1642         rgba[ACOMP] = 255;
1643      }
1644   } else {
1645      /* alpha[0] == 0 */
1646
1647      if (t == 0) {
1648         rgba[BCOMP] = UP5(col[0][BCOMP]);
1649         rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);
1650         rgba[RCOMP] = UP5(col[0][RCOMP]);
1651      } else if (t == 3) {
1652         rgba[BCOMP] = UP5(col[1][BCOMP]);
1653         rgba[GCOMP] = UP6(col[1][GCOMP], glsb);
1654         rgba[RCOMP] = UP5(col[1][RCOMP]);
1655      } else {
1656         rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1657         rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1658                                  UP6(col[1][GCOMP], glsb));
1659         rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1660      }
1661      rgba[ACOMP] = 255;
1662   }
1663}
1664
1665
1666static void
1667fxt1_decode_1ALPHA (unsigned long code, int t, unsigned char *rgba)
1668{
1669   const unsigned long *cc;
1670
1671   cc = (unsigned long *)code;
1672   if (CC_SEL(cc, 124) & 1) {
1673      /* lerp == 1 */
1674      unsigned int col0[4];
1675
1676      if (t & 16) {
1677         t &= 15;
1678         t = (cc[1] >> (t * 2)) & 3;
1679         /* col 2 */
1680         col0[BCOMP] = (*(unsigned long *)(code + 11)) >> 6;
1681         col0[GCOMP] = CC_SEL(cc, 99);
1682         col0[RCOMP] = CC_SEL(cc, 104);
1683         col0[ACOMP] = CC_SEL(cc, 119);
1684      } else {
1685         t = (cc[0] >> (t * 2)) & 3;
1686         /* col 0 */
1687         col0[BCOMP] = CC_SEL(cc, 64);
1688         col0[GCOMP] = CC_SEL(cc, 69);
1689         col0[RCOMP] = CC_SEL(cc, 74);
1690         col0[ACOMP] = CC_SEL(cc, 109);
1691      }
1692
1693      if (t == 0) {
1694         rgba[BCOMP] = UP5(col0[BCOMP]);
1695         rgba[GCOMP] = UP5(col0[GCOMP]);
1696         rgba[RCOMP] = UP5(col0[RCOMP]);
1697         rgba[ACOMP] = UP5(col0[ACOMP]);
1698      } else if (t == 3) {
1699         rgba[BCOMP] = UP5(CC_SEL(cc, 79));
1700         rgba[GCOMP] = UP5(CC_SEL(cc, 84));
1701         rgba[RCOMP] = UP5(CC_SEL(cc, 89));
1702         rgba[ACOMP] = UP5(CC_SEL(cc, 114));
1703      } else {
1704         rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1705         rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1706         rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1707         rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1708      }
1709   } else {
1710      /* lerp == 0 */
1711
1712      if (t & 16) {
1713         cc++;
1714         t &= 15;
1715      }
1716      t = (cc[0] >> (t * 2)) & 3;
1717
1718      if (t == 3) {
1719         ZERO_4UBV(rgba);
1720      } else {
1721         unsigned long kk;
1722         cc = (unsigned long *)code;
1723         rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));
1724         t *= 15;
1725         cc = (unsigned long *)(code + 8 + t / 8);
1726         kk = cc[0] >> (t & 7);
1727         rgba[BCOMP] = UP5(kk);
1728         rgba[GCOMP] = UP5(kk >> 5);
1729         rgba[RCOMP] = UP5(kk >> 10);
1730      }
1731   }
1732}
1733
1734
1735void
1736fxt1_decode_1 (const void *texture, int width,
1737               int i, int j, unsigned char *rgba)
1738{
1739   static void (*decode_1[]) (unsigned long, int, unsigned char *) = {
1740      fxt1_decode_1HI,     /* cc-high   = "00?" */
1741      fxt1_decode_1HI,     /* cc-high   = "00?" */
1742      fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1743      fxt1_decode_1ALPHA,  /* alpha     = "011" */
1744      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1745      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1746      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1747      fxt1_decode_1MIXED   /* mixed     = "1??" */
1748   };
1749
1750   unsigned long code = (unsigned long)texture +
1751                        ((j / 4) * (width / 8) + (i / 8)) * 16;
1752   int mode = CC_SEL((unsigned long *)code, 125);
1753   int t = i & 7;
1754
1755   if (t & 4) {
1756      t += 12;
1757   }
1758   t += (j & 3) * 4;
1759
1760   decode_1[mode](code, t, rgba);
1761}
1762