texcompress_fxt1.c revision abcb6b6d01c253627363a05205291630b5247018
1/*
2 * Mesa 3-D graphics library
3 * Version:  7.1
4 *
5 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26/**
27 * \file texcompress_fxt1.c
28 * GL_EXT_texture_compression_fxt1 support.
29 */
30
31
32#include "glheader.h"
33#include "imports.h"
34#include "colormac.h"
35#include "context.h"
36#include "convolve.h"
37#include "image.h"
38#include "mipmap.h"
39#include "texcompress.h"
40#include "texcompress_fxt1.h"
41#include "texstore.h"
42
43
44#if FEATURE_texture_fxt1
45
46
/*
 * Forward declaration of the FXT1 encoder used by the texstore functions
 * below.  It is called with the image size, the number of components per
 * source texel (3 or 4 in this file), the source pixels and their row
 * stride, and the destination buffer and its row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder used by the texel fetch
 * functions below; they read the decoded texel (i, j) back from rgba.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLchan *rgba);
55
56
57/**
58 * Store user's image in rgb_fxt1 format.
59 */
60GLboolean
61_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
62{
63   const GLchan *pixels;
64   GLint srcRowStride;
65   GLubyte *dst;
66   const GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
67   const GLchan *tempImage = NULL;
68
69   ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
70   ASSERT(dstXoffset % 8 == 0);
71   ASSERT(dstYoffset % 4 == 0);
72   ASSERT(dstZoffset     == 0);
73   (void) dstZoffset;
74   (void) dstImageOffsets;
75
76   if (srcFormat != GL_RGB ||
77       srcType != CHAN_TYPE ||
78       ctx->_ImageTransferState ||
79       srcPacking->SwapBytes) {
80      /* convert image to RGB/GLchan */
81      tempImage = _mesa_make_temp_chan_image(ctx, dims,
82                                             baseInternalFormat,
83                                             _mesa_get_format_base_format(dstFormat),
84                                             srcWidth, srcHeight, srcDepth,
85                                             srcFormat, srcType, srcAddr,
86                                             srcPacking);
87      if (!tempImage)
88         return GL_FALSE; /* out of memory */
89      _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
90      pixels = tempImage;
91      srcRowStride = 3 * srcWidth;
92      srcFormat = GL_RGB;
93   }
94   else {
95      pixels = (const GLchan *) srcAddr;
96      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
97                                            srcType) / sizeof(GLchan);
98   }
99
100   dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
101                                        dstFormat,
102                                        texWidth, (GLubyte *) dstAddr);
103
104   fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
105               dst, dstRowStride);
106
107   if (tempImage)
108      free((void*) tempImage);
109
110   return GL_TRUE;
111}
112
113
114/**
115 * Store user's image in rgba_fxt1 format.
116 */
117GLboolean
118_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
119{
120   const GLchan *pixels;
121   GLint srcRowStride;
122   GLubyte *dst;
123   GLint texWidth = dstRowStride * 8 / 16; /* a bit of a hack */
124   const GLchan *tempImage = NULL;
125
126   ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
127   ASSERT(dstXoffset % 8 == 0);
128   ASSERT(dstYoffset % 4 == 0);
129   ASSERT(dstZoffset     == 0);
130   (void) dstZoffset;
131   (void) dstImageOffsets;
132
133   if (srcFormat != GL_RGBA ||
134       srcType != CHAN_TYPE ||
135       ctx->_ImageTransferState ||
136       srcPacking->SwapBytes) {
137      /* convert image to RGBA/GLchan */
138      tempImage = _mesa_make_temp_chan_image(ctx, dims,
139                                             baseInternalFormat,
140                                             _mesa_get_format_base_format(dstFormat),
141                                             srcWidth, srcHeight, srcDepth,
142                                             srcFormat, srcType, srcAddr,
143                                             srcPacking);
144      if (!tempImage)
145         return GL_FALSE; /* out of memory */
146      _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
147      pixels = tempImage;
148      srcRowStride = 4 * srcWidth;
149      srcFormat = GL_RGBA;
150   }
151   else {
152      pixels = (const GLchan *) srcAddr;
153      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
154                                            srcType) / sizeof(GLchan);
155   }
156
157   dst = _mesa_compressed_image_address(dstXoffset, dstYoffset, 0,
158                                        dstFormat,
159                                        texWidth, (GLubyte *) dstAddr);
160
161   fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
162               dst, dstRowStride);
163
164   if (tempImage)
165      free((void*) tempImage);
166
167   return GL_TRUE;
168}
169
170
171void
172_mesa_fetch_texel_2d_f_rgba_fxt1( const struct gl_texture_image *texImage,
173                                  GLint i, GLint j, GLint k, GLfloat *texel )
174{
175   /* just sample as GLchan and convert to float here */
176   GLchan rgba[4];
177   (void) k;
178   fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
179   texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
180   texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
181   texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
182   texel[ACOMP] = CHAN_TO_FLOAT(rgba[ACOMP]);
183}
184
185
186void
187_mesa_fetch_texel_2d_f_rgb_fxt1( const struct gl_texture_image *texImage,
188                                 GLint i, GLint j, GLint k, GLfloat *texel )
189{
190   /* just sample as GLchan and convert to float here */
191   GLchan rgba[4];
192   (void) k;
193   fxt1_decode_1(texImage->Data, texImage->RowStride, i, j, rgba);
194   texel[RCOMP] = CHAN_TO_FLOAT(rgba[RCOMP]);
195   texel[GCOMP] = CHAN_TO_FLOAT(rgba[GCOMP]);
196   texel[BCOMP] = CHAN_TO_FLOAT(rgba[BCOMP]);
197   texel[ACOMP] = 1.0F;
198}
199
200
201
202/***************************************************************************\
203 * FXT1 encoder
204 *
205 * The encoder was built by reversing the decoder,
206 * and is vaguely based on Texus2 by 3dfx. Note that this code
207 * is merely a proof of concept, since it is highly UNoptimized;
208 * moreover, it is sub-optimal due to initial conditions passed
209 * to Lloyd's algorithm (the interpolation modes are even worse).
210\***************************************************************************/
211
212
/* Sizing constants for the encoder's working arrays and tuning knobs for
 * the Lloyd quantizer (see fxt1_lloyd).
 */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* Non-zero when the GLuint read from address v is zero.  The encoder applies
 * it to GLubyte[MAX_COMP] texels to detect "transparent black" (all
 * components zero).  NOTE(review): this reads the texel through a GLuint
 * pointer, so it presumably relies on GLuint being MAX_COMP bytes wide and
 * on v being suitably aligned -- confirm.
 */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
221
222
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * Fx64 holds the high quadword of a compressed block while it is being
 * assembled.  Three operations are provided:
 *   FX64_MOV32(a, b)  load a 32-bit value into the low bits of a
 *   FX64_OR32(a, b)   OR a 32-bit value into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The "#if 1" selects the native uint64_t implementation; the struct-based
 * fallback below is kept for compilers without a 64-bit integer type.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b)  a |= b
#define FX64_SHL(a, c)   a <<= c

#else

#define FX64_NATIVE 0

/* Emulated 64-bit value as two 32-bit halves. */
typedef struct {
   GLuint lo, hi;
} Fx64;

/* Only the low half is written; the high half is left untouched. */
#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b)  a.lo |= b

/* Shift across the two halves.  NOTE(review): for c == 0 the else branch
 * evaluates a.lo >> 32, which is not a valid shift count for a 32-bit
 * value; the callers visible in this file only shift by 5.
 */
#define FX64_SHL(a, c)                                 \
   do {                                                \
       if ((c) >= 32) {                                \
          a.hi = a.lo << ((c) - 32);                   \
          a.lo = 0;                                    \
       } else {                                        \
          a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
          a.lo <<= (c);                                \
       }                                               \
   } while (0)

#endif
259
260
/* Per-component weight of the distance metric.  The argument is currently
 * ignored and every component is weighted 1.
 */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to the range [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and offset B for the segment V0 -> V1,
 * so that CALCCDOT's (dot(V, IV) + B) maps V0 to 0.5 and V1 to NV + 0.5
 * (with F == 1), i.e. to indices 0 and NV after truncation.
 *
 * NV  highest interpolation index
 * NC  number of components to use
 *
 * Uses a variable named "i" from the enclosing scope as loop counter.
 * The squared length d2 of (V1 - V0) is used as a divisor, so V0 and V1
 * must differ; the callers in this file only invoke the macro when their
 * two extreme texels differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * Project texel V onto the interpolation vector prepared by MAKEIVEC and
 * store the truncated index in TEXEL; with SAFECDOT set the index is
 * clamped to [0, NV].  Uses "i" from the enclosing scope as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
299
300
301static GLint
302fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
303              GLubyte input[MAX_COMP], GLint nc)
304{
305   GLint i, j, best = -1;
306   GLfloat err = 1e9; /* big enough */
307
308   for (j = 0; j < nv; j++) {
309      GLfloat e = 0.0F;
310      for (i = 0; i < nc; i++) {
311         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
312      }
313      if (e < err) {
314         err = e;
315         best = j;
316      }
317   }
318
319   return best;
320}
321
322
323static GLint
324fxt1_worst (GLfloat vec[MAX_COMP],
325            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
326{
327   GLint i, k, worst = -1;
328   GLfloat err = -1.0F; /* small enough */
329
330   for (k = 0; k < n; k++) {
331      GLfloat e = 0.0F;
332      for (i = 0; i < nc; i++) {
333         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
334      }
335      if (e > err) {
336         err = e;
337         worst = k;
338      }
339   }
340
341   return worst;
342}
343
344
345static GLint
346fxt1_variance (GLdouble variance[MAX_COMP],
347               GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
348{
349   GLint i, k, best = 0;
350   GLint sx, sx2;
351   GLdouble var, maxvar = -1; /* small enough */
352   GLdouble teenth = 1.0 / n;
353
354   for (i = 0; i < nc; i++) {
355      sx = sx2 = 0;
356      for (k = 0; k < n; k++) {
357         GLint t = input[k][i];
358         sx += t;
359         sx2 += t * t;
360      }
361      var = sx2 * teenth - sx * sx * teenth * teenth;
362      if (maxvar < var) {
363         maxvar = var;
364         best = i;
365      }
366      if (variance) {
367         variance[i] = var;
368      }
369   }
370
371   return best;
372}
373
374
375static GLint
376fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
377             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
378{
379#if 0
380   /* Choose colors from a grid.
381    */
382   GLint i, j;
383
384   for (j = 0; j < nv; j++) {
385      GLint m = j * (n - 1) / (nv - 1);
386      for (i = 0; i < nc; i++) {
387         vec[j][i] = input[m][i];
388      }
389   }
390#else
391   /* Our solution here is to find the darkest and brightest colors in
392    * the 8x4 tile and use those as the two representative colors.
393    * There are probably better algorithms to use (histogram-based).
394    */
395   GLint i, j, k;
396   GLint minSum = 2000; /* big enough */
397   GLint maxSum = -1; /* small enough */
398   GLint minCol = 0; /* phoudoin: silent compiler! */
399   GLint maxCol = 0; /* phoudoin: silent compiler! */
400
401   struct {
402      GLint flag;
403      GLint key;
404      GLint freq;
405      GLint idx;
406   } hist[N_TEXELS];
407   GLint lenh = 0;
408
409   memset(hist, 0, sizeof(hist));
410
411   for (k = 0; k < n; k++) {
412      GLint l;
413      GLint key = 0;
414      GLint sum = 0;
415      for (i = 0; i < nc; i++) {
416         key <<= 8;
417         key |= input[k][i];
418         sum += input[k][i];
419      }
420      for (l = 0; l < n; l++) {
421         if (!hist[l].flag) {
422            /* alloc new slot */
423            hist[l].flag = !0;
424            hist[l].key = key;
425            hist[l].freq = 1;
426            hist[l].idx = k;
427            lenh = l + 1;
428            break;
429         } else if (hist[l].key == key) {
430            hist[l].freq++;
431            break;
432         }
433      }
434      if (minSum > sum) {
435         minSum = sum;
436         minCol = k;
437      }
438      if (maxSum < sum) {
439         maxSum = sum;
440         maxCol = k;
441      }
442   }
443
444   if (lenh <= nv) {
445      for (j = 0; j < lenh; j++) {
446         for (i = 0; i < nc; i++) {
447            vec[j][i] = (GLfloat)input[hist[j].idx][i];
448         }
449      }
450      for (; j < nv; j++) {
451         for (i = 0; i < nc; i++) {
452            vec[j][i] = vec[0][i];
453         }
454      }
455      return 0;
456   }
457
458   for (j = 0; j < nv; j++) {
459      for (i = 0; i < nc; i++) {
460         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
461      }
462   }
463#endif
464
465   return !0;
466}
467
468
469static GLint
470fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
471            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
472{
473   /* Use the generalized lloyd's algorithm for VQ:
474    *     find 4 color vectors.
475    *
476    *     for each sample color
477    *         sort to nearest vector.
478    *
479    *     replace each vector with the centroid of it's matching colors.
480    *
481    *     repeat until RMS doesn't improve.
482    *
483    *     if a color vector has no samples, or becomes the same as another
484    *     vector, replace it with the color which is farthest from a sample.
485    *
486    * vec[][MAX_COMP]           initial vectors and resulting colors
487    * nv                        number of resulting colors required
488    * input[N_TEXELS][MAX_COMP] input texels
489    * nc                        number of components in input / vec
490    * n                         number of input samples
491    */
492
493   GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
494   GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
495   GLfloat error, lasterror = 1e9;
496
497   GLint i, j, k, rep;
498
499   /* the quantizer */
500   for (rep = 0; rep < LL_N_REP; rep++) {
501      /* reset sums & counters */
502      for (j = 0; j < nv; j++) {
503         for (i = 0; i < nc; i++) {
504            sum[j][i] = 0;
505         }
506         cnt[j] = 0;
507      }
508      error = 0;
509
510      /* scan whole block */
511      for (k = 0; k < n; k++) {
512#if 1
513         GLint best = -1;
514         GLfloat err = 1e9; /* big enough */
515         /* determine best vector */
516         for (j = 0; j < nv; j++) {
517            GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
518                      (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
519                      (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
520            if (nc == 4) {
521               e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
522            }
523            if (e < err) {
524               err = e;
525               best = j;
526            }
527         }
528#else
529         GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
530#endif
531         assert(best >= 0);
532         /* add in closest color */
533         for (i = 0; i < nc; i++) {
534            sum[best][i] += input[k][i];
535         }
536         /* mark this vector as used */
537         cnt[best]++;
538         /* accumulate error */
539         error += err;
540      }
541
542      /* check RMS */
543      if ((error < LL_RMS_E) ||
544          ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
545         return !0; /* good match */
546      }
547      lasterror = error;
548
549      /* move each vector to the barycenter of its closest colors */
550      for (j = 0; j < nv; j++) {
551         if (cnt[j]) {
552            GLfloat div = 1.0F / cnt[j];
553            for (i = 0; i < nc; i++) {
554               vec[j][i] = div * sum[j][i];
555            }
556         } else {
557            /* this vec has no samples or is identical with a previous vec */
558            GLint worst = fxt1_worst(vec[j], input, nc, n);
559            for (i = 0; i < nc; i++) {
560               vec[j][i] = input[worst][i];
561            }
562         }
563      }
564   }
565
566   return 0; /* could not converge fast enough */
567}
568
569
570static void
571fxt1_quantize_CHROMA (GLuint *cc,
572                      GLubyte input[N_TEXELS][MAX_COMP])
573{
574   const GLint n_vect = 4; /* 4 base vectors to find */
575   const GLint n_comp = 3; /* 3 components: R, G, B */
576   GLfloat vec[MAX_VECT][MAX_COMP];
577   GLint i, j, k;
578   Fx64 hi; /* high quadword */
579   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
580
581   if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
582      fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
583   }
584
585   FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
586   for (j = n_vect - 1; j >= 0; j--) {
587      for (i = 0; i < n_comp; i++) {
588         /* add in colors */
589         FX64_SHL(hi, 5);
590         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
591      }
592   }
593   ((Fx64 *)cc)[1] = hi;
594
595   lohi = lolo = 0;
596   /* right microtile */
597   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
598      lohi <<= 2;
599      lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
600   }
601   /* left microtile */
602   for (; k >= 0; k--) {
603      lolo <<= 2;
604      lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
605   }
606   cc[1] = lohi;
607   cc[0] = lolo;
608}
609
610
611static void
612fxt1_quantize_ALPHA0 (GLuint *cc,
613                      GLubyte input[N_TEXELS][MAX_COMP],
614                      GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
615{
616   const GLint n_vect = 3; /* 3 base vectors to find */
617   const GLint n_comp = 4; /* 4 components: R, G, B, A */
618   GLfloat vec[MAX_VECT][MAX_COMP];
619   GLint i, j, k;
620   Fx64 hi; /* high quadword */
621   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
622
623   /* the last vector indicates zero */
624   for (i = 0; i < n_comp; i++) {
625      vec[n_vect][i] = 0;
626   }
627
628   /* the first n texels in reord are guaranteed to be non-zero */
629   if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
630      fxt1_lloyd(vec, n_vect, reord, n_comp, n);
631   }
632
633   FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
634   for (j = n_vect - 1; j >= 0; j--) {
635      /* add in alphas */
636      FX64_SHL(hi, 5);
637      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
638   }
639   for (j = n_vect - 1; j >= 0; j--) {
640      for (i = 0; i < n_comp - 1; i++) {
641         /* add in colors */
642         FX64_SHL(hi, 5);
643         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
644      }
645   }
646   ((Fx64 *)cc)[1] = hi;
647
648   lohi = lolo = 0;
649   /* right microtile */
650   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
651      lohi <<= 2;
652      lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
653   }
654   /* left microtile */
655   for (; k >= 0; k--) {
656      lolo <<= 2;
657      lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
658   }
659   cc[1] = lohi;
660   cc[0] = lolo;
661}
662
663
/**
 * Encode one block in CC_ALPHA mode with interpolation (lerp = 1).
 *
 * Three RGBA vectors are stored: vec[0] is an extreme of the left
 * microtile, vec[2] an extreme of the right microtile and vec[1] a vector
 * shared by both.  Each texel gets a 2-bit index interpolating between its
 * microtile's own vector and the shared one.
 *
 * \param cc     receives the four 32-bit words of the compressed block
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;
   GLint nn_comp;
   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* Left microtile.  If all component sums are equal (min and max index
    * stay the same), retry with one component fewer.
    * NOTE(review): sumL is not reset between retries, so it accumulates
    * the sums of every pass -- confirm this is intended, since it is used
    * below as the left weight of the common vector.
    */
   nn_comp = n_comp;
   while ((minColL == maxColL) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = 0; k < N_TEXELS / 2; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColL = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColL = k;
           }
           sumL += sum;
       }

       nn_comp--;
   }

   /* Right microtile, same procedure (sumR accumulates likewise). */
   nn_comp = n_comp;
   while ((minColR == maxColR) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColR = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColR = k;
           }
           sumR += sum;
       }

       nn_comp--;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the closest pair of (left extreme, right extreme) */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* vec[0] / vec[2] are the extremes NOT in the closest pair
       * (1 - v1 is the other left index, 5 - v2 the other right index);
       * vec[1] is the closest pair blended with weights sumL and sumR.
       * NOTE(review): the blend divides by (sumL + sumR), which is zero
       * if every texel of the block is all zero -- confirm callers never
       * pass such a block.
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   cc[0] = 0; /* all indices 0 when the left microtile has no spread */
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0; /* all indices 0 when the right microtile has no spread */
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   /* pack mode bits, then the three alphas, then the colours (5 bits
    * per component)
    */
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
819
820
821static void
822fxt1_quantize_HI (GLuint *cc,
823                  GLubyte input[N_TEXELS][MAX_COMP],
824                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
825{
826   const GLint n_vect = 6; /* highest vector number */
827   const GLint n_comp = 3; /* 3 components: R, G, B */
828   GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
829   GLfloat iv[MAX_COMP];   /* interpolation vector */
830   GLint i, k;
831   GLuint hihi; /* high quadword: hi dword */
832
833   GLint minSum = 2000; /* big enough */
834   GLint maxSum = -1; /* small enough */
835   GLint minCol = 0; /* phoudoin: silent compiler! */
836   GLint maxCol = 0; /* phoudoin: silent compiler! */
837
838   /* Our solution here is to find the darkest and brightest colors in
839    * the 8x4 tile and use those as the two representative colors.
840    * There are probably better algorithms to use (histogram-based).
841    */
842   for (k = 0; k < n; k++) {
843      GLint sum = 0;
844      for (i = 0; i < n_comp; i++) {
845         sum += reord[k][i];
846      }
847      if (minSum > sum) {
848         minSum = sum;
849         minCol = k;
850      }
851      if (maxSum < sum) {
852         maxSum = sum;
853         maxCol = k;
854      }
855   }
856
857   hihi = 0; /* cc-hi = "00" */
858   for (i = 0; i < n_comp; i++) {
859      /* add in colors */
860      hihi <<= 5;
861      hihi |= reord[maxCol][i] >> 3;
862   }
863   for (i = 0; i < n_comp; i++) {
864      /* add in colors */
865      hihi <<= 5;
866      hihi |= reord[minCol][i] >> 3;
867   }
868   cc[3] = hihi;
869   cc[0] = cc[1] = cc[2] = 0;
870
871   /* compute interpolation vector */
872   if (minCol != maxCol) {
873      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
874   }
875
876   /* add in texels */
877   for (k = N_TEXELS - 1; k >= 0; k--) {
878      GLint t = k * 3;
879      GLuint *kk = (GLuint *)((char *)cc + t / 8);
880      GLint texel = n_vect + 1; /* transparent black */
881
882      if (!ISTBLACK(input[k])) {
883         if (minCol != maxCol) {
884            /* interpolate color */
885            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
886            /* add in texel */
887            kk[0] |= texel << (t & 7);
888         }
889      } else {
890         /* add in texel */
891         kk[0] |= texel << (t & 7);
892      }
893   }
894}
895
896
/**
 * Encode one block in CC_MIXED mode with the alpha bit set: each microtile
 * stores two RGB endpoints, and each texel gets a 2-bit index where index
 * n_vect + 1 (3) marks transparent black.
 *
 * \param cc     receives the four 32-bit words of the compressed block
 * \param input  the N_TEXELS texels of the block
 */
static void
fxt1_quantize_MIXED1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 2; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   /* maxCol == -1 means "no non-transparent texel seen in this microtile" */
   GLint minColL = 0, maxColL = -1;
   GLint minColR = 0, maxColR = -1;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* left microtile; transparent black texels are ignored */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
      }
   }
   /* right microtile (k continues from N_TEXELS / 2) */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
      }
   }

   /* left microtile */
   if (maxColL == -1) {
      /* all transparent black */
      cc[0] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = 0;
         vec[1][i] = 0;
      }
   } else {
      /* NOTE(review): when only one distinct sum exists
       * (minColL == maxColL) the indices stay 0 for every texel, so any
       * transparent black texels in this microtile are not given index 3
       * -- confirm whether that case can occur and is intended.
       */
      cc[0] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = input[minColL][i];
         vec[1][i] = input[maxColL][i];
      }
      if (minColL != maxColL) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

         /* add in texels */
         lolo = 0;
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lolo <<= 2;
            lolo |= texel;
         }
         cc[0] = lolo;
      }
   }

   /* right microtile */
   if (maxColR == -1) {
      /* all transparent black */
      cc[1] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = 0;
         vec[3][i] = 0;
      }
   } else {
      /* same single-colour caveat as for the left microtile */
      cc[1] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = input[minColR][i];
         vec[3][i] = input[maxColR][i];
      }
      if (minColR != maxColR) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

         /* add in texels */
         lohi = 0;
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lohi <<= 2;
            lohi |= texel;
         }
         cc[1] = lohi;
      }
   }

   /* Mode bits: 9 sets bits 3 and 0; bit 2 of vec[3]'s green and bit 2 of
    * vec[1]'s green (moved to bit 1) are folded in alongside -- presumably
    * the green LSBs that do not fit in the 5-bit colour fields; verify
    * against the format specification.
    */
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1033
1034
1035static void
1036fxt1_quantize_MIXED0 (GLuint *cc,
1037                      GLubyte input[N_TEXELS][MAX_COMP])
1038{
1039   const GLint n_vect = 3; /* highest vector number in each microtile */
1040   const GLint n_comp = 3; /* 3 components: R, G, B */
1041   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1042   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1043   GLint i, j, k;
1044   Fx64 hi; /* high quadword */
1045   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1046
1047   GLint minColL = 0, maxColL = 0;
1048   GLint minColR = 0, maxColR = 0;
1049#if 0
1050   GLint minSum;
1051   GLint maxSum;
1052
1053   /* Our solution here is to find the darkest and brightest colors in
1054    * the 4x4 tile and use those as the two representative colors.
1055    * There are probably better algorithms to use (histogram-based).
1056    */
1057   minSum = 2000; /* big enough */
1058   maxSum = -1; /* small enough */
1059   for (k = 0; k < N_TEXELS / 2; k++) {
1060      GLint sum = 0;
1061      for (i = 0; i < n_comp; i++) {
1062         sum += input[k][i];
1063      }
1064      if (minSum > sum) {
1065         minSum = sum;
1066         minColL = k;
1067      }
1068      if (maxSum < sum) {
1069         maxSum = sum;
1070         maxColL = k;
1071      }
1072   }
1073   minSum = 2000; /* big enough */
1074   maxSum = -1; /* small enough */
1075   for (; k < N_TEXELS; k++) {
1076      GLint sum = 0;
1077      for (i = 0; i < n_comp; i++) {
1078         sum += input[k][i];
1079      }
1080      if (minSum > sum) {
1081         minSum = sum;
1082         minColR = k;
1083      }
1084      if (maxSum < sum) {
1085         maxSum = sum;
1086         maxColR = k;
1087      }
1088   }
1089#else
1090   GLint minVal;
1091   GLint maxVal;
1092   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1093   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1094
1095   /* Scan the channel with max variance for lo & hi
1096    * and use those as the two representative colors.
1097    */
1098   minVal = 2000; /* big enough */
1099   maxVal = -1; /* small enough */
1100   for (k = 0; k < N_TEXELS / 2; k++) {
1101      GLint t = input[k][maxVarL];
1102      if (minVal > t) {
1103         minVal = t;
1104         minColL = k;
1105      }
1106      if (maxVal < t) {
1107         maxVal = t;
1108         maxColL = k;
1109      }
1110   }
1111   minVal = 2000; /* big enough */
1112   maxVal = -1; /* small enough */
1113   for (; k < N_TEXELS; k++) {
1114      GLint t = input[k][maxVarR];
1115      if (minVal > t) {
1116         minVal = t;
1117         minColR = k;
1118      }
1119      if (maxVal < t) {
1120         maxVal = t;
1121         maxColR = k;
1122      }
1123   }
1124#endif
1125
1126   /* left microtile */
1127   cc[0] = 0;
1128   for (i = 0; i < n_comp; i++) {
1129      vec[0][i] = input[minColL][i];
1130      vec[1][i] = input[maxColL][i];
1131   }
1132   if (minColL != maxColL) {
1133      /* compute interpolation vector */
1134      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1135
1136      /* add in texels */
1137      lolo = 0;
1138      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1139         GLint texel;
1140         /* interpolate color */
1141         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1142         /* add in texel */
1143         lolo <<= 2;
1144         lolo |= texel;
1145      }
1146
1147      /* funky encoding for LSB of green */
1148      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1149         for (i = 0; i < n_comp; i++) {
1150            vec[1][i] = input[minColL][i];
1151            vec[0][i] = input[maxColL][i];
1152         }
1153         lolo = ~lolo;
1154      }
1155
1156      cc[0] = lolo;
1157   }
1158
1159   /* right microtile */
1160   cc[1] = 0;
1161   for (i = 0; i < n_comp; i++) {
1162      vec[2][i] = input[minColR][i];
1163      vec[3][i] = input[maxColR][i];
1164   }
1165   if (minColR != maxColR) {
1166      /* compute interpolation vector */
1167      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1168
1169      /* add in texels */
1170      lohi = 0;
1171      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1172         GLint texel;
1173         /* interpolate color */
1174         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1175         /* add in texel */
1176         lohi <<= 2;
1177         lohi |= texel;
1178      }
1179
1180      /* funky encoding for LSB of green */
1181      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1182         for (i = 0; i < n_comp; i++) {
1183            vec[3][i] = input[minColR][i];
1184            vec[2][i] = input[maxColR][i];
1185         }
1186         lohi = ~lohi;
1187      }
1188
1189      cc[1] = lohi;
1190   }
1191
1192   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1193   for (j = 2 * 2 - 1; j >= 0; j--) {
1194      for (i = 0; i < n_comp; i++) {
1195         /* add in colors */
1196         FX64_SHL(hi, 5);
1197         FX64_OR32(hi, vec[j][i] >> 3);
1198      }
1199   }
1200   ((Fx64 *)cc)[1] = hi;
1201}
1202
1203
1204static void
1205fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1206{
1207   GLint trualpha;
1208   GLubyte reord[N_TEXELS][MAX_COMP];
1209
1210   GLubyte input[N_TEXELS][MAX_COMP];
1211   GLint i, k, l;
1212
1213   if (comps == 3) {
1214      /* make the whole block opaque */
1215      memset(input, -1, sizeof(input));
1216   }
1217
1218   /* 8 texels each line */
1219   for (l = 0; l < 4; l++) {
1220      for (k = 0; k < 4; k++) {
1221         for (i = 0; i < comps; i++) {
1222            input[k + l * 4][i] = *lines[l]++;
1223         }
1224      }
1225      for (; k < 8; k++) {
1226         for (i = 0; i < comps; i++) {
1227            input[k + l * 4 + 12][i] = *lines[l]++;
1228         }
1229      }
1230   }
1231
1232   /* block layout:
1233    * 00, 01, 02, 03, 08, 09, 0a, 0b
1234    * 10, 11, 12, 13, 18, 19, 1a, 1b
1235    * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1236    * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1237    */
1238
1239   /* [dBorca]
1240    * stupidity flows forth from this
1241    */
1242   l = N_TEXELS;
1243   trualpha = 0;
1244   if (comps == 4) {
1245      /* skip all transparent black texels */
1246      l = 0;
1247      for (k = 0; k < N_TEXELS; k++) {
1248         /* test all components against 0 */
1249         if (!ISTBLACK(input[k])) {
1250            /* texel is not transparent black */
1251            COPY_4UBV(reord[l], input[k]);
1252            if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1253               /* non-opaque texel */
1254               trualpha = !0;
1255            }
1256            l++;
1257         }
1258      }
1259   }
1260
1261#if 0
1262   if (trualpha) {
1263      fxt1_quantize_ALPHA0(cc, input, reord, l);
1264   } else if (l == 0) {
1265      cc[0] = cc[1] = cc[2] = -1;
1266      cc[3] = 0;
1267   } else if (l < N_TEXELS) {
1268      fxt1_quantize_HI(cc, input, reord, l);
1269   } else {
1270      fxt1_quantize_CHROMA(cc, input);
1271   }
1272   (void)fxt1_quantize_ALPHA1;
1273   (void)fxt1_quantize_MIXED1;
1274   (void)fxt1_quantize_MIXED0;
1275#else
1276   if (trualpha) {
1277      fxt1_quantize_ALPHA1(cc, input);
1278   } else if (l == 0) {
1279      cc[0] = cc[1] = cc[2] = ~0u;
1280      cc[3] = 0;
1281   } else if (l < N_TEXELS) {
1282      fxt1_quantize_MIXED1(cc, input);
1283   } else {
1284      fxt1_quantize_MIXED0(cc, input);
1285   }
1286   (void)fxt1_quantize_ALPHA0;
1287   (void)fxt1_quantize_HI;
1288   (void)fxt1_quantize_CHROMA;
1289#endif
1290}
1291
1292
1293static void
1294fxt1_encode (GLuint width, GLuint height, GLint comps,
1295             const void *source, GLint srcRowStride,
1296             void *dest, GLint destRowStride)
1297{
1298   GLuint x, y;
1299   const GLubyte *data;
1300   GLuint *encoded = (GLuint *)dest;
1301   void *newSource = NULL;
1302
1303   assert(comps == 3 || comps == 4);
1304
1305   /* Replicate image if width is not M8 or height is not M4 */
1306   if ((width & 7) | (height & 3)) {
1307      GLint newWidth = (width + 7) & ~7;
1308      GLint newHeight = (height + 3) & ~3;
1309      newSource = malloc(comps * newWidth * newHeight * sizeof(GLchan));
1310      if (!newSource) {
1311         GET_CURRENT_CONTEXT(ctx);
1312         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1313         goto cleanUp;
1314      }
1315      _mesa_upscale_teximage2d(width, height, newWidth, newHeight,
1316                               comps, (const GLchan *) source,
1317                               srcRowStride, (GLchan *) newSource);
1318      source = newSource;
1319      width = newWidth;
1320      height = newHeight;
1321      srcRowStride = comps * newWidth;
1322   }
1323
1324   /* convert from 16/32-bit channels to GLubyte if needed */
1325   if (CHAN_TYPE != GL_UNSIGNED_BYTE) {
1326      const GLuint n = width * height * comps;
1327      const GLchan *src = (const GLchan *) source;
1328      GLubyte *dest = (GLubyte *) malloc(n * sizeof(GLubyte));
1329      GLuint i;
1330      if (!dest) {
1331         GET_CURRENT_CONTEXT(ctx);
1332         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1333         goto cleanUp;
1334      }
1335      for (i = 0; i < n; i++) {
1336         dest[i] = CHAN_TO_UBYTE(src[i]);
1337      }
1338      if (newSource != NULL) {
1339         free(newSource);
1340      }
1341      newSource = dest;  /* we'll free this buffer before returning */
1342      source = dest;  /* the new, GLubyte incoming image */
1343   }
1344
1345   data = (const GLubyte *) source;
1346   destRowStride = (destRowStride - width * 2) / 4;
1347   for (y = 0; y < height; y += 4) {
1348      GLuint offs = 0 + (y + 0) * srcRowStride;
1349      for (x = 0; x < width; x += 8) {
1350         const GLubyte *lines[4];
1351         lines[0] = &data[offs];
1352         lines[1] = lines[0] + srcRowStride;
1353         lines[2] = lines[1] + srcRowStride;
1354         lines[3] = lines[2] + srcRowStride;
1355         offs += 8 * comps;
1356         fxt1_quantize(encoded, lines, comps);
1357         /* 128 bits per 8x4 block */
1358         encoded += 4;
1359      }
1360      encoded += destRowStride;
1361   }
1362
1363 cleanUp:
1364   if (newSource != NULL) {
1365      free(newSource);
1366   }
1367}
1368
1369
1370/***************************************************************************\
1371 * FXT1 decoder
1372 *
1373 * The decoder is based on GL_3DFX_texture_compression_FXT1
1374 * specification and serves as a concept for the encoder.
1375\***************************************************************************/
1376
1377
1378/* lookup table for scaling 5 bit colors up to 8 bits */
1379static const GLubyte _rgb_scale_5[] = {
1380   0,   8,   16,  25,  33,  41,  49,  58,
1381   66,  74,  82,  90,  99,  107, 115, 123,
1382   132, 140, 148, 156, 165, 173, 181, 189,
1383   197, 206, 214, 222, 230, 239, 247, 255
1384};
1385
1386/* lookup table for scaling 6 bit colors up to 8 bits */
1387static const GLubyte _rgb_scale_6[] = {
1388   0,   4,   8,   12,  16,  20,  24,  28,
1389   32,  36,  40,  45,  49,  53,  57,  61,
1390   65,  69,  73,  77,  81,  85,  89,  93,
1391   97,  101, 105, 109, 113, 117, 121, 125,
1392   130, 134, 138, 142, 146, 150, 154, 158,
1393   162, 166, 170, 174, 178, 182, 186, 190,
1394   194, 198, 202, 206, 210, 215, 219, 223,
1395   227, 231, 235, 239, 243, 247, 251, 255
1396};
1397
1398
/* The 32-bit word of block `cc` that contains bit `which`, shifted right so
 * that bit lands at position 0.  Callers mask off the field they want; a
 * field that straddles two words cannot be fetched with this macro.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* Expand the low 5 bits of c to an 8-bit color value. */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* Expand the 6-bit value made of the low 5 bits of c plus LSB b to 8 bits. */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* Rounded integer interpolation: t = 0 yields c0, t = n yields c1.  The
 * whole expansion is parenthesized so it can be used inside a larger
 * expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1403
1404
1405static void
1406fxt1_decode_1HI (const GLubyte *code, GLint t, GLchan *rgba)
1407{
1408   const GLuint *cc;
1409
1410   t *= 3;
1411   cc = (const GLuint *)(code + t / 8);
1412   t = (cc[0] >> (t & 7)) & 7;
1413
1414   if (t == 7) {
1415      rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1416   } else {
1417      GLubyte r, g, b;
1418      cc = (const GLuint *)(code + 12);
1419      if (t == 0) {
1420         b = UP5(CC_SEL(cc, 0));
1421         g = UP5(CC_SEL(cc, 5));
1422         r = UP5(CC_SEL(cc, 10));
1423      } else if (t == 6) {
1424         b = UP5(CC_SEL(cc, 15));
1425         g = UP5(CC_SEL(cc, 20));
1426         r = UP5(CC_SEL(cc, 25));
1427      } else {
1428         b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1429         g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1430         r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1431      }
1432      rgba[RCOMP] = UBYTE_TO_CHAN(r);
1433      rgba[GCOMP] = UBYTE_TO_CHAN(g);
1434      rgba[BCOMP] = UBYTE_TO_CHAN(b);
1435      rgba[ACOMP] = CHAN_MAX;
1436   }
1437}
1438
1439
1440static void
1441fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLchan *rgba)
1442{
1443   const GLuint *cc;
1444   GLuint kk;
1445
1446   cc = (const GLuint *)code;
1447   if (t & 16) {
1448      cc++;
1449      t &= 15;
1450   }
1451   t = (cc[0] >> (t * 2)) & 3;
1452
1453   t *= 15;
1454   cc = (const GLuint *)(code + 8 + t / 8);
1455   kk = cc[0] >> (t & 7);
1456   rgba[BCOMP] = UBYTE_TO_CHAN( UP5(kk) );
1457   rgba[GCOMP] = UBYTE_TO_CHAN( UP5(kk >> 5) );
1458   rgba[RCOMP] = UBYTE_TO_CHAN( UP5(kk >> 10) );
1459   rgba[ACOMP] = CHAN_MAX;
1460}
1461
1462
1463static void
1464fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLchan *rgba)
1465{
1466   const GLuint *cc;
1467   GLuint col[2][3];
1468   GLint glsb, selb;
1469
1470   cc = (const GLuint *)code;
1471   if (t & 16) {
1472      t &= 15;
1473      t = (cc[1] >> (t * 2)) & 3;
1474      /* col 2 */
1475      col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1476      col[0][GCOMP] = CC_SEL(cc, 99);
1477      col[0][RCOMP] = CC_SEL(cc, 104);
1478      /* col 3 */
1479      col[1][BCOMP] = CC_SEL(cc, 109);
1480      col[1][GCOMP] = CC_SEL(cc, 114);
1481      col[1][RCOMP] = CC_SEL(cc, 119);
1482      glsb = CC_SEL(cc, 126);
1483      selb = CC_SEL(cc, 33);
1484   } else {
1485      t = (cc[0] >> (t * 2)) & 3;
1486      /* col 0 */
1487      col[0][BCOMP] = CC_SEL(cc, 64);
1488      col[0][GCOMP] = CC_SEL(cc, 69);
1489      col[0][RCOMP] = CC_SEL(cc, 74);
1490      /* col 1 */
1491      col[1][BCOMP] = CC_SEL(cc, 79);
1492      col[1][GCOMP] = CC_SEL(cc, 84);
1493      col[1][RCOMP] = CC_SEL(cc, 89);
1494      glsb = CC_SEL(cc, 125);
1495      selb = CC_SEL(cc, 1);
1496   }
1497
1498   if (CC_SEL(cc, 124) & 1) {
1499      /* alpha[0] == 1 */
1500
1501      if (t == 3) {
1502         /* zero */
1503         rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1504      } else {
1505         GLubyte r, g, b;
1506         if (t == 0) {
1507            b = UP5(col[0][BCOMP]);
1508            g = UP5(col[0][GCOMP]);
1509            r = UP5(col[0][RCOMP]);
1510         } else if (t == 2) {
1511            b = UP5(col[1][BCOMP]);
1512            g = UP6(col[1][GCOMP], glsb);
1513            r = UP5(col[1][RCOMP]);
1514         } else {
1515            b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1516            g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1517            r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1518         }
1519         rgba[RCOMP] = UBYTE_TO_CHAN(r);
1520         rgba[GCOMP] = UBYTE_TO_CHAN(g);
1521         rgba[BCOMP] = UBYTE_TO_CHAN(b);
1522         rgba[ACOMP] = CHAN_MAX;
1523      }
1524   } else {
1525      /* alpha[0] == 0 */
1526      GLubyte r, g, b;
1527      if (t == 0) {
1528         b = UP5(col[0][BCOMP]);
1529         g = UP6(col[0][GCOMP], glsb ^ selb);
1530         r = UP5(col[0][RCOMP]);
1531      } else if (t == 3) {
1532         b = UP5(col[1][BCOMP]);
1533         g = UP6(col[1][GCOMP], glsb);
1534         r = UP5(col[1][RCOMP]);
1535      } else {
1536         b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1537         g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1538                        UP6(col[1][GCOMP], glsb));
1539         r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1540      }
1541      rgba[RCOMP] = UBYTE_TO_CHAN(r);
1542      rgba[GCOMP] = UBYTE_TO_CHAN(g);
1543      rgba[BCOMP] = UBYTE_TO_CHAN(b);
1544      rgba[ACOMP] = CHAN_MAX;
1545   }
1546}
1547
1548
1549static void
1550fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLchan *rgba)
1551{
1552   const GLuint *cc;
1553   GLubyte r, g, b, a;
1554
1555   cc = (const GLuint *)code;
1556   if (CC_SEL(cc, 124) & 1) {
1557      /* lerp == 1 */
1558      GLuint col0[4];
1559
1560      if (t & 16) {
1561         t &= 15;
1562         t = (cc[1] >> (t * 2)) & 3;
1563         /* col 2 */
1564         col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1565         col0[GCOMP] = CC_SEL(cc, 99);
1566         col0[RCOMP] = CC_SEL(cc, 104);
1567         col0[ACOMP] = CC_SEL(cc, 119);
1568      } else {
1569         t = (cc[0] >> (t * 2)) & 3;
1570         /* col 0 */
1571         col0[BCOMP] = CC_SEL(cc, 64);
1572         col0[GCOMP] = CC_SEL(cc, 69);
1573         col0[RCOMP] = CC_SEL(cc, 74);
1574         col0[ACOMP] = CC_SEL(cc, 109);
1575      }
1576
1577      if (t == 0) {
1578         b = UP5(col0[BCOMP]);
1579         g = UP5(col0[GCOMP]);
1580         r = UP5(col0[RCOMP]);
1581         a = UP5(col0[ACOMP]);
1582      } else if (t == 3) {
1583         b = UP5(CC_SEL(cc, 79));
1584         g = UP5(CC_SEL(cc, 84));
1585         r = UP5(CC_SEL(cc, 89));
1586         a = UP5(CC_SEL(cc, 114));
1587      } else {
1588         b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1589         g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1590         r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1591         a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1592      }
1593   } else {
1594      /* lerp == 0 */
1595
1596      if (t & 16) {
1597         cc++;
1598         t &= 15;
1599      }
1600      t = (cc[0] >> (t * 2)) & 3;
1601
1602      if (t == 3) {
1603         /* zero */
1604         r = g = b = a = 0;
1605      } else {
1606         GLuint kk;
1607         cc = (const GLuint *)code;
1608         a = UP5(cc[3] >> (t * 5 + 13));
1609         t *= 15;
1610         cc = (const GLuint *)(code + 8 + t / 8);
1611         kk = cc[0] >> (t & 7);
1612         b = UP5(kk);
1613         g = UP5(kk >> 5);
1614         r = UP5(kk >> 10);
1615      }
1616   }
1617   rgba[RCOMP] = UBYTE_TO_CHAN(r);
1618   rgba[GCOMP] = UBYTE_TO_CHAN(g);
1619   rgba[BCOMP] = UBYTE_TO_CHAN(b);
1620   rgba[ACOMP] = UBYTE_TO_CHAN(a);
1621}
1622
1623
1624void
1625fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1626               GLint i, GLint j, GLchan *rgba)
1627{
1628   static void (*decode_1[]) (const GLubyte *, GLint, GLchan *) = {
1629      fxt1_decode_1HI,     /* cc-high   = "00?" */
1630      fxt1_decode_1HI,     /* cc-high   = "00?" */
1631      fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1632      fxt1_decode_1ALPHA,  /* alpha     = "011" */
1633      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1634      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1635      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1636      fxt1_decode_1MIXED   /* mixed     = "1??" */
1637   };
1638
1639   const GLubyte *code = (const GLubyte *)texture +
1640                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1641   GLint mode = CC_SEL(code, 125);
1642   GLint t = i & 7;
1643
1644   if (t & 4) {
1645      t += 12;
1646   }
1647   t += (j & 3) * 4;
1648
1649   decode_1[mode](code, t, rgba);
1650}
1651
1652
1653#endif /* FEATURE_texture_fxt1 */
1654