1/*
2 * Mesa 3-D graphics library
3 * Version:  7.1
4 *
5 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26/**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32#include "glheader.h"
33#include "imports.h"
34#include "colormac.h"
35#include "image.h"
36#include "macros.h"
37#include "mfeatures.h"
38#include "mipmap.h"
39#include "texcompress.h"
40#include "texcompress_fxt1.h"
41#include "texstore.h"
42#include "swrast/s_context.h"
43
44
45#if FEATURE_texture_fxt1
46
47
/*
 * Forward declaration of the block encoder.  The texstore functions in
 * this file call it with the image size, the number of components per
 * texel (3 or 4), the GLubyte source pixels with their row stride, and
 * the destination buffer with its row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder.  The fetch functions in
 * this file pass the mapped texture, its row stride and the texel
 * coordinates (i, j), and read four GLubyte components back from rgba.
 */
void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLubyte *rgba);
56
57
58/**
59 * Store user's image in rgb_fxt1 format.
60 */
61GLboolean
62_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
63{
64   const GLubyte *pixels;
65   GLint srcRowStride;
66   GLubyte *dst;
67   const GLubyte *tempImage = NULL;
68
69   ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
70
71   if (srcFormat != GL_RGB ||
72       srcType != GL_UNSIGNED_BYTE ||
73       ctx->_ImageTransferState ||
74       srcPacking->RowLength != srcWidth ||
75       srcPacking->SwapBytes) {
76      /* convert image to RGB/GLubyte */
77      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
78                                             baseInternalFormat,
79                                             _mesa_get_format_base_format(dstFormat),
80                                             srcWidth, srcHeight, srcDepth,
81                                             srcFormat, srcType, srcAddr,
82                                             srcPacking);
83      if (!tempImage)
84         return GL_FALSE; /* out of memory */
85      pixels = tempImage;
86      srcRowStride = 3 * srcWidth;
87      srcFormat = GL_RGB;
88   }
89   else {
90      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
91                                     srcFormat, srcType, 0, 0);
92
93      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
94                                            srcType) / sizeof(GLubyte);
95   }
96
97   dst = dstSlices[0];
98
99   fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
100               dst, dstRowStride);
101
102   if (tempImage)
103      free((void*) tempImage);
104
105   return GL_TRUE;
106}
107
108
109/**
110 * Store user's image in rgba_fxt1 format.
111 */
112GLboolean
113_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
114{
115   const GLubyte *pixels;
116   GLint srcRowStride;
117   GLubyte *dst;
118   const GLubyte *tempImage = NULL;
119
120   ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
121
122   if (srcFormat != GL_RGBA ||
123       srcType != GL_UNSIGNED_BYTE ||
124       ctx->_ImageTransferState ||
125       srcPacking->SwapBytes) {
126      /* convert image to RGBA/GLubyte */
127      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
128                                             baseInternalFormat,
129                                             _mesa_get_format_base_format(dstFormat),
130                                             srcWidth, srcHeight, srcDepth,
131                                             srcFormat, srcType, srcAddr,
132                                             srcPacking);
133      if (!tempImage)
134         return GL_FALSE; /* out of memory */
135      pixels = tempImage;
136      srcRowStride = 4 * srcWidth;
137      srcFormat = GL_RGBA;
138   }
139   else {
140      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
141                                     srcFormat, srcType, 0, 0);
142
143      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
144                                            srcType) / sizeof(GLubyte);
145   }
146
147   dst = dstSlices[0];
148
149   fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
150               dst, dstRowStride);
151
152   if (tempImage)
153      free((void*) tempImage);
154
155   return GL_TRUE;
156}
157
158
159void
160_mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
161                                  GLint i, GLint j, GLint k, GLfloat *texel )
162{
163   /* just sample as GLubyte and convert to float here */
164   GLubyte rgba[4];
165   (void) k;
166   fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
167   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
168   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
169   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
170   texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
171}
172
173
174void
175_mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
176                                 GLint i, GLint j, GLint k, GLfloat *texel )
177{
178   /* just sample as GLubyte and convert to float here */
179   GLubyte rgba[4];
180   (void) k;
181   fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
182   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
183   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
184   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
185   texel[ACOMP] = 1.0F;
186}
187
188
189
190/***************************************************************************\
191 * FXT1 encoder
192 *
193 * The encoder was built by reversing the decoder,
194 * and is vaguely based on Texus2 by 3dfx. Note that this code
195 * is merely a proof of concept, since it is highly UNoptimized;
196 * moreover, it is sub-optimal due to initial conditions passed
197 * to Lloyd's algorithm (the interpolation modes are even worse).
198\***************************************************************************/
199
200
/* Array bounds and tuning constants shared by the quantizers below. */
#define MAX_COMP 4 /* ever needed maximum number of components in texel */
#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* number of iterations in lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* True when the GLuint read at address v is zero, i.e. when all four bytes
 * of a texel are 0 ("transparent black").
 * NOTE(review): reads the bytes through a GLuint pointer, so it presumably
 * relies on v being suitably aligned for GLuint - confirm.
 */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
209
210
/*
 * Define a 64-bit unsigned integer type and macros
 *
 * FX64_MOV32(a, b) stores the 32-bit value b in a, FX64_OR32(a, b) ORs b
 * into the low bits of a, and FX64_SHL(a, c) shifts a left by c bits.
 * The "#if 1" below selects the native uint64_t implementation; the
 * two-word struct variant is currently compiled out.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b)  a |= b
#define FX64_SHL(a, c)   a <<= c

#else

#define FX64_NATIVE 0

/* 64-bit value held as two 32-bit halves */
typedef struct {
   GLuint lo, hi;
} Fx64;

/* only the low word is written; the high word is left untouched */
#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b)  a.lo |= b

/* shifts of 32 or more move the low word into the high word; smaller
 * shifts carry the top bits of lo into hi
 */
#define FX64_SHL(a, c)                                 \
   do {                                                \
       if ((c) >= 32) {                                \
          a.hi = a.lo << ((c) - 32);                   \
          a.lo = 0;                                    \
       } else {                                        \
          a.hi = (a.hi << (c)) | (a.lo >> (32 - (c))); \
          a.lo <<= (c);                                \
       }                                               \
   } while (0)

#endif
247
248
/* Per-component weight used by MAKEIVEC; currently 1 for every component. */
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
/* When non-zero, CALCCDOT clamps its result to the range [0, NV]. */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B for the segment V0 -> V1,
 * so that CALCCDOT can turn a texel into an index between 0 and NV.
 *
 * NC is the number of components.  The macro uses a variable named "i"
 * from the enclosing scope as its loop counter and divides by the squared
 * length of (V1 - V0), so V0 and V1 must differ.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * Compute TEXEL = (GLint)(dot(V, IV) + B) over NC components and, when
 * SAFECDOT is set, clamp it to [0, NV].  Like MAKEIVEC, it uses the
 * enclosing scope's "i" as its loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
287
288
289static GLint
290fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
291              GLubyte input[MAX_COMP], GLint nc)
292{
293   GLint i, j, best = -1;
294   GLfloat err = 1e9; /* big enough */
295
296   for (j = 0; j < nv; j++) {
297      GLfloat e = 0.0F;
298      for (i = 0; i < nc; i++) {
299         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
300      }
301      if (e < err) {
302         err = e;
303         best = j;
304      }
305   }
306
307   return best;
308}
309
310
311static GLint
312fxt1_worst (GLfloat vec[MAX_COMP],
313            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
314{
315   GLint i, k, worst = -1;
316   GLfloat err = -1.0F; /* small enough */
317
318   for (k = 0; k < n; k++) {
319      GLfloat e = 0.0F;
320      for (i = 0; i < nc; i++) {
321         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
322      }
323      if (e > err) {
324         err = e;
325         worst = k;
326      }
327   }
328
329   return worst;
330}
331
332
333static GLint
334fxt1_variance (GLdouble variance[MAX_COMP],
335               GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
336{
337   GLint i, k, best = 0;
338   GLint sx, sx2;
339   GLdouble var, maxvar = -1; /* small enough */
340   GLdouble teenth = 1.0 / n;
341
342   for (i = 0; i < nc; i++) {
343      sx = sx2 = 0;
344      for (k = 0; k < n; k++) {
345         GLint t = input[k][i];
346         sx += t;
347         sx2 += t * t;
348      }
349      var = sx2 * teenth - sx * sx * teenth * teenth;
350      if (maxvar < var) {
351         maxvar = var;
352         best = i;
353      }
354      if (variance) {
355         variance[i] = var;
356      }
357   }
358
359   return best;
360}
361
362
363static GLint
364fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
365             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
366{
367#if 0
368   /* Choose colors from a grid.
369    */
370   GLint i, j;
371
372   for (j = 0; j < nv; j++) {
373      GLint m = j * (n - 1) / (nv - 1);
374      for (i = 0; i < nc; i++) {
375         vec[j][i] = input[m][i];
376      }
377   }
378#else
379   /* Our solution here is to find the darkest and brightest colors in
380    * the 8x4 tile and use those as the two representative colors.
381    * There are probably better algorithms to use (histogram-based).
382    */
383   GLint i, j, k;
384   GLint minSum = 2000; /* big enough */
385   GLint maxSum = -1; /* small enough */
386   GLint minCol = 0; /* phoudoin: silent compiler! */
387   GLint maxCol = 0; /* phoudoin: silent compiler! */
388
389   struct {
390      GLint flag;
391      GLint key;
392      GLint freq;
393      GLint idx;
394   } hist[N_TEXELS];
395   GLint lenh = 0;
396
397   memset(hist, 0, sizeof(hist));
398
399   for (k = 0; k < n; k++) {
400      GLint l;
401      GLint key = 0;
402      GLint sum = 0;
403      for (i = 0; i < nc; i++) {
404         key <<= 8;
405         key |= input[k][i];
406         sum += input[k][i];
407      }
408      for (l = 0; l < n; l++) {
409         if (!hist[l].flag) {
410            /* alloc new slot */
411            hist[l].flag = !0;
412            hist[l].key = key;
413            hist[l].freq = 1;
414            hist[l].idx = k;
415            lenh = l + 1;
416            break;
417         } else if (hist[l].key == key) {
418            hist[l].freq++;
419            break;
420         }
421      }
422      if (minSum > sum) {
423         minSum = sum;
424         minCol = k;
425      }
426      if (maxSum < sum) {
427         maxSum = sum;
428         maxCol = k;
429      }
430   }
431
432   if (lenh <= nv) {
433      for (j = 0; j < lenh; j++) {
434         for (i = 0; i < nc; i++) {
435            vec[j][i] = (GLfloat)input[hist[j].idx][i];
436         }
437      }
438      for (; j < nv; j++) {
439         for (i = 0; i < nc; i++) {
440            vec[j][i] = vec[0][i];
441         }
442      }
443      return 0;
444   }
445
446   for (j = 0; j < nv; j++) {
447      for (i = 0; i < nc; i++) {
448         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
449      }
450   }
451#endif
452
453   return !0;
454}
455
456
457static GLint
458fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
459            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
460{
461   /* Use the generalized lloyd's algorithm for VQ:
462    *     find 4 color vectors.
463    *
464    *     for each sample color
465    *         sort to nearest vector.
466    *
467    *     replace each vector with the centroid of its matching colors.
468    *
469    *     repeat until RMS doesn't improve.
470    *
471    *     if a color vector has no samples, or becomes the same as another
472    *     vector, replace it with the color which is farthest from a sample.
473    *
474    * vec[][MAX_COMP]           initial vectors and resulting colors
475    * nv                        number of resulting colors required
476    * input[N_TEXELS][MAX_COMP] input texels
477    * nc                        number of components in input / vec
478    * n                         number of input samples
479    */
480
481   GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
482   GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
483   GLfloat error, lasterror = 1e9;
484
485   GLint i, j, k, rep;
486
487   /* the quantizer */
488   for (rep = 0; rep < LL_N_REP; rep++) {
489      /* reset sums & counters */
490      for (j = 0; j < nv; j++) {
491         for (i = 0; i < nc; i++) {
492            sum[j][i] = 0;
493         }
494         cnt[j] = 0;
495      }
496      error = 0;
497
498      /* scan whole block */
499      for (k = 0; k < n; k++) {
500#if 1
501         GLint best = -1;
502         GLfloat err = 1e9; /* big enough */
503         /* determine best vector */
504         for (j = 0; j < nv; j++) {
505            GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
506                      (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
507                      (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
508            if (nc == 4) {
509               e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
510            }
511            if (e < err) {
512               err = e;
513               best = j;
514            }
515         }
516#else
517         GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
518#endif
519         assert(best >= 0);
520         /* add in closest color */
521         for (i = 0; i < nc; i++) {
522            sum[best][i] += input[k][i];
523         }
524         /* mark this vector as used */
525         cnt[best]++;
526         /* accumulate error */
527         error += err;
528      }
529
530      /* check RMS */
531      if ((error < LL_RMS_E) ||
532          ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
533         return !0; /* good match */
534      }
535      lasterror = error;
536
537      /* move each vector to the barycenter of its closest colors */
538      for (j = 0; j < nv; j++) {
539         if (cnt[j]) {
540            GLfloat div = 1.0F / cnt[j];
541            for (i = 0; i < nc; i++) {
542               vec[j][i] = div * sum[j][i];
543            }
544         } else {
545            /* this vec has no samples or is identical with a previous vec */
546            GLint worst = fxt1_worst(vec[j], input, nc, n);
547            for (i = 0; i < nc; i++) {
548               vec[j][i] = input[worst][i];
549            }
550         }
551      }
552   }
553
554   return 0; /* could not converge fast enough */
555}
556
557
558static void
559fxt1_quantize_CHROMA (GLuint *cc,
560                      GLubyte input[N_TEXELS][MAX_COMP])
561{
562   const GLint n_vect = 4; /* 4 base vectors to find */
563   const GLint n_comp = 3; /* 3 components: R, G, B */
564   GLfloat vec[MAX_VECT][MAX_COMP];
565   GLint i, j, k;
566   Fx64 hi; /* high quadword */
567   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
568
569   if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
570      fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
571   }
572
573   FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
574   for (j = n_vect - 1; j >= 0; j--) {
575      for (i = 0; i < n_comp; i++) {
576         /* add in colors */
577         FX64_SHL(hi, 5);
578         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
579      }
580   }
581   ((Fx64 *)cc)[1] = hi;
582
583   lohi = lolo = 0;
584   /* right microtile */
585   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
586      lohi <<= 2;
587      lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
588   }
589   /* left microtile */
590   for (; k >= 0; k--) {
591      lolo <<= 2;
592      lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
593   }
594   cc[1] = lohi;
595   cc[0] = lolo;
596}
597
598
599static void
600fxt1_quantize_ALPHA0 (GLuint *cc,
601                      GLubyte input[N_TEXELS][MAX_COMP],
602                      GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
603{
604   const GLint n_vect = 3; /* 3 base vectors to find */
605   const GLint n_comp = 4; /* 4 components: R, G, B, A */
606   GLfloat vec[MAX_VECT][MAX_COMP];
607   GLint i, j, k;
608   Fx64 hi; /* high quadword */
609   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
610
611   /* the last vector indicates zero */
612   for (i = 0; i < n_comp; i++) {
613      vec[n_vect][i] = 0;
614   }
615
616   /* the first n texels in reord are guaranteed to be non-zero */
617   if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
618      fxt1_lloyd(vec, n_vect, reord, n_comp, n);
619   }
620
621   FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
622   for (j = n_vect - 1; j >= 0; j--) {
623      /* add in alphas */
624      FX64_SHL(hi, 5);
625      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
626   }
627   for (j = n_vect - 1; j >= 0; j--) {
628      for (i = 0; i < n_comp - 1; i++) {
629         /* add in colors */
630         FX64_SHL(hi, 5);
631         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
632      }
633   }
634   ((Fx64 *)cc)[1] = hi;
635
636   lohi = lolo = 0;
637   /* right microtile */
638   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
639      lohi <<= 2;
640      lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
641   }
642   /* left microtile */
643   for (; k >= 0; k--) {
644      lolo <<= 2;
645      lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
646   }
647   cc[1] = lohi;
648   cc[0] = lolo;
649}
650
651
/*
 * Encode one 32-texel block in ALPHA mode with the lerp bit set.
 *
 * Each 16-texel half (microtile) is described by two RGBA endpoints, one
 * of which (vec[1]) is shared by both halves: the left half interpolates
 * between vec[0] and vec[1], the right half between vec[2] and vec[1].
 * cc[0] and cc[1] receive the 2-bit interpolation indices of the left and
 * right halves; the high quadword (cc[2..3]) receives the mode bits, the
 * three alphas and the three colors at 5 bits per component.
 */
static void
fxt1_quantize_ALPHA1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 4; /* 4 components: R, G, B, A */
   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
   GLint sumL = 0, sumR = 0;
   GLint nn_comp;
   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* Left half.  If the darkest and brightest texels coincide, the search
    * is repeated with one component fewer, until they differ or no
    * components are left.  Note that sumL keeps accumulating across those
    * repeated passes.
    */
   nn_comp = n_comp;
   while ((minColL == maxColL) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = 0; k < N_TEXELS / 2; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColL = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColL = k;
           }
           sumL += sum;
       }

       nn_comp--;
   }

   /* Right half: same search over texels 16..31, accumulating into sumR. */
   nn_comp = n_comp;
   while ((minColR == maxColR) && nn_comp) {
       minSum = 2000; /* big enough */
       maxSum = -1; /* small enough */
       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
           GLint sum = 0;
           for (i = 0; i < nn_comp; i++) {
               sum += input[k][i];
           }
           if (minSum > sum) {
               minSum = sum;
               minColR = k;
           }
           if (maxSum < sum) {
               maxSum = sum;
               maxColR = k;
           }
           sumR += sum;
       }

       nn_comp--;
   }

   /* choose the common vector (yuck!) */
   {
      GLint j1, j2;
      GLint v1 = 0, v2 = 0;
      GLfloat err = 1e9; /* big enough */
      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
      for (i = 0; i < n_comp; i++) {
         tv[0][i] = input[minColL][i];
         tv[1][i] = input[maxColL][i];
         tv[2][i] = input[minColR][i];
         tv[3][i] = input[maxColR][i];
      }
      /* find the closest pair made of one left and one right extreme */
      for (j1 = 0; j1 < 2; j1++) {
         for (j2 = 2; j2 < 4; j2++) {
            GLfloat e = 0.0F;
            for (i = 0; i < n_comp; i++) {
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
            }
            if (e < err) {
               err = e;
               v1 = j1;
               v2 = j2;
            }
         }
      }
      /* vec[1] is the sumL/sumR weighted mean of that pair; vec[0] and
       * vec[2] are the remaining left and right extremes
       */
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = tv[1 - v1][i];
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
         vec[2][i] = tv[5 - v2][i];
      }
   }

   /* left microtile */
   cc[0] = 0;
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0;
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      cc[1] = lohi;
   }

   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
   for (j = n_vect - 1; j >= 0; j--) {
      /* add in alphas */
      FX64_SHL(hi, 5);
      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
   }
   for (j = n_vect - 1; j >= 0; j--) {
      for (i = 0; i < n_comp - 1; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
807
808
809static void
810fxt1_quantize_HI (GLuint *cc,
811                  GLubyte input[N_TEXELS][MAX_COMP],
812                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
813{
814   const GLint n_vect = 6; /* highest vector number */
815   const GLint n_comp = 3; /* 3 components: R, G, B */
816   GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
817   GLfloat iv[MAX_COMP];   /* interpolation vector */
818   GLint i, k;
819   GLuint hihi; /* high quadword: hi dword */
820
821   GLint minSum = 2000; /* big enough */
822   GLint maxSum = -1; /* small enough */
823   GLint minCol = 0; /* phoudoin: silent compiler! */
824   GLint maxCol = 0; /* phoudoin: silent compiler! */
825
826   /* Our solution here is to find the darkest and brightest colors in
827    * the 8x4 tile and use those as the two representative colors.
828    * There are probably better algorithms to use (histogram-based).
829    */
830   for (k = 0; k < n; k++) {
831      GLint sum = 0;
832      for (i = 0; i < n_comp; i++) {
833         sum += reord[k][i];
834      }
835      if (minSum > sum) {
836         minSum = sum;
837         minCol = k;
838      }
839      if (maxSum < sum) {
840         maxSum = sum;
841         maxCol = k;
842      }
843   }
844
845   hihi = 0; /* cc-hi = "00" */
846   for (i = 0; i < n_comp; i++) {
847      /* add in colors */
848      hihi <<= 5;
849      hihi |= reord[maxCol][i] >> 3;
850   }
851   for (i = 0; i < n_comp; i++) {
852      /* add in colors */
853      hihi <<= 5;
854      hihi |= reord[minCol][i] >> 3;
855   }
856   cc[3] = hihi;
857   cc[0] = cc[1] = cc[2] = 0;
858
859   /* compute interpolation vector */
860   if (minCol != maxCol) {
861      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
862   }
863
864   /* add in texels */
865   for (k = N_TEXELS - 1; k >= 0; k--) {
866      GLint t = k * 3;
867      GLuint *kk = (GLuint *)((char *)cc + t / 8);
868      GLint texel = n_vect + 1; /* transparent black */
869
870      if (!ISTBLACK(input[k])) {
871         if (minCol != maxCol) {
872            /* interpolate color */
873            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
874            /* add in texel */
875            kk[0] |= texel << (t & 7);
876         }
877      } else {
878         /* add in texel */
879         kk[0] |= texel << (t & 7);
880      }
881   }
882}
883
884
/*
 * Encode one 32-texel block in MIXED mode, in the variant where index 3
 * stands for transparent black.
 *
 * Each 16-texel half (microtile) gets its own pair of RGB endpoints, taken
 * from the non-black texels with the smallest and largest component sum.
 * cc[0] and cc[1] receive the 2-bit indices of the left and right halves;
 * the high quadword (cc[2..3]) receives the mode bits and the four
 * endpoints at 5 bits per component.
 */
static void
fxt1_quantize_MIXED1 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 2; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   GLint minSum;
   GLint maxSum;
   /* maxCol stays -1 when a half contains only transparent black texels */
   GLint minColL = 0, maxColL = -1;
   GLint minColR = 0, maxColR = -1;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   /* left half: texels 0..15, skipping transparent black */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColL = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColL = k;
         }
      }
   }
   /* right half: texels 16..31 (k continues from the loop above) */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      if (!ISTBLACK(input[k])) {
         GLint sum = 0;
         for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
         }
         if (minSum > sum) {
            minSum = sum;
            minColR = k;
         }
         if (maxSum < sum) {
            maxSum = sum;
            maxColR = k;
         }
      }
   }

   /* left microtile */
   if (maxColL == -1) {
      /* all transparent black */
      cc[0] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = 0;
         vec[1][i] = 0;
      }
   } else {
      /* indices default to 0; they are only computed when the two
       * endpoints are different texels
       */
      cc[0] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[0][i] = input[minColL][i];
         vec[1][i] = input[maxColL][i];
      }
      if (minColL != maxColL) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

         /* add in texels */
         lolo = 0;
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lolo <<= 2;
            lolo |= texel;
         }
         cc[0] = lolo;
      }
   }

   /* right microtile */
   if (maxColR == -1) {
      /* all transparent black */
      cc[1] = ~0u;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = 0;
         vec[3][i] = 0;
      }
   } else {
      /* same scheme as the left microtile */
      cc[1] = 0;
      for (i = 0; i < n_comp; i++) {
         vec[2][i] = input[minColR][i];
         vec[3][i] = input[maxColR][i];
      }
      if (minColR != maxColR) {
         /* compute interpolation vector */
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

         /* add in texels */
         lohi = 0;
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
            GLint texel = n_vect + 1; /* transparent black */
            if (!ISTBLACK(input[k])) {
               /* interpolate color */
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            }
            /* add in texel */
            lohi <<= 2;
            lohi |= texel;
         }
         cc[1] = lohi;
      }
   }

   /* The header is 9, plus bit 2 of vec[3]'s green kept in place and
    * bit 2 of vec[1]'s green moved down to bit 1.
    */
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1021
1022
/**
 * Quantize one block of N_TEXELS texels into the 128-bit code word \p cc,
 * using the "mixed" layout with the alpha flag cleared.
 *
 * fxt1_quantize() selects this routine when every texel of the block is
 * kept (none is transparent black) and no texel is flagged as non-opaque.
 *
 * The block is handled as two microtiles of N_TEXELS / 2 texels each:
 *   - cc[0] receives the sixteen 2-bit indices of the left microtile,
 *   - cc[1] receives the sixteen 2-bit indices of the right microtile,
 *   - ((Fx64 *)cc)[1] receives a 4-bit header followed by the four endpoint
 *     colours vec[3] .. vec[0], each stored as three 5-bit fields
 *     (component >> 3).
 *
 * Only the first n_comp (3) components of each texel are examined.
 *
 * \param cc     destination; four GLuint words are written.
 * \param input  texels of the block; entries [0, N_TEXELS / 2) form the left
 *               microtile and [N_TEXELS / 2, N_TEXELS) the right one.
 *
 * NOTE(review): MAKEIVEC, CALCCDOT, FX64_* and fxt1_variance are defined
 * outside this function; the comments below describe them only as far as
 * their use here shows.
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   /* indices (into input[]) of the two representative texels per microtile */
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* Disabled alternative: pick the endpoints by component sum (brightness)
    * instead of by the value of a single channel.
    */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* The return value is used below as a component index into input[k][];
    * presumably it is the channel with the largest variance in that
    * microtile -- confirm against fxt1_variance().
    */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   /* k carries on from N_TEXELS / 2: the right microtile */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   /* default: all indices 0, which is what is stored when both extrema are
    * the same texel and the block below is skipped
    */
   cc[0] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      /* walk backwards so that texel 0 ends up in the two lowest bits */
      lolo = 0;
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green */
      /* fxt1_decode_1MIXED (alpha == 0) rebuilds the green LSB of the first
       * colour as glsb ^ selb, where selb is bit 1 of this index word.
       * If bit 1 does not already equal the XOR of bit 2 of the two green
       * endpoints, swap the endpoints and invert every index (n -> 3 - n):
       * the same colours are selected, but bit 1 is flipped.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   /* same procedure as above, using vec[2]/vec[3] and the upper half of
    * input[]
    */
   cc[1] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green */
      /* see the left microtile: keep bit 1 of the index word equal to the
       * XOR of bit 2 of the two green endpoints
       */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* 4-bit header: 8 sets the top bit and leaves the lowest (alpha) bit
    * clear; bit 2 of vec[3]'s green goes to header bit 2 and bit 2 of
    * vec[1]'s green goes to header bit 1.  Assuming FX64_SHL/FX64_OR32 are
    * a plain 64-bit shift-left and OR, the twelve 5-bit pushes below move
    * this header up to the top four bits of the quadword.
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* mode MSB = 1, alpha flag = 0 */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1190
1191
1192static void
1193fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1194{
1195   GLint trualpha;
1196   GLubyte reord[N_TEXELS][MAX_COMP];
1197
1198   GLubyte input[N_TEXELS][MAX_COMP];
1199   GLint i, k, l;
1200
1201   if (comps == 3) {
1202      /* make the whole block opaque */
1203      memset(input, -1, sizeof(input));
1204   }
1205
1206   /* 8 texels each line */
1207   for (l = 0; l < 4; l++) {
1208      for (k = 0; k < 4; k++) {
1209         for (i = 0; i < comps; i++) {
1210            input[k + l * 4][i] = *lines[l]++;
1211         }
1212      }
1213      for (; k < 8; k++) {
1214         for (i = 0; i < comps; i++) {
1215            input[k + l * 4 + 12][i] = *lines[l]++;
1216         }
1217      }
1218   }
1219
1220   /* block layout:
1221    * 00, 01, 02, 03, 08, 09, 0a, 0b
1222    * 10, 11, 12, 13, 18, 19, 1a, 1b
1223    * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1224    * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1225    */
1226
1227   /* [dBorca]
1228    * stupidity flows forth from this
1229    */
1230   l = N_TEXELS;
1231   trualpha = 0;
1232   if (comps == 4) {
1233      /* skip all transparent black texels */
1234      l = 0;
1235      for (k = 0; k < N_TEXELS; k++) {
1236         /* test all components against 0 */
1237         if (!ISTBLACK(input[k])) {
1238            /* texel is not transparent black */
1239            COPY_4UBV(reord[l], input[k]);
1240            if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1241               /* non-opaque texel */
1242               trualpha = !0;
1243            }
1244            l++;
1245         }
1246      }
1247   }
1248
1249#if 0
1250   if (trualpha) {
1251      fxt1_quantize_ALPHA0(cc, input, reord, l);
1252   } else if (l == 0) {
1253      cc[0] = cc[1] = cc[2] = -1;
1254      cc[3] = 0;
1255   } else if (l < N_TEXELS) {
1256      fxt1_quantize_HI(cc, input, reord, l);
1257   } else {
1258      fxt1_quantize_CHROMA(cc, input);
1259   }
1260   (void)fxt1_quantize_ALPHA1;
1261   (void)fxt1_quantize_MIXED1;
1262   (void)fxt1_quantize_MIXED0;
1263#else
1264   if (trualpha) {
1265      fxt1_quantize_ALPHA1(cc, input);
1266   } else if (l == 0) {
1267      cc[0] = cc[1] = cc[2] = ~0u;
1268      cc[3] = 0;
1269   } else if (l < N_TEXELS) {
1270      fxt1_quantize_MIXED1(cc, input);
1271   } else {
1272      fxt1_quantize_MIXED0(cc, input);
1273   }
1274   (void)fxt1_quantize_ALPHA0;
1275   (void)fxt1_quantize_HI;
1276   (void)fxt1_quantize_CHROMA;
1277#endif
1278}
1279
1280
1281
1282/**
1283 * Upscale an image by replication, not (typical) stretching.
1284 * We use this when the image width or height is less than a
1285 * certain size (4, 8) and we need to upscale an image.
1286 */
1287static void
1288upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1289                   GLsizei outWidth, GLsizei outHeight,
1290                   GLint comps, const GLubyte *src, GLint srcRowStride,
1291                   GLubyte *dest )
1292{
1293   GLint i, j, k;
1294
1295   ASSERT(outWidth >= inWidth);
1296   ASSERT(outHeight >= inHeight);
1297#if 0
1298   ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1299   ASSERT((outWidth & 3) == 0);
1300   ASSERT((outHeight & 3) == 0);
1301#endif
1302
1303   for (i = 0; i < outHeight; i++) {
1304      const GLint ii = i % inHeight;
1305      for (j = 0; j < outWidth; j++) {
1306         const GLint jj = j % inWidth;
1307         for (k = 0; k < comps; k++) {
1308            dest[(i * outWidth + j) * comps + k]
1309               = src[ii * srcRowStride + jj * comps + k];
1310         }
1311      }
1312   }
1313}
1314
1315
1316static void
1317fxt1_encode (GLuint width, GLuint height, GLint comps,
1318             const void *source, GLint srcRowStride,
1319             void *dest, GLint destRowStride)
1320{
1321   GLuint x, y;
1322   const GLubyte *data;
1323   GLuint *encoded = (GLuint *)dest;
1324   void *newSource = NULL;
1325
1326   assert(comps == 3 || comps == 4);
1327
1328   /* Replicate image if width is not M8 or height is not M4 */
1329   if ((width & 7) | (height & 3)) {
1330      GLint newWidth = (width + 7) & ~7;
1331      GLint newHeight = (height + 3) & ~3;
1332      newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1333      if (!newSource) {
1334         GET_CURRENT_CONTEXT(ctx);
1335         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1336         goto cleanUp;
1337      }
1338      upscale_teximage2d(width, height, newWidth, newHeight,
1339                         comps, (const GLubyte *) source,
1340                         srcRowStride, (GLubyte *) newSource);
1341      source = newSource;
1342      width = newWidth;
1343      height = newHeight;
1344      srcRowStride = comps * newWidth;
1345   }
1346
1347   data = (const GLubyte *) source;
1348   destRowStride = (destRowStride - width * 2) / 4;
1349   for (y = 0; y < height; y += 4) {
1350      GLuint offs = 0 + (y + 0) * srcRowStride;
1351      for (x = 0; x < width; x += 8) {
1352         const GLubyte *lines[4];
1353         lines[0] = &data[offs];
1354         lines[1] = lines[0] + srcRowStride;
1355         lines[2] = lines[1] + srcRowStride;
1356         lines[3] = lines[2] + srcRowStride;
1357         offs += 8 * comps;
1358         fxt1_quantize(encoded, lines, comps);
1359         /* 128 bits per 8x4 block */
1360         encoded += 4;
1361      }
1362      encoded += destRowStride;
1363   }
1364
1365 cleanUp:
1366   if (newSource != NULL) {
1367      free(newSource);
1368   }
1369}
1370
1371
1372/***************************************************************************\
1373 * FXT1 decoder
1374 *
1375 * The decoder is based on GL_3DFX_texture_compression_FXT1
1376 * specification and serves as a concept for the encoder.
1377\***************************************************************************/
1378
1379
1380/* lookup table for scaling 5 bit colors up to 8 bits */
1381static const GLubyte _rgb_scale_5[] = {
1382   0,   8,   16,  25,  33,  41,  49,  58,
1383   66,  74,  82,  90,  99,  107, 115, 123,
1384   132, 140, 148, 156, 165, 173, 181, 189,
1385   197, 206, 214, 222, 230, 239, 247, 255
1386};
1387
1388/* lookup table for scaling 6 bit colors up to 8 bits */
1389static const GLubyte _rgb_scale_6[] = {
1390   0,   4,   8,   12,  16,  20,  24,  28,
1391   32,  36,  40,  45,  49,  53,  57,  61,
1392   65,  69,  73,  77,  81,  85,  89,  93,
1393   97,  101, 105, 109, 113, 117, 121, 125,
1394   130, 134, 138, 142, 146, 150, 154, 158,
1395   162, 166, 170, 174, 178, 182, 186, 190,
1396   194, 198, 202, 206, 210, 215, 219, 223,
1397   227, 231, 235, 239, 243, 247, 251, 255
1398};
1399
1400
/* CC_SEL: view `cc` as an array of 32-bit words, pick the word that holds
 * bit number `which` and shift it right so that bit becomes bit 0.  The
 * result is NOT masked and is NOT assembled across a word boundary; callers
 * mask off the field width they need.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: scale the low 5 bits of `c` up to 8 bits */
#define UP5(c) (_rgb_scale_5[(c) & 31])
/* UP6: scale the 6-bit value made of the low 5 bits of `c` followed by the
 * low bit of `b` (as least significant bit) up to 8 bits
 */
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
/* LERP: step `t` of `n` between c0 (t == 0) and c1 (t == n), rounded to
 * nearest.  The whole expansion is parenthesized so that the macro can be
 * used as an operand of a larger expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1405
1406
1407static void
1408fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1409{
1410   const GLuint *cc;
1411
1412   t *= 3;
1413   cc = (const GLuint *)(code + t / 8);
1414   t = (cc[0] >> (t & 7)) & 7;
1415
1416   if (t == 7) {
1417      rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1418   } else {
1419      GLubyte r, g, b;
1420      cc = (const GLuint *)(code + 12);
1421      if (t == 0) {
1422         b = UP5(CC_SEL(cc, 0));
1423         g = UP5(CC_SEL(cc, 5));
1424         r = UP5(CC_SEL(cc, 10));
1425      } else if (t == 6) {
1426         b = UP5(CC_SEL(cc, 15));
1427         g = UP5(CC_SEL(cc, 20));
1428         r = UP5(CC_SEL(cc, 25));
1429      } else {
1430         b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1431         g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1432         r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1433      }
1434      rgba[RCOMP] = r;
1435      rgba[GCOMP] = g;
1436      rgba[BCOMP] = b;
1437      rgba[ACOMP] = 255;
1438   }
1439}
1440
1441
1442static void
1443fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1444{
1445   const GLuint *cc;
1446   GLuint kk;
1447
1448   cc = (const GLuint *)code;
1449   if (t & 16) {
1450      cc++;
1451      t &= 15;
1452   }
1453   t = (cc[0] >> (t * 2)) & 3;
1454
1455   t *= 15;
1456   cc = (const GLuint *)(code + 8 + t / 8);
1457   kk = cc[0] >> (t & 7);
1458   rgba[BCOMP] = UP5(kk);
1459   rgba[GCOMP] = UP5(kk >> 5);
1460   rgba[RCOMP] = UP5(kk >> 10);
1461   rgba[ACOMP] = 255;
1462}
1463
1464
1465static void
1466fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1467{
1468   const GLuint *cc;
1469   GLuint col[2][3];
1470   GLint glsb, selb;
1471
1472   cc = (const GLuint *)code;
1473   if (t & 16) {
1474      t &= 15;
1475      t = (cc[1] >> (t * 2)) & 3;
1476      /* col 2 */
1477      col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1478      col[0][GCOMP] = CC_SEL(cc, 99);
1479      col[0][RCOMP] = CC_SEL(cc, 104);
1480      /* col 3 */
1481      col[1][BCOMP] = CC_SEL(cc, 109);
1482      col[1][GCOMP] = CC_SEL(cc, 114);
1483      col[1][RCOMP] = CC_SEL(cc, 119);
1484      glsb = CC_SEL(cc, 126);
1485      selb = CC_SEL(cc, 33);
1486   } else {
1487      t = (cc[0] >> (t * 2)) & 3;
1488      /* col 0 */
1489      col[0][BCOMP] = CC_SEL(cc, 64);
1490      col[0][GCOMP] = CC_SEL(cc, 69);
1491      col[0][RCOMP] = CC_SEL(cc, 74);
1492      /* col 1 */
1493      col[1][BCOMP] = CC_SEL(cc, 79);
1494      col[1][GCOMP] = CC_SEL(cc, 84);
1495      col[1][RCOMP] = CC_SEL(cc, 89);
1496      glsb = CC_SEL(cc, 125);
1497      selb = CC_SEL(cc, 1);
1498   }
1499
1500   if (CC_SEL(cc, 124) & 1) {
1501      /* alpha[0] == 1 */
1502
1503      if (t == 3) {
1504         /* zero */
1505         rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1506      } else {
1507         GLubyte r, g, b;
1508         if (t == 0) {
1509            b = UP5(col[0][BCOMP]);
1510            g = UP5(col[0][GCOMP]);
1511            r = UP5(col[0][RCOMP]);
1512         } else if (t == 2) {
1513            b = UP5(col[1][BCOMP]);
1514            g = UP6(col[1][GCOMP], glsb);
1515            r = UP5(col[1][RCOMP]);
1516         } else {
1517            b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1518            g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1519            r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1520         }
1521         rgba[RCOMP] = r;
1522         rgba[GCOMP] = g;
1523         rgba[BCOMP] = b;
1524         rgba[ACOMP] = 255;
1525      }
1526   } else {
1527      /* alpha[0] == 0 */
1528      GLubyte r, g, b;
1529      if (t == 0) {
1530         b = UP5(col[0][BCOMP]);
1531         g = UP6(col[0][GCOMP], glsb ^ selb);
1532         r = UP5(col[0][RCOMP]);
1533      } else if (t == 3) {
1534         b = UP5(col[1][BCOMP]);
1535         g = UP6(col[1][GCOMP], glsb);
1536         r = UP5(col[1][RCOMP]);
1537      } else {
1538         b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1539         g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1540                        UP6(col[1][GCOMP], glsb));
1541         r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1542      }
1543      rgba[RCOMP] = r;
1544      rgba[GCOMP] = g;
1545      rgba[BCOMP] = b;
1546      rgba[ACOMP] = 255;
1547   }
1548}
1549
1550
1551static void
1552fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1553{
1554   const GLuint *cc;
1555   GLubyte r, g, b, a;
1556
1557   cc = (const GLuint *)code;
1558   if (CC_SEL(cc, 124) & 1) {
1559      /* lerp == 1 */
1560      GLuint col0[4];
1561
1562      if (t & 16) {
1563         t &= 15;
1564         t = (cc[1] >> (t * 2)) & 3;
1565         /* col 2 */
1566         col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1567         col0[GCOMP] = CC_SEL(cc, 99);
1568         col0[RCOMP] = CC_SEL(cc, 104);
1569         col0[ACOMP] = CC_SEL(cc, 119);
1570      } else {
1571         t = (cc[0] >> (t * 2)) & 3;
1572         /* col 0 */
1573         col0[BCOMP] = CC_SEL(cc, 64);
1574         col0[GCOMP] = CC_SEL(cc, 69);
1575         col0[RCOMP] = CC_SEL(cc, 74);
1576         col0[ACOMP] = CC_SEL(cc, 109);
1577      }
1578
1579      if (t == 0) {
1580         b = UP5(col0[BCOMP]);
1581         g = UP5(col0[GCOMP]);
1582         r = UP5(col0[RCOMP]);
1583         a = UP5(col0[ACOMP]);
1584      } else if (t == 3) {
1585         b = UP5(CC_SEL(cc, 79));
1586         g = UP5(CC_SEL(cc, 84));
1587         r = UP5(CC_SEL(cc, 89));
1588         a = UP5(CC_SEL(cc, 114));
1589      } else {
1590         b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1591         g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1592         r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1593         a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1594      }
1595   } else {
1596      /* lerp == 0 */
1597
1598      if (t & 16) {
1599         cc++;
1600         t &= 15;
1601      }
1602      t = (cc[0] >> (t * 2)) & 3;
1603
1604      if (t == 3) {
1605         /* zero */
1606         r = g = b = a = 0;
1607      } else {
1608         GLuint kk;
1609         cc = (const GLuint *)code;
1610         a = UP5(cc[3] >> (t * 5 + 13));
1611         t *= 15;
1612         cc = (const GLuint *)(code + 8 + t / 8);
1613         kk = cc[0] >> (t & 7);
1614         b = UP5(kk);
1615         g = UP5(kk >> 5);
1616         r = UP5(kk >> 10);
1617      }
1618   }
1619   rgba[RCOMP] = r;
1620   rgba[GCOMP] = g;
1621   rgba[BCOMP] = b;
1622   rgba[ACOMP] = a;
1623}
1624
1625
1626void
1627fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1628               GLint i, GLint j, GLubyte *rgba)
1629{
1630   static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1631      fxt1_decode_1HI,     /* cc-high   = "00?" */
1632      fxt1_decode_1HI,     /* cc-high   = "00?" */
1633      fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1634      fxt1_decode_1ALPHA,  /* alpha     = "011" */
1635      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1636      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1637      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1638      fxt1_decode_1MIXED   /* mixed     = "1??" */
1639   };
1640
1641   const GLubyte *code = (const GLubyte *)texture +
1642                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1643   GLint mode = CC_SEL(code, 125);
1644   GLint t = i & 7;
1645
1646   if (t & 4) {
1647      t += 12;
1648   }
1649   t += (j & 3) * 4;
1650
1651   decode_1[mode](code, t, rgba);
1652}
1653
1654
1655#endif /* FEATURE_texture_fxt1 */
1656