1/*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26/**
27 * \file texcompress_fxt1.c
28 * GL_3DFX_texture_compression_FXT1 support.
29 */
30
31
32#include "glheader.h"
33#include "imports.h"
34#include "image.h"
35#include "macros.h"
36#include "mipmap.h"
37#include "texcompress.h"
38#include "texcompress_fxt1.h"
39#include "texstore.h"
40
41
/*
 * Forward declaration of the block encoder.  The texstore functions below
 * call it with the image size, the number of components per source pixel
 * (3 or 4), the source pixels with their row stride, and the destination
 * buffer with its row stride.
 */
static void
fxt1_encode (GLuint width, GLuint height, GLint comps,
             const void *source, GLint srcRowStride,
             void *dest, GLint destRowStride);

/*
 * Forward declaration of the single-texel decoder.
 * NOTE(review): the definition is not in this part of the file; presumably
 * it decodes texel (i, j) of a compressed image into rgba -- confirm against
 * the definition.
 */
static void
fxt1_decode_1 (const void *texture, GLint stride,
               GLint i, GLint j, GLubyte *rgba);
50
51
52/**
53 * Store user's image in rgb_fxt1 format.
54 */
55GLboolean
56_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
57{
58   const GLubyte *pixels;
59   GLint srcRowStride;
60   GLubyte *dst;
61   const GLubyte *tempImage = NULL;
62
63   assert(dstFormat == MESA_FORMAT_RGB_FXT1);
64
65   if (srcFormat != GL_RGB ||
66       srcType != GL_UNSIGNED_BYTE ||
67       ctx->_ImageTransferState ||
68       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
69       srcPacking->SwapBytes) {
70      /* convert image to RGB/GLubyte */
71      GLubyte *tempImageSlices[1];
72      int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
73      tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
74      if (!tempImage)
75         return GL_FALSE; /* out of memory */
76      tempImageSlices[0] = (GLubyte *) tempImage;
77      _mesa_texstore(ctx, dims,
78                     baseInternalFormat,
79                     MESA_FORMAT_RGB_UNORM8,
80                     rgbRowStride, tempImageSlices,
81                     srcWidth, srcHeight, srcDepth,
82                     srcFormat, srcType, srcAddr,
83                     srcPacking);
84      pixels = tempImage;
85      srcRowStride = 3 * srcWidth;
86      srcFormat = GL_RGB;
87   }
88   else {
89      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
90                                     srcFormat, srcType, 0, 0);
91
92      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
93                                            srcType) / sizeof(GLubyte);
94   }
95
96   dst = dstSlices[0];
97
98   fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
99               dst, dstRowStride);
100
101   free((void*) tempImage);
102
103   return GL_TRUE;
104}
105
106
107/**
108 * Store user's image in rgba_fxt1 format.
109 */
110GLboolean
111_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
112{
113   const GLubyte *pixels;
114   GLint srcRowStride;
115   GLubyte *dst;
116   const GLubyte *tempImage = NULL;
117
118   assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
119
120   if (srcFormat != GL_RGBA ||
121       srcType != GL_UNSIGNED_BYTE ||
122       ctx->_ImageTransferState ||
123       srcPacking->SwapBytes) {
124      /* convert image to RGBA/GLubyte */
125      GLubyte *tempImageSlices[1];
126      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
127      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
128      if (!tempImage)
129         return GL_FALSE; /* out of memory */
130      tempImageSlices[0] = (GLubyte *) tempImage;
131      _mesa_texstore(ctx, dims,
132                     baseInternalFormat,
133                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
134                                           : MESA_FORMAT_A8B8G8R8_UNORM,
135                     rgbaRowStride, tempImageSlices,
136                     srcWidth, srcHeight, srcDepth,
137                     srcFormat, srcType, srcAddr,
138                     srcPacking);
139      pixels = tempImage;
140      srcRowStride = 4 * srcWidth;
141      srcFormat = GL_RGBA;
142   }
143   else {
144      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
145                                     srcFormat, srcType, 0, 0);
146
147      srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
148                                            srcType) / sizeof(GLubyte);
149   }
150
151   dst = dstSlices[0];
152
153   fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
154               dst, dstRowStride);
155
156   free((void*) tempImage);
157
158   return GL_TRUE;
159}
160
161
162/***************************************************************************\
163 * FXT1 encoder
164 *
165 * The encoder was built by reversing the decoder,
166 * and is vaguely based on Texus2 by 3dfx. Note that this code
167 * is merely a proof of concept, since it is highly UNoptimized;
168 * moreover, it is sub-optimal due to initial conditions passed
169 * to Lloyd's algorithm (the interpolation modes are even worse).
170\***************************************************************************/
171
172
#define MAX_COMP 4 /* maximum number of components ever needed in a texel */
#define MAX_VECT 4 /* maximum number of base vectors ever needed */
#define N_TEXELS 32 /* number of texels in a block (always 32) */
#define LL_N_REP 50 /* upper bound on the iterations of lloyd's vq */
#define LL_RMS_D 10 /* fault tolerance (maximum delta between iterations) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
/* all-zero word that ISTBLACK compares against */
static const GLuint zero = 0;
/* true when the first sizeof(zero) bytes of v are all zero */
#define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
182
/*
 * Define a 64-bit unsigned integer type and macros
 *
 *   FX64_MOV32(a, b)  set a to the 32-bit value b
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The native variant is the one compiled; the two-word fallback is kept
 * for compilers without a 64-bit integer type.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear the high word too, so later shifts never read an indeterminate hi */
#define FX64_MOV32(a, b)                                     \
   do {                                                      \
      (a).lo = (b);                                          \
      (a).hi = 0;                                            \
   } while (0)
#define FX64_OR32(a, b)  ((a).lo |= (b))

/* a shift count of 0 is a no-op; it must not reach the ">> 32" below */
#define FX64_SHL(a, c)                                       \
   do {                                                      \
      if ((c) >= 32) {                                       \
         (a).hi = (a).lo << ((c) - 32);                      \
         (a).lo = 0;                                         \
      } else if ((c) > 0) {                                  \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c))); \
         (a).lo <<= (c);                                     \
      }                                                      \
   } while (0)

#endif
219
220
#define F(i) ((GLfloat)1) /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * Build the interpolation vector IV and bias B for the segment V0 -> V1,
 * so that CALCCDOT maps a color to an index in 0..NV along that segment
 * (V0 maps to 0, V1 maps to NV).  NC is the number of components.
 * Uses the integer variable `i` of the enclosing scope as loop counter.
 *
 * If V0 and V1 are identical the squared length is zero; the scale is then
 * forced to zero (instead of dividing by zero) so IV is all zeros, B is 0.5
 * and every color maps to index 0.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                   \
   do {                                                    \
      /* compute interpolation vector */                   \
      GLfloat d2 = 0.0F;                                   \
      GLfloat rd2;                                         \
                                                           \
      for (i = 0; i < (NC); i++) {                         \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);             \
         d2 += (IV)[i] * (IV)[i];                          \
      }                                                    \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F;       \
      (B) = 0;                                             \
      for (i = 0; i < (NC); i++) {                         \
         (IV)[i] *= F(i);                                  \
         (B) -= (IV)[i] * (V0)[i];                         \
         (IV)[i] *= rd2;                                   \
      }                                                    \
      (B) = (B) * rd2 + 0.5f;                              \
   } while (0)

/*
 * Project color V onto the interpolation vector IV (with bias B) and store
 * the truncated result in TEXEL.  With SAFECDOT enabled the result is
 * clamped to 0..NV.  Uses the integer variable `i` of the enclosing scope.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)                  \
   do {                                                    \
      GLfloat dot = 0.0F;                                  \
      for (i = 0; i < (NC); i++) {                         \
         dot += (V)[i] * (IV)[i];                          \
      }                                                    \
      (TEXEL) = (GLint)(dot + (B));                        \
      if (SAFECDOT) {                                      \
         if ((TEXEL) < 0) {                                \
            (TEXEL) = 0;                                   \
         } else if ((TEXEL) > (NV)) {                      \
            (TEXEL) = (NV);                                \
         }                                                 \
      }                                                    \
   } while (0)
259
260
261static GLint
262fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
263              GLubyte input[MAX_COMP], GLint nc)
264{
265   GLint i, j, best = -1;
266   GLfloat err = 1e9; /* big enough */
267
268   for (j = 0; j < nv; j++) {
269      GLfloat e = 0.0F;
270      for (i = 0; i < nc; i++) {
271         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
272      }
273      if (e < err) {
274         err = e;
275         best = j;
276      }
277   }
278
279   return best;
280}
281
282
283static GLint
284fxt1_worst (GLfloat vec[MAX_COMP],
285            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
286{
287   GLint i, k, worst = -1;
288   GLfloat err = -1.0F; /* small enough */
289
290   for (k = 0; k < n; k++) {
291      GLfloat e = 0.0F;
292      for (i = 0; i < nc; i++) {
293         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
294      }
295      if (e > err) {
296         err = e;
297         worst = k;
298      }
299   }
300
301   return worst;
302}
303
304
305static GLint
306fxt1_variance (GLdouble variance[MAX_COMP],
307               GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
308{
309   GLint i, k, best = 0;
310   GLint sx, sx2;
311   GLdouble var, maxvar = -1; /* small enough */
312   GLdouble teenth = 1.0 / n;
313
314   for (i = 0; i < nc; i++) {
315      sx = sx2 = 0;
316      for (k = 0; k < n; k++) {
317         GLint t = input[k][i];
318         sx += t;
319         sx2 += t * t;
320      }
321      var = sx2 * teenth - sx * sx * teenth * teenth;
322      if (maxvar < var) {
323         maxvar = var;
324         best = i;
325      }
326      if (variance) {
327         variance[i] = var;
328      }
329   }
330
331   return best;
332}
333
334
335static GLint
336fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
337             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
338{
339#if 0
340   /* Choose colors from a grid.
341    */
342   GLint i, j;
343
344   for (j = 0; j < nv; j++) {
345      GLint m = j * (n - 1) / (nv - 1);
346      for (i = 0; i < nc; i++) {
347         vec[j][i] = input[m][i];
348      }
349   }
350#else
351   /* Our solution here is to find the darkest and brightest colors in
352    * the 8x4 tile and use those as the two representative colors.
353    * There are probably better algorithms to use (histogram-based).
354    */
355   GLint i, j, k;
356   GLint minSum = 2000; /* big enough */
357   GLint maxSum = -1; /* small enough */
358   GLint minCol = 0; /* phoudoin: silent compiler! */
359   GLint maxCol = 0; /* phoudoin: silent compiler! */
360
361   struct {
362      GLint flag;
363      GLint key;
364      GLint freq;
365      GLint idx;
366   } hist[N_TEXELS];
367   GLint lenh = 0;
368
369   memset(hist, 0, sizeof(hist));
370
371   for (k = 0; k < n; k++) {
372      GLint l;
373      GLint key = 0;
374      GLint sum = 0;
375      for (i = 0; i < nc; i++) {
376         key <<= 8;
377         key |= input[k][i];
378         sum += input[k][i];
379      }
380      for (l = 0; l < n; l++) {
381         if (!hist[l].flag) {
382            /* alloc new slot */
383            hist[l].flag = !0;
384            hist[l].key = key;
385            hist[l].freq = 1;
386            hist[l].idx = k;
387            lenh = l + 1;
388            break;
389         } else if (hist[l].key == key) {
390            hist[l].freq++;
391            break;
392         }
393      }
394      if (minSum > sum) {
395         minSum = sum;
396         minCol = k;
397      }
398      if (maxSum < sum) {
399         maxSum = sum;
400         maxCol = k;
401      }
402   }
403
404   if (lenh <= nv) {
405      for (j = 0; j < lenh; j++) {
406         for (i = 0; i < nc; i++) {
407            vec[j][i] = (GLfloat)input[hist[j].idx][i];
408         }
409      }
410      for (; j < nv; j++) {
411         for (i = 0; i < nc; i++) {
412            vec[j][i] = vec[0][i];
413         }
414      }
415      return 0;
416   }
417
418   for (j = 0; j < nv; j++) {
419      for (i = 0; i < nc; i++) {
420         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
421      }
422   }
423#endif
424
425   return !0;
426}
427
428
429static GLint
430fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
431            GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
432{
433   /* Use the generalized lloyd's algorithm for VQ:
434    *     find 4 color vectors.
435    *
436    *     for each sample color
437    *         sort to nearest vector.
438    *
439    *     replace each vector with the centroid of its matching colors.
440    *
441    *     repeat until RMS doesn't improve.
442    *
443    *     if a color vector has no samples, or becomes the same as another
444    *     vector, replace it with the color which is farthest from a sample.
445    *
446    * vec[][MAX_COMP]           initial vectors and resulting colors
447    * nv                        number of resulting colors required
448    * input[N_TEXELS][MAX_COMP] input texels
449    * nc                        number of components in input / vec
450    * n                         number of input samples
451    */
452
453   GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
454   GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
455   GLfloat error, lasterror = 1e9;
456
457   GLint i, j, k, rep;
458
459   /* the quantizer */
460   for (rep = 0; rep < LL_N_REP; rep++) {
461      /* reset sums & counters */
462      for (j = 0; j < nv; j++) {
463         for (i = 0; i < nc; i++) {
464            sum[j][i] = 0;
465         }
466         cnt[j] = 0;
467      }
468      error = 0;
469
470      /* scan whole block */
471      for (k = 0; k < n; k++) {
472#if 1
473         GLint best = -1;
474         GLfloat err = 1e9; /* big enough */
475         /* determine best vector */
476         for (j = 0; j < nv; j++) {
477            GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
478                      (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
479                      (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
480            if (nc == 4) {
481               e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
482            }
483            if (e < err) {
484               err = e;
485               best = j;
486            }
487         }
488#else
489         GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
490#endif
491         assert(best >= 0);
492         /* add in closest color */
493         for (i = 0; i < nc; i++) {
494            sum[best][i] += input[k][i];
495         }
496         /* mark this vector as used */
497         cnt[best]++;
498         /* accumulate error */
499         error += err;
500      }
501
502      /* check RMS */
503      if ((error < LL_RMS_E) ||
504          ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
505         return !0; /* good match */
506      }
507      lasterror = error;
508
509      /* move each vector to the barycenter of its closest colors */
510      for (j = 0; j < nv; j++) {
511         if (cnt[j]) {
512            GLfloat div = 1.0F / cnt[j];
513            for (i = 0; i < nc; i++) {
514               vec[j][i] = div * sum[j][i];
515            }
516         } else {
517            /* this vec has no samples or is identical with a previous vec */
518            GLint worst = fxt1_worst(vec[j], input, nc, n);
519            for (i = 0; i < nc; i++) {
520               vec[j][i] = input[worst][i];
521            }
522         }
523      }
524   }
525
526   return 0; /* could not converge fast enough */
527}
528
529
530static void
531fxt1_quantize_CHROMA (GLuint *cc,
532                      GLubyte input[N_TEXELS][MAX_COMP])
533{
534   const GLint n_vect = 4; /* 4 base vectors to find */
535   const GLint n_comp = 3; /* 3 components: R, G, B */
536   GLfloat vec[MAX_VECT][MAX_COMP];
537   GLint i, j, k;
538   Fx64 hi; /* high quadword */
539   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
540
541   if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
542      fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
543   }
544
545   FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
546   for (j = n_vect - 1; j >= 0; j--) {
547      for (i = 0; i < n_comp; i++) {
548         /* add in colors */
549         FX64_SHL(hi, 5);
550         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
551      }
552   }
553   ((Fx64 *)cc)[1] = hi;
554
555   lohi = lolo = 0;
556   /* right microtile */
557   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
558      lohi <<= 2;
559      lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
560   }
561   /* left microtile */
562   for (; k >= 0; k--) {
563      lolo <<= 2;
564      lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
565   }
566   cc[1] = lohi;
567   cc[0] = lolo;
568}
569
570
571static void
572fxt1_quantize_ALPHA0 (GLuint *cc,
573                      GLubyte input[N_TEXELS][MAX_COMP],
574                      GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
575{
576   const GLint n_vect = 3; /* 3 base vectors to find */
577   const GLint n_comp = 4; /* 4 components: R, G, B, A */
578   GLfloat vec[MAX_VECT][MAX_COMP];
579   GLint i, j, k;
580   Fx64 hi; /* high quadword */
581   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
582
583   /* the last vector indicates zero */
584   for (i = 0; i < n_comp; i++) {
585      vec[n_vect][i] = 0;
586   }
587
588   /* the first n texels in reord are guaranteed to be non-zero */
589   if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
590      fxt1_lloyd(vec, n_vect, reord, n_comp, n);
591   }
592
593   FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
594   for (j = n_vect - 1; j >= 0; j--) {
595      /* add in alphas */
596      FX64_SHL(hi, 5);
597      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
598   }
599   for (j = n_vect - 1; j >= 0; j--) {
600      for (i = 0; i < n_comp - 1; i++) {
601         /* add in colors */
602         FX64_SHL(hi, 5);
603         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
604      }
605   }
606   ((Fx64 *)cc)[1] = hi;
607
608   lohi = lolo = 0;
609   /* right microtile */
610   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
611      lohi <<= 2;
612      lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
613   }
614   /* left microtile */
615   for (; k >= 0; k--) {
616      lolo <<= 2;
617      lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
618   }
619   cc[1] = lohi;
620   cc[0] = lolo;
621}
622
623
624static void
625fxt1_quantize_ALPHA1 (GLuint *cc,
626                      GLubyte input[N_TEXELS][MAX_COMP])
627{
628   const GLint n_vect = 3; /* highest vector number in each microtile */
629   const GLint n_comp = 4; /* 4 components: R, G, B, A */
630   GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
631   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
632   GLint i, j, k;
633   Fx64 hi; /* high quadword */
634   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
635
636   GLint minSum;
637   GLint maxSum;
638   GLint minColL = 0, maxColL = 0;
639   GLint minColR = 0, maxColR = 0;
640   GLint sumL = 0, sumR = 0;
641   GLint nn_comp;
642   /* Our solution here is to find the darkest and brightest colors in
643    * the 4x4 tile and use those as the two representative colors.
644    * There are probably better algorithms to use (histogram-based).
645    */
646   nn_comp = n_comp;
647   while ((minColL == maxColL) && nn_comp) {
648       minSum = 2000; /* big enough */
649       maxSum = -1; /* small enough */
650       for (k = 0; k < N_TEXELS / 2; k++) {
651           GLint sum = 0;
652           for (i = 0; i < nn_comp; i++) {
653               sum += input[k][i];
654           }
655           if (minSum > sum) {
656               minSum = sum;
657               minColL = k;
658           }
659           if (maxSum < sum) {
660               maxSum = sum;
661               maxColL = k;
662           }
663           sumL += sum;
664       }
665
666       nn_comp--;
667   }
668
669   nn_comp = n_comp;
670   while ((minColR == maxColR) && nn_comp) {
671       minSum = 2000; /* big enough */
672       maxSum = -1; /* small enough */
673       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
674           GLint sum = 0;
675           for (i = 0; i < nn_comp; i++) {
676               sum += input[k][i];
677           }
678           if (minSum > sum) {
679               minSum = sum;
680               minColR = k;
681           }
682           if (maxSum < sum) {
683               maxSum = sum;
684               maxColR = k;
685           }
686           sumR += sum;
687       }
688
689       nn_comp--;
690   }
691
692   /* choose the common vector (yuck!) */
693   {
694      GLint j1, j2;
695      GLint v1 = 0, v2 = 0;
696      GLfloat err = 1e9; /* big enough */
697      GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
698      for (i = 0; i < n_comp; i++) {
699         tv[0][i] = input[minColL][i];
700         tv[1][i] = input[maxColL][i];
701         tv[2][i] = input[minColR][i];
702         tv[3][i] = input[maxColR][i];
703      }
704      for (j1 = 0; j1 < 2; j1++) {
705         for (j2 = 2; j2 < 4; j2++) {
706            GLfloat e = 0.0F;
707            for (i = 0; i < n_comp; i++) {
708               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
709            }
710            if (e < err) {
711               err = e;
712               v1 = j1;
713               v2 = j2;
714            }
715         }
716      }
717      for (i = 0; i < n_comp; i++) {
718         vec[0][i] = tv[1 - v1][i];
719         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
720         vec[2][i] = tv[5 - v2][i];
721      }
722   }
723
724   /* left microtile */
725   cc[0] = 0;
726   if (minColL != maxColL) {
727      /* compute interpolation vector */
728      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
729
730      /* add in texels */
731      lolo = 0;
732      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
733         GLint texel;
734         /* interpolate color */
735         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
736         /* add in texel */
737         lolo <<= 2;
738         lolo |= texel;
739      }
740
741      cc[0] = lolo;
742   }
743
744   /* right microtile */
745   cc[1] = 0;
746   if (minColR != maxColR) {
747      /* compute interpolation vector */
748      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
749
750      /* add in texels */
751      lohi = 0;
752      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
753         GLint texel;
754         /* interpolate color */
755         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
756         /* add in texel */
757         lohi <<= 2;
758         lohi |= texel;
759      }
760
761      cc[1] = lohi;
762   }
763
764   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
765   for (j = n_vect - 1; j >= 0; j--) {
766      /* add in alphas */
767      FX64_SHL(hi, 5);
768      FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
769   }
770   for (j = n_vect - 1; j >= 0; j--) {
771      for (i = 0; i < n_comp - 1; i++) {
772         /* add in colors */
773         FX64_SHL(hi, 5);
774         FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
775      }
776   }
777   ((Fx64 *)cc)[1] = hi;
778}
779
780
781static void
782fxt1_quantize_HI (GLuint *cc,
783                  GLubyte input[N_TEXELS][MAX_COMP],
784                  GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
785{
786   const GLint n_vect = 6; /* highest vector number */
787   const GLint n_comp = 3; /* 3 components: R, G, B */
788   GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
789   GLfloat iv[MAX_COMP];   /* interpolation vector */
790   GLint i, k;
791   GLuint hihi; /* high quadword: hi dword */
792
793   GLint minSum = 2000; /* big enough */
794   GLint maxSum = -1; /* small enough */
795   GLint minCol = 0; /* phoudoin: silent compiler! */
796   GLint maxCol = 0; /* phoudoin: silent compiler! */
797
798   /* Our solution here is to find the darkest and brightest colors in
799    * the 8x4 tile and use those as the two representative colors.
800    * There are probably better algorithms to use (histogram-based).
801    */
802   for (k = 0; k < n; k++) {
803      GLint sum = 0;
804      for (i = 0; i < n_comp; i++) {
805         sum += reord[k][i];
806      }
807      if (minSum > sum) {
808         minSum = sum;
809         minCol = k;
810      }
811      if (maxSum < sum) {
812         maxSum = sum;
813         maxCol = k;
814      }
815   }
816
817   hihi = 0; /* cc-hi = "00" */
818   for (i = 0; i < n_comp; i++) {
819      /* add in colors */
820      hihi <<= 5;
821      hihi |= reord[maxCol][i] >> 3;
822   }
823   for (i = 0; i < n_comp; i++) {
824      /* add in colors */
825      hihi <<= 5;
826      hihi |= reord[minCol][i] >> 3;
827   }
828   cc[3] = hihi;
829   cc[0] = cc[1] = cc[2] = 0;
830
831   /* compute interpolation vector */
832   if (minCol != maxCol) {
833      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
834   }
835
836   /* add in texels */
837   for (k = N_TEXELS - 1; k >= 0; k--) {
838      GLint t = k * 3;
839      GLuint *kk = (GLuint *)((char *)cc + t / 8);
840      GLint texel = n_vect + 1; /* transparent black */
841
842      if (!ISTBLACK(input[k])) {
843         if (minCol != maxCol) {
844            /* interpolate color */
845            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
846            /* add in texel */
847            kk[0] |= texel << (t & 7);
848         }
849      } else {
850         /* add in texel */
851         kk[0] |= texel << (t & 7);
852      }
853   }
854}
855
856
857static void
858fxt1_quantize_MIXED1 (GLuint *cc,
859                      GLubyte input[N_TEXELS][MAX_COMP])
860{
861   const GLint n_vect = 2; /* highest vector number in each microtile */
862   const GLint n_comp = 3; /* 3 components: R, G, B */
863   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
864   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
865   GLint i, j, k;
866   Fx64 hi; /* high quadword */
867   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
868
869   GLint minSum;
870   GLint maxSum;
871   GLint minColL = 0, maxColL = -1;
872   GLint minColR = 0, maxColR = -1;
873
874   /* Our solution here is to find the darkest and brightest colors in
875    * the 4x4 tile and use those as the two representative colors.
876    * There are probably better algorithms to use (histogram-based).
877    */
878   minSum = 2000; /* big enough */
879   maxSum = -1; /* small enough */
880   for (k = 0; k < N_TEXELS / 2; k++) {
881      if (!ISTBLACK(input[k])) {
882         GLint sum = 0;
883         for (i = 0; i < n_comp; i++) {
884            sum += input[k][i];
885         }
886         if (minSum > sum) {
887            minSum = sum;
888            minColL = k;
889         }
890         if (maxSum < sum) {
891            maxSum = sum;
892            maxColL = k;
893         }
894      }
895   }
896   minSum = 2000; /* big enough */
897   maxSum = -1; /* small enough */
898   for (; k < N_TEXELS; k++) {
899      if (!ISTBLACK(input[k])) {
900         GLint sum = 0;
901         for (i = 0; i < n_comp; i++) {
902            sum += input[k][i];
903         }
904         if (minSum > sum) {
905            minSum = sum;
906            minColR = k;
907         }
908         if (maxSum < sum) {
909            maxSum = sum;
910            maxColR = k;
911         }
912      }
913   }
914
915   /* left microtile */
916   if (maxColL == -1) {
917      /* all transparent black */
918      cc[0] = ~0u;
919      for (i = 0; i < n_comp; i++) {
920         vec[0][i] = 0;
921         vec[1][i] = 0;
922      }
923   } else {
924      cc[0] = 0;
925      for (i = 0; i < n_comp; i++) {
926         vec[0][i] = input[minColL][i];
927         vec[1][i] = input[maxColL][i];
928      }
929      if (minColL != maxColL) {
930         /* compute interpolation vector */
931         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
932
933         /* add in texels */
934         lolo = 0;
935         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
936            GLint texel = n_vect + 1; /* transparent black */
937            if (!ISTBLACK(input[k])) {
938               /* interpolate color */
939               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
940            }
941            /* add in texel */
942            lolo <<= 2;
943            lolo |= texel;
944         }
945         cc[0] = lolo;
946      }
947   }
948
949   /* right microtile */
950   if (maxColR == -1) {
951      /* all transparent black */
952      cc[1] = ~0u;
953      for (i = 0; i < n_comp; i++) {
954         vec[2][i] = 0;
955         vec[3][i] = 0;
956      }
957   } else {
958      cc[1] = 0;
959      for (i = 0; i < n_comp; i++) {
960         vec[2][i] = input[minColR][i];
961         vec[3][i] = input[maxColR][i];
962      }
963      if (minColR != maxColR) {
964         /* compute interpolation vector */
965         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
966
967         /* add in texels */
968         lohi = 0;
969         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
970            GLint texel = n_vect + 1; /* transparent black */
971            if (!ISTBLACK(input[k])) {
972               /* interpolate color */
973               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
974            }
975            /* add in texel */
976            lohi <<= 2;
977            lohi |= texel;
978         }
979         cc[1] = lohi;
980      }
981   }
982
983   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
984   for (j = 2 * 2 - 1; j >= 0; j--) {
985      for (i = 0; i < n_comp; i++) {
986         /* add in colors */
987         FX64_SHL(hi, 5);
988         FX64_OR32(hi, vec[j][i] >> 3);
989      }
990   }
991   ((Fx64 *)cc)[1] = hi;
992}
993
994
/**
 * Quantize one block in "mixed" mode with the alpha bit clear.
 *
 * The block is treated as two microtiles: texels [0, N_TEXELS/2) form the
 * left one and texels [N_TEXELS/2, N_TEXELS) the right one.  For each
 * microtile two endpoint colors are picked from the input texels and every
 * texel is assigned a 2-bit index computed by CALCCDOT.
 *
 * \param cc     output code words: cc[0] receives the left microtile's
 *               indices, cc[1] the right microtile's, and the second Fx64
 *               of cc receives the endpoint colors plus the header bits
 * \param input  the block's texels; only the first n_comp (3) components
 *               of each texel are read here
 */
static void
fxt1_quantize_MIXED0 (GLuint *cc,
                      GLubyte input[N_TEXELS][MAX_COMP])
{
   const GLint n_vect = 3; /* highest vector number in each microtile */
   const GLint n_comp = 3; /* 3 components: R, G, B */
   GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
   GLfloat b, iv[MAX_COMP]; /* interpolation vector */
   GLint i, j, k;
   Fx64 hi; /* high quadword */
   GLuint lohi, lolo; /* low quadword: hi dword, lo dword */

   /* indices (into input) of the texels chosen as endpoints */
   GLint minColL = 0, maxColL = 0;
   GLint minColR = 0, maxColR = 0;
#if 0
   /* disabled alternative: pick endpoints by component sum */
   GLint minSum;
   GLint maxSum;

   /* Our solution here is to find the darkest and brightest colors in
    * the 4x4 tile and use those as the two representative colors.
    * There are probably better algorithms to use (histogram-based).
    */
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColL = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColL = k;
      }
   }
   minSum = 2000; /* big enough */
   maxSum = -1; /* small enough */
   for (; k < N_TEXELS; k++) {
      GLint sum = 0;
      for (i = 0; i < n_comp; i++) {
         sum += input[k][i];
      }
      if (minSum > sum) {
         minSum = sum;
         minColR = k;
      }
      if (maxSum < sum) {
         maxSum = sum;
         maxColR = k;
      }
   }
#else
   GLint minVal;
   GLint maxVal;
   /* the value returned by fxt1_variance is used below as a channel index */
   GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
   GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);

   /* Scan the channel with max variance for lo & hi
    * and use those as the two representative colors.
    */
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   for (k = 0; k < N_TEXELS / 2; k++) {
      GLint t = input[k][maxVarL];
      if (minVal > t) {
         minVal = t;
         minColL = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColL = k;
      }
   }
   minVal = 2000; /* big enough */
   maxVal = -1; /* small enough */
   /* k continues from N_TEXELS / 2: right microtile */
   for (; k < N_TEXELS; k++) {
      GLint t = input[k][maxVarR];
      if (minVal > t) {
         minVal = t;
         minColR = k;
      }
      if (maxVal < t) {
         maxVal = t;
         maxColR = k;
      }
   }
#endif

   /* left microtile */
   cc[0] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[0][i] = input[minColL][i];
      vec[1][i] = input[maxColL][i];
   }
   /* when both endpoints are the same texel, all indices stay 0 */
   if (minColL != maxColL) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);

      /* add in texels */
      lolo = 0;
      /* walk backwards so that texel 0 ends up in the two lowest bits */
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lolo <<= 2;
         lolo |= texel;
      }

      /* funky encoding for LSB of green */
      /* Bit 1 of the index word must equal the XOR of bit 2 of the two
       * endpoint greens (fxt1_decode_1MIXED reads that bit as "selb").
       * If it does not, swap the endpoints and complement every index,
       * which flips bit 1 while leaving the XOR unchanged.
       */
      if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[1][i] = input[minColL][i];
            vec[0][i] = input[maxColL][i];
         }
         lolo = ~lolo;
      }

      cc[0] = lolo;
   }

   /* right microtile */
   cc[1] = 0;
   for (i = 0; i < n_comp; i++) {
      vec[2][i] = input[minColR][i];
      vec[3][i] = input[maxColR][i];
   }
   /* when both endpoints are the same texel, all indices stay 0 */
   if (minColR != maxColR) {
      /* compute interpolation vector */
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);

      /* add in texels */
      lohi = 0;
      /* walk backwards so that texel N_TEXELS / 2 ends up in the lowest bits */
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
         GLint texel;
         /* interpolate color */
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
         /* add in texel */
         lohi <<= 2;
         lohi |= texel;
      }

      /* funky encoding for LSB of green */
      /* same endpoint swap / index complement as for the left microtile */
      if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
         for (i = 0; i < n_comp; i++) {
            vec[3][i] = input[minColR][i];
            vec[2][i] = input[maxColR][i];
         }
         lohi = ~lohi;
      }

      cc[1] = lohi;
   }

   /* Header nibble: bit 3 set (the decoder's mixed mode is "1??"), bit 0
    * clear (alpha = 0), bit 2 = bit 2 of vec[3] green, bit 1 = bit 2 of
    * vec[1] green.  NOTE(review): presumably the 60 bits of shifting below
    * move this nibble to the top of the quadword -- confirm against the
    * FX64_* macro definitions.
    */
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* mixed mode bit = "1", alpha = "0" */
   /* append the four endpoints, vec[3] first, as 5 bits per component */
   for (j = 2 * 2 - 1; j >= 0; j--) {
      for (i = 0; i < n_comp; i++) {
         /* add in colors */
         FX64_SHL(hi, 5);
         FX64_OR32(hi, vec[j][i] >> 3);
      }
   }
   ((Fx64 *)cc)[1] = hi;
}
1162
1163
1164static void
1165fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1166{
1167   GLint trualpha;
1168   GLubyte reord[N_TEXELS][MAX_COMP];
1169
1170   GLubyte input[N_TEXELS][MAX_COMP];
1171   GLint i, k, l;
1172
1173   if (comps == 3) {
1174      /* make the whole block opaque */
1175      memset(input, -1, sizeof(input));
1176   }
1177
1178   /* 8 texels each line */
1179   for (l = 0; l < 4; l++) {
1180      for (k = 0; k < 4; k++) {
1181         for (i = 0; i < comps; i++) {
1182            input[k + l * 4][i] = *lines[l]++;
1183         }
1184      }
1185      for (; k < 8; k++) {
1186         for (i = 0; i < comps; i++) {
1187            input[k + l * 4 + 12][i] = *lines[l]++;
1188         }
1189      }
1190   }
1191
1192   /* block layout:
1193    * 00, 01, 02, 03, 08, 09, 0a, 0b
1194    * 10, 11, 12, 13, 18, 19, 1a, 1b
1195    * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1196    * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1197    */
1198
1199   /* [dBorca]
1200    * stupidity flows forth from this
1201    */
1202   l = N_TEXELS;
1203   trualpha = 0;
1204   if (comps == 4) {
1205      /* skip all transparent black texels */
1206      l = 0;
1207      for (k = 0; k < N_TEXELS; k++) {
1208         /* test all components against 0 */
1209         if (!ISTBLACK(input[k])) {
1210            /* texel is not transparent black */
1211            COPY_4UBV(reord[l], input[k]);
1212            if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1213               /* non-opaque texel */
1214               trualpha = !0;
1215            }
1216            l++;
1217         }
1218      }
1219   }
1220
1221#if 0
1222   if (trualpha) {
1223      fxt1_quantize_ALPHA0(cc, input, reord, l);
1224   } else if (l == 0) {
1225      cc[0] = cc[1] = cc[2] = -1;
1226      cc[3] = 0;
1227   } else if (l < N_TEXELS) {
1228      fxt1_quantize_HI(cc, input, reord, l);
1229   } else {
1230      fxt1_quantize_CHROMA(cc, input);
1231   }
1232   (void)fxt1_quantize_ALPHA1;
1233   (void)fxt1_quantize_MIXED1;
1234   (void)fxt1_quantize_MIXED0;
1235#else
1236   if (trualpha) {
1237      fxt1_quantize_ALPHA1(cc, input);
1238   } else if (l == 0) {
1239      cc[0] = cc[1] = cc[2] = ~0u;
1240      cc[3] = 0;
1241   } else if (l < N_TEXELS) {
1242      fxt1_quantize_MIXED1(cc, input);
1243   } else {
1244      fxt1_quantize_MIXED0(cc, input);
1245   }
1246   (void)fxt1_quantize_ALPHA0;
1247   (void)fxt1_quantize_HI;
1248   (void)fxt1_quantize_CHROMA;
1249#endif
1250}
1251
1252
1253
1254/**
1255 * Upscale an image by replication, not (typical) stretching.
1256 * We use this when the image width or height is less than a
1257 * certain size (4, 8) and we need to upscale an image.
1258 */
1259static void
1260upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1261                   GLsizei outWidth, GLsizei outHeight,
1262                   GLint comps, const GLubyte *src, GLint srcRowStride,
1263                   GLubyte *dest )
1264{
1265   GLint i, j, k;
1266
1267   assert(outWidth >= inWidth);
1268   assert(outHeight >= inHeight);
1269#if 0
1270   assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1271   assert((outWidth & 3) == 0);
1272   assert((outHeight & 3) == 0);
1273#endif
1274
1275   for (i = 0; i < outHeight; i++) {
1276      const GLint ii = i % inHeight;
1277      for (j = 0; j < outWidth; j++) {
1278         const GLint jj = j % inWidth;
1279         for (k = 0; k < comps; k++) {
1280            dest[(i * outWidth + j) * comps + k]
1281               = src[ii * srcRowStride + jj * comps + k];
1282         }
1283      }
1284   }
1285}
1286
1287
1288static void
1289fxt1_encode (GLuint width, GLuint height, GLint comps,
1290             const void *source, GLint srcRowStride,
1291             void *dest, GLint destRowStride)
1292{
1293   GLuint x, y;
1294   const GLubyte *data;
1295   GLuint *encoded = (GLuint *)dest;
1296   void *newSource = NULL;
1297
1298   assert(comps == 3 || comps == 4);
1299
1300   /* Replicate image if width is not M8 or height is not M4 */
1301   if ((width & 7) | (height & 3)) {
1302      GLint newWidth = (width + 7) & ~7;
1303      GLint newHeight = (height + 3) & ~3;
1304      newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1305      if (!newSource) {
1306         GET_CURRENT_CONTEXT(ctx);
1307         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1308         goto cleanUp;
1309      }
1310      upscale_teximage2d(width, height, newWidth, newHeight,
1311                         comps, (const GLubyte *) source,
1312                         srcRowStride, (GLubyte *) newSource);
1313      source = newSource;
1314      width = newWidth;
1315      height = newHeight;
1316      srcRowStride = comps * newWidth;
1317   }
1318
1319   data = (const GLubyte *) source;
1320   destRowStride = (destRowStride - width * 2) / 4;
1321   for (y = 0; y < height; y += 4) {
1322      GLuint offs = 0 + (y + 0) * srcRowStride;
1323      for (x = 0; x < width; x += 8) {
1324         const GLubyte *lines[4];
1325         lines[0] = &data[offs];
1326         lines[1] = lines[0] + srcRowStride;
1327         lines[2] = lines[1] + srcRowStride;
1328         lines[3] = lines[2] + srcRowStride;
1329         offs += 8 * comps;
1330         fxt1_quantize(encoded, lines, comps);
1331         /* 128 bits per 8x4 block */
1332         encoded += 4;
1333      }
1334      encoded += destRowStride;
1335   }
1336
1337 cleanUp:
1338   free(newSource);
1339}
1340
1341
1342/***************************************************************************\
1343 * FXT1 decoder
1344 *
1345 * The decoder is based on GL_3DFX_texture_compression_FXT1
1346 * specification and serves as a concept for the encoder.
1347\***************************************************************************/
1348
1349
/* lookup table for scaling 5 bit colors up to 8 bits
 * (entry i is i * 255 / 31 rounded to the nearest integer)
 */
static const GLubyte _rgb_scale_5[] = {
   0,   8,   16,  25,  33,  41,  49,  58,
   66,  74,  82,  90,  99,  107, 115, 123,
   132, 140, 148, 156, 165, 173, 181, 189,
   197, 206, 214, 222, 230, 239, 247, 255
};

/* lookup table for scaling 6 bit colors up to 8 bits
 * (entry i is i * 255 / 63 rounded to the nearest integer)
 */
static const GLubyte _rgb_scale_6[] = {
   0,   4,   8,   12,  16,  20,  24,  28,
   32,  36,  40,  45,  49,  53,  57,  61,
   65,  69,  73,  77,  81,  85,  89,  93,
   97,  101, 105, 109, 113, 117, 121, 125,
   130, 134, 138, 142, 146, 150, 154, 158,
   162, 166, 170, 174, 178, 182, 186, 190,
   194, 198, 202, 206, 210, 215, 219, 223,
   227, 231, 235, 239, 243, 247, 251, 255
};
1369
1370
/* CC_SEL: view cc as an array of 32-bit words and return the word holding
 * bit `which`, shifted right so that bit becomes bit 0.  Higher bits are
 * NOT masked off (callers mask, or go through UP5/UP6), and a field that
 * straddles a 32-bit word boundary cannot be fetched with this macro.
 * The cast keeps the const qualifier since the data is only read.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
/* UP5: expand the low 5 bits of c to an 8-bit value. */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* UP6: expand a 6-bit value (low 5 bits of c as the high part, bit 0 of b
 * as the least significant bit) to an 8-bit value.
 */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* LERP: rounded integer interpolation, t/n of the way from c0 to c1.
 * The whole expansion is parenthesized so that the macro can be embedded
 * in a larger expression without the trailing division re-associating.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1375
1376
1377static void
1378fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1379{
1380   const GLuint *cc;
1381
1382   t *= 3;
1383   cc = (const GLuint *)(code + t / 8);
1384   t = (cc[0] >> (t & 7)) & 7;
1385
1386   if (t == 7) {
1387      rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1388   } else {
1389      GLubyte r, g, b;
1390      cc = (const GLuint *)(code + 12);
1391      if (t == 0) {
1392         b = UP5(CC_SEL(cc, 0));
1393         g = UP5(CC_SEL(cc, 5));
1394         r = UP5(CC_SEL(cc, 10));
1395      } else if (t == 6) {
1396         b = UP5(CC_SEL(cc, 15));
1397         g = UP5(CC_SEL(cc, 20));
1398         r = UP5(CC_SEL(cc, 25));
1399      } else {
1400         b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1401         g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1402         r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1403      }
1404      rgba[RCOMP] = r;
1405      rgba[GCOMP] = g;
1406      rgba[BCOMP] = b;
1407      rgba[ACOMP] = 255;
1408   }
1409}
1410
1411
1412static void
1413fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1414{
1415   const GLuint *cc;
1416   GLuint kk;
1417
1418   cc = (const GLuint *)code;
1419   if (t & 16) {
1420      cc++;
1421      t &= 15;
1422   }
1423   t = (cc[0] >> (t * 2)) & 3;
1424
1425   t *= 15;
1426   cc = (const GLuint *)(code + 8 + t / 8);
1427   kk = cc[0] >> (t & 7);
1428   rgba[BCOMP] = UP5(kk);
1429   rgba[GCOMP] = UP5(kk >> 5);
1430   rgba[RCOMP] = UP5(kk >> 10);
1431   rgba[ACOMP] = 255;
1432}
1433
1434
1435static void
1436fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1437{
1438   const GLuint *cc;
1439   GLuint col[2][3];
1440   GLint glsb, selb;
1441
1442   cc = (const GLuint *)code;
1443   if (t & 16) {
1444      t &= 15;
1445      t = (cc[1] >> (t * 2)) & 3;
1446      /* col 2 */
1447      col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1448      col[0][GCOMP] = CC_SEL(cc, 99);
1449      col[0][RCOMP] = CC_SEL(cc, 104);
1450      /* col 3 */
1451      col[1][BCOMP] = CC_SEL(cc, 109);
1452      col[1][GCOMP] = CC_SEL(cc, 114);
1453      col[1][RCOMP] = CC_SEL(cc, 119);
1454      glsb = CC_SEL(cc, 126);
1455      selb = CC_SEL(cc, 33);
1456   } else {
1457      t = (cc[0] >> (t * 2)) & 3;
1458      /* col 0 */
1459      col[0][BCOMP] = CC_SEL(cc, 64);
1460      col[0][GCOMP] = CC_SEL(cc, 69);
1461      col[0][RCOMP] = CC_SEL(cc, 74);
1462      /* col 1 */
1463      col[1][BCOMP] = CC_SEL(cc, 79);
1464      col[1][GCOMP] = CC_SEL(cc, 84);
1465      col[1][RCOMP] = CC_SEL(cc, 89);
1466      glsb = CC_SEL(cc, 125);
1467      selb = CC_SEL(cc, 1);
1468   }
1469
1470   if (CC_SEL(cc, 124) & 1) {
1471      /* alpha[0] == 1 */
1472
1473      if (t == 3) {
1474         /* zero */
1475         rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1476      } else {
1477         GLubyte r, g, b;
1478         if (t == 0) {
1479            b = UP5(col[0][BCOMP]);
1480            g = UP5(col[0][GCOMP]);
1481            r = UP5(col[0][RCOMP]);
1482         } else if (t == 2) {
1483            b = UP5(col[1][BCOMP]);
1484            g = UP6(col[1][GCOMP], glsb);
1485            r = UP5(col[1][RCOMP]);
1486         } else {
1487            b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1488            g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1489            r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1490         }
1491         rgba[RCOMP] = r;
1492         rgba[GCOMP] = g;
1493         rgba[BCOMP] = b;
1494         rgba[ACOMP] = 255;
1495      }
1496   } else {
1497      /* alpha[0] == 0 */
1498      GLubyte r, g, b;
1499      if (t == 0) {
1500         b = UP5(col[0][BCOMP]);
1501         g = UP6(col[0][GCOMP], glsb ^ selb);
1502         r = UP5(col[0][RCOMP]);
1503      } else if (t == 3) {
1504         b = UP5(col[1][BCOMP]);
1505         g = UP6(col[1][GCOMP], glsb);
1506         r = UP5(col[1][RCOMP]);
1507      } else {
1508         b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1509         g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1510                        UP6(col[1][GCOMP], glsb));
1511         r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1512      }
1513      rgba[RCOMP] = r;
1514      rgba[GCOMP] = g;
1515      rgba[BCOMP] = b;
1516      rgba[ACOMP] = 255;
1517   }
1518}
1519
1520
1521static void
1522fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1523{
1524   const GLuint *cc;
1525   GLubyte r, g, b, a;
1526
1527   cc = (const GLuint *)code;
1528   if (CC_SEL(cc, 124) & 1) {
1529      /* lerp == 1 */
1530      GLuint col0[4];
1531
1532      if (t & 16) {
1533         t &= 15;
1534         t = (cc[1] >> (t * 2)) & 3;
1535         /* col 2 */
1536         col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1537         col0[GCOMP] = CC_SEL(cc, 99);
1538         col0[RCOMP] = CC_SEL(cc, 104);
1539         col0[ACOMP] = CC_SEL(cc, 119);
1540      } else {
1541         t = (cc[0] >> (t * 2)) & 3;
1542         /* col 0 */
1543         col0[BCOMP] = CC_SEL(cc, 64);
1544         col0[GCOMP] = CC_SEL(cc, 69);
1545         col0[RCOMP] = CC_SEL(cc, 74);
1546         col0[ACOMP] = CC_SEL(cc, 109);
1547      }
1548
1549      if (t == 0) {
1550         b = UP5(col0[BCOMP]);
1551         g = UP5(col0[GCOMP]);
1552         r = UP5(col0[RCOMP]);
1553         a = UP5(col0[ACOMP]);
1554      } else if (t == 3) {
1555         b = UP5(CC_SEL(cc, 79));
1556         g = UP5(CC_SEL(cc, 84));
1557         r = UP5(CC_SEL(cc, 89));
1558         a = UP5(CC_SEL(cc, 114));
1559      } else {
1560         b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1561         g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1562         r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1563         a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1564      }
1565   } else {
1566      /* lerp == 0 */
1567
1568      if (t & 16) {
1569         cc++;
1570         t &= 15;
1571      }
1572      t = (cc[0] >> (t * 2)) & 3;
1573
1574      if (t == 3) {
1575         /* zero */
1576         r = g = b = a = 0;
1577      } else {
1578         GLuint kk;
1579         cc = (const GLuint *)code;
1580         a = UP5(cc[3] >> (t * 5 + 13));
1581         t *= 15;
1582         cc = (const GLuint *)(code + 8 + t / 8);
1583         kk = cc[0] >> (t & 7);
1584         b = UP5(kk);
1585         g = UP5(kk >> 5);
1586         r = UP5(kk >> 10);
1587      }
1588   }
1589   rgba[RCOMP] = r;
1590   rgba[GCOMP] = g;
1591   rgba[BCOMP] = b;
1592   rgba[ACOMP] = a;
1593}
1594
1595
1596static void
1597fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1598               GLint i, GLint j, GLubyte *rgba)
1599{
1600   static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1601      fxt1_decode_1HI,     /* cc-high   = "00?" */
1602      fxt1_decode_1HI,     /* cc-high   = "00?" */
1603      fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1604      fxt1_decode_1ALPHA,  /* alpha     = "011" */
1605      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1606      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1607      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1608      fxt1_decode_1MIXED   /* mixed     = "1??" */
1609   };
1610
1611   const GLubyte *code = (const GLubyte *)texture +
1612                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1613   GLint mode = CC_SEL(code, 125);
1614   GLint t = i & 7;
1615
1616   if (t & 4) {
1617      t += 12;
1618   }
1619   t += (j & 3) * 4;
1620
1621   decode_1[mode](code, t, rgba);
1622}
1623
1624
1625
1626
1627static void
1628fetch_rgb_fxt1(const GLubyte *map,
1629               GLint rowStride, GLint i, GLint j, GLfloat *texel)
1630{
1631   GLubyte rgba[4];
1632   fxt1_decode_1(map, rowStride, i, j, rgba);
1633   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1634   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1635   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1636   texel[ACOMP] = 1.0F;
1637}
1638
1639
1640static void
1641fetch_rgba_fxt1(const GLubyte *map,
1642                GLint rowStride, GLint i, GLint j, GLfloat *texel)
1643{
1644   GLubyte rgba[4];
1645   fxt1_decode_1(map, rowStride, i, j, rgba);
1646   texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1647   texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1648   texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1649   texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1650}
1651
1652
1653compressed_fetch_func
1654_mesa_get_fxt_fetch_func(mesa_format format)
1655{
1656   switch (format) {
1657   case MESA_FORMAT_RGB_FXT1:
1658      return fetch_rgb_fxt1;
1659   case MESA_FORMAT_RGBA_FXT1:
1660      return fetch_rgba_fxt1;
1661   default:
1662      return NULL;
1663   }
1664}
1665