1/*
2 * Copyright (C) 2011 Red Hat Inc.
3 *
4 * block compression parts are:
5 * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 *
26 * Author:
27 *    Dave Airlie
28 */
29
30/* included by texcompress_rgtc to define byte/ubyte compressors */
31
32static void TAG(fetch_texel_rgtc)(unsigned srcRowStride, const TYPE *pixdata,
33				  unsigned i, unsigned j, TYPE *value, unsigned comps)
34{
35   TYPE decode;
36   const TYPE *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8 * comps);
37   const TYPE alpha0 = blksrc[0];
38   const TYPE alpha1 = blksrc[1];
39   const char bit_pos = ((j&3) * 4 + (i&3)) * 3;
40   const TYPE acodelow = blksrc[2 + bit_pos / 8];
41   const TYPE acodehigh = (3 + bit_pos / 8) < 8 ? blksrc[3 + bit_pos / 8] : 0;
42   const TYPE code = (acodelow >> (bit_pos & 0x7) |
43      (acodehigh  << (8 - (bit_pos & 0x7)))) & 0x7;
44
45   if (code == 0)
46      decode = alpha0;
47   else if (code == 1)
48      decode = alpha1;
49   else if (alpha0 > alpha1)
50      decode = ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7);
51   else if (code < 6)
52      decode = ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5);
53   else if (code == 6)
54      decode = T_MIN;
55   else
56      decode = T_MAX;
57
58   *value = decode;
59}
60
61static void TAG(write_rgtc_encoded_channel)(TYPE *blkaddr,
62					    TYPE alphabase1,
63					    TYPE alphabase2,
64					    TYPE alphaenc[16])
65{
66   *blkaddr++ = alphabase1;
67   *blkaddr++ = alphabase2;
68   *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6);
69   *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7);
70   *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5);
71   *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6);
72   *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7);
73   *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5);
74}
75
76static void TAG(encode_rgtc_ubyte)(TYPE *blkaddr, TYPE srccolors[4][4],
77			     int numxpixels, int numypixels)
78{
79   TYPE alphabase[2], alphause[2];
80   short alphatest[2] = { 0 };
81   unsigned int alphablockerror1, alphablockerror2, alphablockerror3;
82   TYPE i, j, aindex, acutValues[7];
83   TYPE alphaenc1[16], alphaenc2[16], alphaenc3[16];
84   int alphaabsmin = 0, alphaabsmax = 0;
85   short alphadist;
86
87   /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
88   alphabase[0] = T_MAX; alphabase[1] = T_MIN;
89   for (j = 0; j < numypixels; j++) {
90      for (i = 0; i < numxpixels; i++) {
91	 if (srccolors[j][i] == T_MIN)
92            alphaabsmin = 1;
93         else if (srccolors[j][i] == T_MAX)
94            alphaabsmax = 1;
95         else {
96            if (srccolors[j][i] > alphabase[1])
97               alphabase[1] = srccolors[j][i];
98            if (srccolors[j][i] < alphabase[0])
99               alphabase[0] = srccolors[j][i];
100         }
101      }
102   }
103
104
105   if (((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax))
106       || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax)) { /* one color, either max or min */
107      /* shortcut here since it is a very common case (and also avoids later problems) */
108      /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
109
110      *blkaddr++ = srccolors[0][0];
111      blkaddr++;
112      *blkaddr++ = 0;
113      *blkaddr++ = 0;
114      *blkaddr++ = 0;
115      *blkaddr++ = 0;
116      *blkaddr++ = 0;
117      *blkaddr++ = 0;
118#if RGTC_DEBUG
119      fprintf(stderr, "enc0 used\n");
120#endif
121      return;
122   }
123
124   /* find best encoding for alpha0 > alpha1 */
125   /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
126   alphablockerror1 = 0x0;
127   alphablockerror2 = 0xffffffff;
128   alphablockerror3 = 0xffffffff;
129   if (alphaabsmin) alphause[0] = T_MIN;
130   else alphause[0] = alphabase[0];
131   if (alphaabsmax) alphause[1] = T_MAX;
132   else alphause[1] = alphabase[1];
133   /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
134   for (aindex = 0; aindex < 7; aindex++) {
135      /* don't forget here is always rounded down */
136      acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
137   }
138
139   for (j = 0; j < numypixels; j++) {
140      for (i = 0; i < numxpixels; i++) {
141         /* maybe it's overkill to have the most complicated calculation just for the error
142            calculation which we only need to figure out if encoding1 or encoding2 is better... */
143         if (srccolors[j][i] > acutValues[0]) {
144            alphaenc1[4*j + i] = 0;
145            alphadist = srccolors[j][i] - alphause[1];
146         }
147         else if (srccolors[j][i] > acutValues[1]) {
148            alphaenc1[4*j + i] = 2;
149            alphadist = srccolors[j][i] - (alphause[1] * 6 + alphause[0] * 1) / 7;
150         }
151         else if (srccolors[j][i] > acutValues[2]) {
152            alphaenc1[4*j + i] = 3;
153            alphadist = srccolors[j][i] - (alphause[1] * 5 + alphause[0] * 2) / 7;
154         }
155         else if (srccolors[j][i] > acutValues[3]) {
156            alphaenc1[4*j + i] = 4;
157            alphadist = srccolors[j][i] - (alphause[1] * 4 + alphause[0] * 3) / 7;
158         }
159         else if (srccolors[j][i] > acutValues[4]) {
160            alphaenc1[4*j + i] = 5;
161            alphadist = srccolors[j][i] - (alphause[1] * 3 + alphause[0] * 4) / 7;
162         }
163         else if (srccolors[j][i] > acutValues[5]) {
164            alphaenc1[4*j + i] = 6;
165            alphadist = srccolors[j][i] - (alphause[1] * 2 + alphause[0] * 5) / 7;
166         }
167         else if (srccolors[j][i] > acutValues[6]) {
168            alphaenc1[4*j + i] = 7;
169            alphadist = srccolors[j][i] - (alphause[1] * 1 + alphause[0] * 6) / 7;
170         }
171         else {
172            alphaenc1[4*j + i] = 1;
173            alphadist = srccolors[j][i] - alphause[0];
174         }
175         alphablockerror1 += alphadist * alphadist;
176      }
177   }
178
179#if RGTC_DEBUG
180   for (i = 0; i < 16; i++) {
181      fprintf(stderr, "%d ", alphaenc1[i]);
182   }
183   fprintf(stderr, "cutVals ");
184   for (i = 0; i < 7; i++) {
185      fprintf(stderr, "%d ", acutValues[i]);
186   }
187   fprintf(stderr, "srcVals ");
188   for (j = 0; j < numypixels; j++) {
189      for (i = 0; i < numxpixels; i++) {
190	 fprintf(stderr, "%d ", srccolors[j][i]);
191      }
192   }
193   fprintf(stderr, "\n");
194#endif
195
196   /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
197      are false but try it anyway */
198   if (alphablockerror1 >= 32) {
199
200      /* don't bother if encoding is already very good, this condition should also imply
201      we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
202      alphablockerror2 = 0;
203      for (aindex = 0; aindex < 5; aindex++) {
204         /* don't forget here is always rounded down */
205         acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
206      }
207      for (j = 0; j < numypixels; j++) {
208         for (i = 0; i < numxpixels; i++) {
209             /* maybe it's overkill to have the most complicated calculation just for the error
210               calculation which we only need to figure out if encoding1 or encoding2 is better... */
211            if (srccolors[j][i] == T_MIN) {
212               alphaenc2[4*j + i] = 6;
213               alphadist = 0;
214            }
215            else if (srccolors[j][i] == T_MAX) {
216               alphaenc2[4*j + i] = 7;
217               alphadist = 0;
218            }
219            else if (srccolors[j][i] <= acutValues[0]) {
220               alphaenc2[4*j + i] = 0;
221               alphadist = srccolors[j][i] - alphabase[0];
222            }
223            else if (srccolors[j][i] <= acutValues[1]) {
224               alphaenc2[4*j + i] = 2;
225               alphadist = srccolors[j][i] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
226            }
227            else if (srccolors[j][i] <= acutValues[2]) {
228               alphaenc2[4*j + i] = 3;
229               alphadist = srccolors[j][i] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
230            }
231            else if (srccolors[j][i] <= acutValues[3]) {
232               alphaenc2[4*j + i] = 4;
233               alphadist = srccolors[j][i] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
234            }
235            else if (srccolors[j][i] <= acutValues[4]) {
236               alphaenc2[4*j + i] = 5;
237               alphadist = srccolors[j][i] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
238            }
239            else {
240               alphaenc2[4*j + i] = 1;
241               alphadist = srccolors[j][i] - alphabase[1];
242            }
243            alphablockerror2 += alphadist * alphadist;
244         }
245      }
246
247
248      /* skip this if the error is already very small
249         this encoding is MUCH better on average than #2 though, but expensive! */
250      if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
251         short blockerrlin1 = 0;
252         short blockerrlin2 = 0;
253         TYPE nralphainrangelow = 0;
254         TYPE nralphainrangehigh = 0;
255         alphatest[0] = T_MAX;
256         alphatest[1] = T_MIN;
257         /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
258         for (j = 0; j < numypixels; j++) {
259            for (i = 0; i < numxpixels; i++) {
260               if ((srccolors[j][i] > alphatest[1]) && (srccolors[j][i] < (T_MAX -(alphabase[1] - alphabase[0]) / 28)))
261                  alphatest[1] = srccolors[j][i];
262               if ((srccolors[j][i] < alphatest[0]) && (srccolors[j][i] > (alphabase[1] - alphabase[0]) / 28))
263                  alphatest[0] = srccolors[j][i];
264            }
265         }
266          /* shouldn't happen too often, don't really care about those degenerated cases */
267          if (alphatest[1] <= alphatest[0]) {
268             alphatest[0] = T_MIN+1;
269             alphatest[1] = T_MAX-1;
270         }
271         for (aindex = 0; aindex < 5; aindex++) {
272         /* don't forget here is always rounded down */
273            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
274         }
275
276         /* find the "average" difference between the alpha values and the next encoded value.
277            This is then used to calculate new base values.
278            Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
279            since they will see more improvement, and also because the values in the middle are somewhat
280            likely to get no improvement at all (because the base values might move in different directions)?
281            OTOH it would mean the values in the middle are even less likely to get an improvement
282         */
283         for (j = 0; j < numypixels; j++) {
284            for (i = 0; i < numxpixels; i++) {
285               if (srccolors[j][i] <= alphatest[0] / 2) {
286               }
287               else if (srccolors[j][i] > ((T_MAX + alphatest[1]) / 2)) {
288               }
289               else if (srccolors[j][i] <= acutValues[0]) {
290                  blockerrlin1 += (srccolors[j][i] - alphatest[0]);
291                  nralphainrangelow += 1;
292               }
293               else if (srccolors[j][i] <= acutValues[1]) {
294                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
295                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
296                  nralphainrangelow += 1;
297                  nralphainrangehigh += 1;
298               }
299               else if (srccolors[j][i] <= acutValues[2]) {
300                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
301                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
302                  nralphainrangelow += 1;
303                  nralphainrangehigh += 1;
304               }
305               else if (srccolors[j][i] <= acutValues[3]) {
306                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
307                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
308                  nralphainrangelow += 1;
309                  nralphainrangehigh += 1;
310               }
311               else if (srccolors[j][i] <= acutValues[4]) {
312                  blockerrlin1 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
313                  blockerrlin2 += (srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
314                  nralphainrangelow += 1;
315                  nralphainrangehigh += 1;
316                  }
317               else {
318                  blockerrlin2 += (srccolors[j][i] - alphatest[1]);
319                  nralphainrangehigh += 1;
320               }
321            }
322         }
323         /* shouldn't happen often, needed to avoid div by zero */
324         if (nralphainrangelow == 0) nralphainrangelow = 1;
325         if (nralphainrangehigh == 0) nralphainrangehigh = 1;
326         alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
327#if RGTC_DEBUG
328         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
329         fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);
330#endif
331         /* again shouldn't really happen often... */
332         if (alphatest[0] < T_MIN) {
333            alphatest[0] = T_MIN;
334         }
335         alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
336         if (alphatest[1] > T_MAX) {
337            alphatest[1] = T_MAX;
338         }
339
340         alphablockerror3 = 0;
341         for (aindex = 0; aindex < 5; aindex++) {
342         /* don't forget here is always rounded down */
343            acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
344         }
345         for (j = 0; j < numypixels; j++) {
346            for (i = 0; i < numxpixels; i++) {
347                /* maybe it's overkill to have the most complicated calculation just for the error
348                  calculation which we only need to figure out if encoding1 or encoding2 is better... */
349               if (srccolors[j][i] <= alphatest[0] / 2) {
350                  alphaenc3[4*j + i] = 6;
351                  alphadist = srccolors[j][i];
352               }
353               else if (srccolors[j][i] > ((T_MAX + alphatest[1]) / 2)) {
354                  alphaenc3[4*j + i] = 7;
355                  alphadist = T_MAX - srccolors[j][i];
356               }
357               else if (srccolors[j][i] <= acutValues[0]) {
358                  alphaenc3[4*j + i] = 0;
359                  alphadist = srccolors[j][i] - alphatest[0];
360               }
361               else if (srccolors[j][i] <= acutValues[1]) {
362                 alphaenc3[4*j + i] = 2;
363                 alphadist = srccolors[j][i] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
364               }
365               else if (srccolors[j][i] <= acutValues[2]) {
366                  alphaenc3[4*j + i] = 3;
367                  alphadist = srccolors[j][i] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
368               }
369               else if (srccolors[j][i] <= acutValues[3]) {
370                  alphaenc3[4*j + i] = 4;
371                  alphadist = srccolors[j][i] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
372               }
373               else if (srccolors[j][i] <= acutValues[4]) {
374                  alphaenc3[4*j + i] = 5;
375                  alphadist = srccolors[j][i] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
376               }
377               else {
378                  alphaenc3[4*j + i] = 1;
379                  alphadist = srccolors[j][i] - alphatest[1];
380               }
381               alphablockerror3 += alphadist * alphadist;
382            }
383         }
384      }
385   }
386
387  /* write the alpha values and encoding back. */
388   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
389#if RGTC_DEBUG
390      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);
391      fprintf(stderr,"w1: min %d max %d au0 %d au1 %d\n",
392	      T_MIN, T_MAX,
393	      alphause[1], alphause[0]);
394#endif
395
396      TAG(write_rgtc_encoded_channel)( blkaddr, alphause[1], alphause[0], alphaenc1 );
397   }
398   else if (alphablockerror2 <= alphablockerror3) {
399#if RGTC_DEBUG
400      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);
401      fprintf(stderr,"w2: min %d max %d au0 %d au1 %d\n",
402	      T_MIN, T_MAX,
403	      alphabase[0], alphabase[1]);
404#endif
405
406      TAG(write_rgtc_encoded_channel)( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
407   }
408   else {
409#if RGTC_DEBUG
410      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);
411      fprintf(stderr,"w3: min %d max %d au0 %d au1 %d\n",
412	      T_MIN, T_MAX,
413	      alphatest[0], alphatest[1]);
414#endif
415
416      TAG(write_rgtc_encoded_channel)( blkaddr, (TYPE)alphatest[0], (TYPE)alphatest[1], alphaenc3 );
417   }
418}
419