1/* libs/opengles/dxt.cpp
2**
3** Copyright 2007, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#define TIMING 0
19
20#if TIMING
21#include <sys/time.h> // for optimization timing
22#include <stdio.h>
23#include <stdlib.h>
24#endif
25
26#include <GLES/gl.h>
27#include <utils/Endian.h>
28
29#include "context.h"
30
31#define TIMING 0
32
33namespace android {
34
35static uint8_t avg23tab[64*64];
36static volatile int tables_initialized = 0;
37
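// The fixed-point definitions below give the same results as
//  #define div5(x) ((x)/5)
//  #define div7(x) ((x)/7)
// for the arguments used in this file: non-negative weighted sums of two
// 8-bit alpha values, i.e. at most 5*255 for div5 and 7*255 for div7.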
41
42// Use fixed-point to divide by 5 and 7
43// 3277 = 2^14/5 + 1
44// 2341 = 2^14/7 + 1
45#define div5(x) (((x)*3277) >> 14)
46#define div7(x) (((x)*2341) >> 14)
47
48// Table with entry [a << 6 | b] = (2*a + b)/3 for 0 <= a,b < 64
49#define avg23(x0,x1) avg23tab[((x0) << 6) | (x1)]
50
51// Extract 5/6/5 RGB
52#define red(x)   (((x) >> 11) & 0x1f)
53#define green(x) (((x) >>  5) & 0x3f)
54#define blue(x)  ( (x)        & 0x1f)
55
/*
 * Expand separate 5-bit red, 6-bit green and 5-bit blue values to a
 * packed 8/8/8 word (red in bits 23..16, green in 15..8, blue in 7..0).
 *
 * Each channel is widened by shifting it to the top of its byte and
 * copying its own high bits into the vacated low bits, so that 0 maps
 * to 0x00 and the maximum input maps to 0xff.
 *
 * Operation count: 8 shifts, 0 &, 5 |
 */
inline static int rgb565SepTo888(int r, int g, int b)
{
    int r8 = (r << 3) | (r >> 2);   // 5 bits -> 8 bits
    int g8 = (g << 2) | (g >> 4);   // 6 bits -> 8 bits
    int b8 = (b << 3) | (b >> 2);   // 5 bits -> 8 bits

    return (r8 << 16) | (g8 << 8) | b8;
}
68
/*
 * Convert 5/6/5 RGB (as a single 16-bit word) to 8/8/8
 *
 *                   r4r3r2r1 r0g5g4g3 g2g1g0b4 b3b2b1b0   rgb
 *            r4r3r2 r1r0g5g4 g3g2g1g0 b4b3b2b1 b0 0 0 0   rgb << 3
 * r4r3r2r1 r0r4r3r2 g5g4g3g2 g1g0g5g4 b4b3b2b1 b0b4b3b2   desired result
 *
 * Construct the 24-bit RGB word as:
 *
 * r4r3r2r1 r0------ -------- -------- -------- --------  (rgb << 8) & 0xf80000
 *            r4r3r2 -------- -------- -------- --------  (rgb << 3) & 0x070000
 *                   g5g4g3g2 g1g0---- -------- --------  (rgb << 5) & 0x00fc00
 *                                g5g4 -------- --------  (rgb >> 1) & 0x000300
 *                                     b4b3b2b1 b0------  (rgb << 3) & 0x0000f8
 *                                                b4b3b2  (rgb >> 2) & 0x000007
 *
 * The result matches rgb565SepTo888(red(rgb), green(rgb), blue(rgb)).
 *
 * Operation count: 5 shifts, 6 &, 5 | (n.b. rgb << 3 is used twice)
 */
inline static int rgb565To888(int rgb)

{
    // Must be a LEFT shift: it supplies both the low three red bits
    // (bits 18..16) and the five high blue bits (bits 7..3).  A right
    // shift here would drop the red bits and pull green into blue.
    int rgb3 = rgb << 3;
    return (((rgb << 8) & 0xf80000) |
            ( rgb3      & 0x070000) |
            ((rgb << 5) & 0x00fc00) |
            ((rgb >> 1) & 0x000300) |
            ( rgb3      & 0x0000f8) |
            ((rgb >> 2) & 0x000007));
}
98
99#if __BYTE_ORDER == __BIG_ENDIAN
/*
 * Reverse the byte order of a 32-bit word.  Used on big-endian hosts to
 * turn the little-endian words of the DXT stream into native values.
 *
 * All arithmetic is done on unsigned values so that moving a byte
 * >= 0x80 into the top position never overflows a signed int.
 */
static uint32_t swap(uint32_t x) {
    return ((x & 0x000000ffu) << 24) |
           ((x & 0x0000ff00u) <<  8) |
           ((x & 0x00ff0000u) >>  8) |
           ((x & 0xff000000u) >> 24);
}
108#endif
109
110static void
111init_tables()
112{
113    if (tables_initialized) {
114        return;
115    }
116
117    for (int i = 0; i < 64; i++) {
118        for (int j = 0; j < 64; j++) {
119            int avg = (2*i + j)/3;
120            avg23tab[(i << 6) | j] = avg;
121        }
122    }
123
124    asm volatile ("" : : : "memory");
125    tables_initialized = 1;
126}
127
128/*
129 * Utility to scan a DXT1 compressed texture to determine whether it
130 * contains a transparent pixel (color0 < color1, code == 3).  This
131 * may be useful if the application lacks information as to whether
132 * the true format is GL_COMPRESSED_RGB_S3TC_DXT1_EXT or
133 * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT.
134 */
135bool
136DXT1HasAlpha(const GLvoid *data, int width, int height) {
137#if TIMING
138    struct timeval start_t, end_t;
139    struct timezone tz;
140
141    gettimeofday(&start_t, &tz);
142#endif
143
144    bool hasAlpha = false;
145
146    int xblocks = (width + 3)/4;
147    int yblocks = (height + 3)/4;
148    int numblocks = xblocks*yblocks;
149
150    uint32_t const *d32 = (uint32_t *)data;
151    for (int b = 0; b < numblocks; b++) {
152        uint32_t colors = *d32++;
153
154#if __BYTE_ORDER == __BIG_ENDIAN
155        colors = swap(colors);
156#endif
157
158        uint16_t color0 = colors & 0xffff;
159        uint16_t color1 = colors >> 16;
160
161        if (color0 < color1) {
162            // There's no need to endian-swap within 'bits'
163            // since we don't care which pixel is the transparent one
164            uint32_t bits = *d32++;
165
166            // Detect if any (odd, even) pair of bits are '11'
167            //      bits: b31 b30 b29 ... b3 b2 b1 b0
168            // bits >> 1: b31 b31 b30 ... b4 b3 b2 b1
169            //         &: b31 (b31 & b30) (b29 & b28) ... (b2 & b1) (b1 & b0)
170            //  & 0x55..:   0 (b31 & b30)       0     ...     0     (b1 & b0)
171            if (((bits & (bits >> 1)) & 0x55555555) != 0) {
172                hasAlpha = true;
173                goto done;
174            }
175        } else {
176            // Skip 4 bytes
177            ++d32;
178        }
179    }
180
181 done:
182#if TIMING
183    gettimeofday(&end_t, &tz);
184    long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 +
185        (end_t.tv_usec - start_t.tv_usec);
186
187    printf("Scanned w=%d h=%d in %ld usec\n", width, height, usec);
188#endif
189
190    return hasAlpha;
191}
192
193static void
194decodeDXT1(const GLvoid *data, int width, int height,
195           void *surface, int stride,
196           bool hasAlpha)
197
198{
199    init_tables();
200
201    uint32_t const *d32 = (uint32_t *)data;
202
203    // Color table for the current block
204    uint16_t c[4];
205    c[0] = c[1] = c[2] = c[3] = 0;
206
207    // Specified colors from the previous block
208    uint16_t prev_color0 = 0x0000;
209    uint16_t prev_color1 = 0x0000;
210
211    uint16_t* rowPtr = (uint16_t*)surface;
212    for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
213        uint16_t *blockPtr = rowPtr;
214        for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
215            uint32_t colors = *d32++;
216            uint32_t bits = *d32++;
217
218#if __BYTE_ORDER == __BIG_ENDIAN
219            colors = swap(colors);
220            bits = swap(bits);
221#endif
222
223            // Raw colors
224            uint16_t color0 = colors & 0xffff;
225            uint16_t color1 = colors >> 16;
226
227            // If the new block has the same base colors as the
228            // previous one, we don't need to recompute the color
229            // table c[]
230            if (color0 != prev_color0 || color1 != prev_color1) {
231                // Store raw colors for comparison with next block
232                prev_color0 = color0;
233                prev_color1 = color1;
234
235                int r0 =   red(color0);
236                int g0 = green(color0);
237                int b0 =  blue(color0);
238
239                int r1 =   red(color1);
240                int g1 = green(color1);
241                int b1 =  blue(color1);
242
243                if (hasAlpha) {
244                    c[0] = (r0 << 11) | ((g0 >> 1) << 6) | (b0 << 1) | 0x1;
245                    c[1] = (r1 << 11) | ((g1 >> 1) << 6) | (b1 << 1) | 0x1;
246                } else {
247                    c[0] = color0;
248                    c[1] = color1;
249                }
250
251                int r2, g2, b2, r3, g3, b3, a3;
252
253                int bbits = bits >> 1;
254                bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
255                bool has3 = ((bbits &  bits) & 0x55555555) != 0;
256
257                if (has2 || has3) {
258                    if (color0 > color1) {
259                        r2 = avg23(r0, r1);
260                        g2 = avg23(g0, g1);
261                        b2 = avg23(b0, b1);
262
263                        r3 = avg23(r1, r0);
264                        g3 = avg23(g1, g0);
265                        b3 = avg23(b1, b0);
266                        a3 = 1;
267                    } else {
268                        r2 = (r0 + r1) >> 1;
269                        g2 = (g0 + g1) >> 1;
270                        b2 = (b0 + b1) >> 1;
271
272                        r3 = g3 = b3 = a3 = 0;
273                    }
274                    if (hasAlpha) {
275                        c[2] = (r2 << 11) | ((g2 >> 1) << 6) |
276                            (b2 << 1) | 0x1;
277                        c[3] = (r3 << 11) | ((g3 >> 1) << 6) |
278                            (b3 << 1) | a3;
279                    } else {
280                        c[2] = (r2 << 11) | (g2 << 5) | b2;
281                        c[3] = (r3 << 11) | (g3 << 5) | b3;
282                    }
283                }
284            }
285
286            uint16_t* blockRowPtr = blockPtr;
287            for (int y = 0; y < 4; y++, blockRowPtr += stride) {
288                // Don't process rows past the botom
289                if (base_y + y >= height) {
290                    break;
291                }
292
293                int w = min(width - base_x, 4);
294                for (int x = 0; x < w; x++) {
295                    int code = bits & 0x3;
296                    bits >>= 2;
297
298                    blockRowPtr[x] = c[code];
299                }
300            }
301        }
302    }
303}
304
305// Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE
306static void
307decodeDXT3(const GLvoid *data, int width, int height,
308           void *surface, int stride)
309
310{
311    init_tables();
312
313    uint32_t const *d32 = (uint32_t *)data;
314
315    // Specified colors from the previous block
316    uint16_t prev_color0 = 0x0000;
317    uint16_t prev_color1 = 0x0000;
318
319    // Color table for the current block
320    uint32_t c[4];
321    c[0] = c[1] = c[2] = c[3] = 0;
322
323    uint32_t* rowPtr = (uint32_t*)surface;
324    for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
325        uint32_t *blockPtr = rowPtr;
326        for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
327
328#if __BYTE_ORDER == __BIG_ENDIAN
329            uint32_t alphahi = *d32++;
330            uint32_t alphalo = *d32++;
331            alphahi = swap(alphahi);
332            alphalo = swap(alphalo);
333#else
334            uint32_t alphalo = *d32++;
335            uint32_t alphahi = *d32++;
336#endif
337
338            uint32_t colors = *d32++;
339            uint32_t bits = *d32++;
340
341#if __BYTE_ORDER == __BIG_ENDIAN
342            colors = swap(colors);
343            bits = swap(bits);
344#endif
345
346            uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo;
347
348            // Raw colors
349            uint16_t color0 = colors & 0xffff;
350            uint16_t color1 = colors >> 16;
351
352            // If the new block has the same base colors as the
353            // previous one, we don't need to recompute the color
354            // table c[]
355            if (color0 != prev_color0 || color1 != prev_color1) {
356                // Store raw colors for comparison with next block
357                prev_color0 = color0;
358                prev_color1 = color1;
359
360                int bbits = bits >> 1;
361                bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
362                bool has3 = ((bbits &  bits) & 0x55555555) != 0;
363
364                if (has2 || has3) {
365                    int r0 =   red(color0);
366                    int g0 = green(color0);
367                    int b0 =  blue(color0);
368
369                    int r1 =   red(color1);
370                    int g1 = green(color1);
371                    int b1 =  blue(color1);
372
373                    int r2 = avg23(r0, r1);
374                    int g2 = avg23(g0, g1);
375                    int b2 = avg23(b0, b1);
376
377                    int r3 = avg23(r1, r0);
378                    int g3 = avg23(g1, g0);
379                    int b3 = avg23(b1, b0);
380
381                    c[0] = rgb565SepTo888(r0, g0, b0);
382                    c[1] = rgb565SepTo888(r1, g1, b1);
383                    c[2] = rgb565SepTo888(r2, g2, b2);
384                    c[3] = rgb565SepTo888(r3, g3, b3);
385                } else {
386                    // Convert to 8 bits
387                    c[0] = rgb565To888(color0);
388                    c[1] = rgb565To888(color1);
389                }
390            }
391
392            uint32_t* blockRowPtr = blockPtr;
393            for (int y = 0; y < 4; y++, blockRowPtr += stride) {
394                // Don't process rows past the botom
395                if (base_y + y >= height) {
396                    break;
397                }
398
399                int w = min(width - base_x, 4);
400                for (int x = 0; x < w; x++) {
401                    int a = alpha & 0xf;
402                    alpha >>= 4;
403
404                    int code = bits & 0x3;
405                    bits >>= 2;
406
407                    blockRowPtr[x] = c[code] | (a << 28) | (a << 24);
408                }
409            }
410        }
411    }
412}
413
414// Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE
415static void
416decodeDXT5(const GLvoid *data, int width, int height,
417           void *surface, int stride)
418
419{
420    init_tables();
421
422    uint32_t const *d32 = (uint32_t *)data;
423
424    // Specified alphas from the previous block
425    uint8_t prev_alpha0 = 0x00;
426    uint8_t prev_alpha1 = 0x00;
427
428    // Specified colors from the previous block
429    uint16_t prev_color0 = 0x0000;
430     uint16_t prev_color1 = 0x0000;
431
432    // Alpha table for the current block
433    uint8_t a[8];
434    a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = 0;
435
436    // Color table for the current block
437    uint32_t c[4];
438    c[0] = c[1] = c[2] = c[3] = 0;
439
440    int good_a5 = 0;
441    int bad_a5 = 0;
442    int good_a6 = 0;
443    int bad_a6 = 0;
444    int good_a7 = 0;
445    int bad_a7 = 0;
446
447    uint32_t* rowPtr = (uint32_t*)surface;
448    for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
449        uint32_t *blockPtr = rowPtr;
450        for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
451
452#if __BYTE_ORDER == __BIG_ENDIAN
453            uint32_t alphahi = *d32++;
454            uint32_t alphalo = *d32++;
455            alphahi = swap(alphahi);
456            alphalo = swap(alphalo);
457#else
458             uint32_t alphalo = *d32++;
459             uint32_t alphahi = *d32++;
460#endif
461
462            uint32_t colors = *d32++;
463            uint32_t bits = *d32++;
464
465#if __BYTE_ORDER == __BIG_ENDIANx
466            colors = swap(colors);
467            bits = swap(bits);
468#endif
469
470            uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo;
471            uint64_t alpha0 = alpha & 0xff;
472            alpha >>= 8;
473            uint64_t alpha1 = alpha & 0xff;
474            alpha >>= 8;
475
476            if (alpha0 != prev_alpha0 || alpha1 != prev_alpha1) {
477                prev_alpha0 = alpha0;
478                prev_alpha1 = alpha1;
479
480                a[0] = alpha0;
481                a[1] = alpha1;
482                int a01 = alpha0 + alpha1 - 1;
483                if (alpha0 > alpha1) {
484                    a[2] = div7(6*alpha0 +   alpha1);
485                    a[4] = div7(4*alpha0 + 3*alpha1);
486                    a[6] = div7(2*alpha0 + 5*alpha1);
487
488                    // Use symmetry to derive half of the values
489                    // A few values will be off by 1 (~.5%)
490                    // Alternate which values are computed directly
491                    // and which are derived to try to reduce bias
492                    a[3] = a01 - a[6];
493                    a[5] = a01 - a[4];
494                    a[7] = a01 - a[2];
495                } else {
496                    a[2] = div5(4*alpha0 +   alpha1);
497                    a[4] = div5(2*alpha0 + 3*alpha1);
498                    a[3] = a01 - a[4];
499                    a[5] = a01 - a[2];
500                    a[6] = 0x00;
501                    a[7] = 0xff;
502                }
503            }
504
505            // Raw colors
506            uint16_t color0 = colors & 0xffff;
507            uint16_t color1 = colors >> 16;
508
509            // If the new block has the same base colors as the
510            // previous one, we don't need to recompute the color
511            // table c[]
512            if (color0 != prev_color0 || color1 != prev_color1) {
513                // Store raw colors for comparison with next block
514                prev_color0 = color0;
515                prev_color1 = color1;
516
517                int bbits = bits >> 1;
518                bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
519                bool has3 = ((bbits &  bits) & 0x55555555) != 0;
520
521                if (has2 || has3) {
522                    int r0 =   red(color0);
523                    int g0 = green(color0);
524                    int b0 =  blue(color0);
525
526                    int r1 =   red(color1);
527                    int g1 = green(color1);
528                    int b1 =  blue(color1);
529
530                    int r2 = avg23(r0, r1);
531                    int g2 = avg23(g0, g1);
532                    int b2 = avg23(b0, b1);
533
534                    int r3 = avg23(r1, r0);
535                    int g3 = avg23(g1, g0);
536                    int b3 = avg23(b1, b0);
537
538                    c[0] = rgb565SepTo888(r0, g0, b0);
539                    c[1] = rgb565SepTo888(r1, g1, b1);
540                    c[2] = rgb565SepTo888(r2, g2, b2);
541                    c[3] = rgb565SepTo888(r3, g3, b3);
542                } else {
543                    // Convert to 8 bits
544                    c[0] = rgb565To888(color0);
545                    c[1] = rgb565To888(color1);
546                }
547            }
548
549            uint32_t* blockRowPtr = blockPtr;
550            for (int y = 0; y < 4; y++, blockRowPtr += stride) {
551                // Don't process rows past the botom
552                if (base_y + y >= height) {
553                    break;
554                }
555
556                int w = min(width - base_x, 4);
557                for (int x = 0; x < w; x++) {
558                    int acode = alpha & 0x7;
559                    alpha >>= 3;
560
561                    int code = bits & 0x3;
562                    bits >>= 2;
563
564                    blockRowPtr[x] = c[code] | (a[acode] << 24);
565                }
566            }
567        }
568    }
569}
570
571/*
572 * Decode a DXT-compressed texture into memory.  DXT textures consist of
573 * a series of 4x4 pixel blocks in left-to-right, top-down order.
574 * The number of blocks is given by ceil(width/4)*ceil(height/4).
575 *
576 * 'data' points to the texture data. 'width' and 'height' indicate the
577 * dimensions of the texture.  We assume width and height are >= 0 but
578 * do not require them to be powers of 2 or divisible by any factor.
579 *
580 * The output is written to 'surface' with each scanline separated by
581 * 'stride' 2- or 4-byte words.
582 *
583 * 'format' indicates the type of compression and must be one of the following:
584 *
585 *   GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
586 *      The output is written as 5/6/5 opaque RGB (16 bit words).
587 *      8 bytes are read from 'data' for each block.
588 *
589 *   GL_COMPRESSED_RGBA_S3TC_DXT1_EXT
590 *      The output is written as 5/5/5/1 RGBA (16 bit words)
591 *      8 bytes are read from 'data' for each block.
592 *
593 *   GL_COMPRESSED_RGBA_S3TC_DXT3_EXT
594 *   GL_COMPRESSED_RGBA_S3TC_DXT5_EXT
595 *      The output is written as 8/8/8/8 ARGB (32 bit words)
596 *      16 bytes are read from 'data' for each block.
597 */
598void
599decodeDXT(const GLvoid *data, int width, int height,
600          void *surface, int stride, int format)
601{
602#if TIMING
603    struct timeval start_t, end_t;
604    struct timezone tz;
605
606    gettimeofday(&start_t, &tz);
607#endif
608
609    switch (format) {
610    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
611        decodeDXT1(data, width, height, surface, stride, false);
612        break;
613
614    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
615        decodeDXT1(data, width, height, surface, stride, true);
616        break;
617
618    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
619        decodeDXT3(data, width, height, surface, stride);
620        break;
621
622    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
623        decodeDXT5(data, width, height, surface, stride);
624        break;
625    }
626
627#if TIMING
628    gettimeofday(&end_t, &tz);
629    long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 +
630        (end_t.tv_usec - start_t.tv_usec);
631
632    printf("Loaded w=%d h=%d in %ld usec\n", width, height, usec);
633#endif
634}
635
636} // namespace android
637