1/*
2 * Copyright 2014 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkTextureCompressor_ASTC.h"
9#include "SkTextureCompressor_Blitter.h"
10
11#include "SkBlitter.h"
12#include "SkEndian.h"
13#include "SkMath.h"
14
15// This table contains the weight values for each texel. This is used in determining
16// how to convert a 12x12 grid of alpha values into a 6x5 grid of index values. Since
17// we have a 6x5 grid, that gives 30 values that we have to compute. For each index,
18// we store up to 20 different triplets of values. In order the triplets are:
19// weight, texel-x, texel-y
20// The weight value corresponds to the amount that this index contributes to the final
21// index value of the given texel. Hence, we need to reconstruct the 6x5 index grid
22// from their relative contribution to the 12x12 texel grid.
23//
24// The algorithm is something like this:
25// foreach index i:
26//    total-weight = 0;
27//    total-alpha = 0;
//    for w = 0 to 19:
//       weight = table[i][w*3];
//       texel-x = table[i][w*3 + 1];
//       texel-y = table[i][w*3 + 2];
//       if weight > 0:
33//           total-weight += weight;
34//           total-alpha += weight * alphas[texel-x][texel-y];
35//
36//    total-alpha /= total-weight;
37//    index = top three bits of total-alpha
38//
39// If the associated index does not contribute to 20 different texels (e.g. it's in
40// a corner), then the extra texels are stored with -1's in the table.
41
// Each row belongs to one index of the 6x5 grid and holds 20 (weight, texel-x,
// texel-y) triplets. Rows that touch fewer than 20 texels are padded at the end
// with (-1, 0, 0) triplets.
static const int8_t k6x5To12x12Table[30][60] = {
    // index 0: 8 texels
    { 16, 0, 0,   9, 1, 0,   1, 2, 0,   10, 0, 1,   6, 1, 1,
      1, 2, 1,   4, 0, 2,   2, 1, 2,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 1: 12 texels
    { 7, 1, 0,   15, 2, 0,   10, 3, 0,   3, 4, 0,   4, 1, 1,
      9, 2, 1,   6, 3, 1,   2, 4, 1,   2, 1, 2,   4, 2, 2,
      3, 3, 2,   1, 4, 2,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 2: 12 texels
    { 6, 3, 0,   13, 4, 0,   12, 5, 0,   4, 6, 0,   4, 3, 1,
      8, 4, 1,   8, 5, 1,   3, 6, 1,   1, 3, 2,   3, 4, 2,
      3, 5, 2,   1, 6, 2,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 3: 12 texels
    { 4, 5, 0,   12, 6, 0,   13, 7, 0,   6, 8, 0,   2, 5, 1,
      7, 6, 1,   8, 7, 1,   4, 8, 1,   1, 5, 2,   3, 6, 2,
      3, 7, 2,   2, 8, 2,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 4: 12 texels
    { 3, 7, 0,   10, 8, 0,   15, 9, 0,   7, 10, 0,   2, 7, 1,
      6, 8, 1,   9, 9, 1,   4, 10, 1,   1, 7, 2,   2, 8, 2,
      4, 9, 2,   2, 10, 2,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 5: 8 texels
    { 1, 9, 0,   9, 10, 0,   16, 11, 0,   1, 9, 1,   6, 10, 1,
      10, 11, 1,   2, 10, 2,   4, 11, 2,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 6: 13 texels
    { 6, 0, 1,   3, 1, 1,   12, 0, 2,   7, 1, 2,   1, 2, 2,
      15, 0, 3,   8, 1, 3,   1, 2, 3,   9, 0, 4,   5, 1, 4,
      1, 2, 4,   3, 0, 5,   2, 1, 5,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 7: 20 texels
    { 3, 1, 1,   6, 2, 1,   4, 3, 1,   1, 4, 1,   5, 1, 2,
      11, 2, 2,   7, 3, 2,   2, 4, 2,   7, 1, 3,   14, 2, 3,
      9, 3, 3,   3, 4, 3,   4, 1, 4,   8, 2, 4,   6, 3, 4,
      2, 4, 4,   1, 1, 5,   3, 2, 5,   2, 3, 5,   1, 4, 5 },
    // index 8: 20 texels
    { 2, 3, 1,   5, 4, 1,   4, 5, 1,   1, 6, 1,   5, 3, 2,
      10, 4, 2,   9, 5, 2,   3, 6, 2,   6, 3, 3,   12, 4, 3,
      11, 5, 3,   4, 6, 3,   3, 3, 4,   7, 4, 4,   7, 5, 4,
      2, 6, 4,   1, 3, 5,   2, 4, 5,   2, 5, 5,   1, 6, 5 },
    // index 9: 20 texels
    { 2, 5, 1,   5, 6, 1,   5, 7, 1,   2, 8, 1,   3, 5, 2,
      9, 6, 2,   10, 7, 2,   4, 8, 2,   4, 5, 3,   11, 6, 3,
      12, 7, 3,   6, 8, 3,   2, 5, 4,   7, 6, 4,   7, 7, 4,
      3, 8, 4,   1, 5, 5,   2, 6, 5,   2, 7, 5,   1, 8, 5 },
    // index 10: 20 texels
    { 1, 7, 1,   4, 8, 1,   6, 9, 1,   3, 10, 1,   2, 7, 2,
      8, 8, 2,   11, 9, 2,   5, 10, 2,   3, 7, 3,   9, 8, 3,
      14, 9, 3,   7, 10, 3,   2, 7, 4,   6, 8, 4,   8, 9, 4,
      4, 10, 4,   1, 7, 5,   2, 8, 5,   3, 9, 5,   1, 10, 5 },
    // index 11: 13 texels
    { 3, 10, 1,   6, 11, 1,   1, 9, 2,   7, 10, 2,   12, 11, 2,
      1, 9, 3,   8, 10, 3,   15, 11, 3,   1, 9, 4,   5, 10, 4,
      9, 11, 4,   2, 10, 5,   3, 11, 5,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 12: 14 texels
    { 1, 0, 3,   1, 1, 3,   7, 0, 4,   4, 1, 4,   13, 0, 5,
      7, 1, 5,   1, 2, 5,   13, 0, 6,   7, 1, 6,   1, 2, 6,
      7, 0, 7,   4, 1, 7,   1, 0, 8,   1, 1, 8,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 13: 20 texels
    { 1, 2, 3,   1, 3, 3,   3, 1, 4,   7, 2, 4,   4, 3, 4,
      1, 4, 4,   6, 1, 5,   12, 2, 5,   8, 3, 5,   2, 4, 5,
      6, 1, 6,   12, 2, 6,   8, 3, 6,   2, 4, 6,   3, 1, 7,
      7, 2, 7,   4, 3, 7,   1, 4, 7,   1, 2, 8,   1, 3, 8 },
    // index 14: 20 texels
    { 1, 4, 3,   1, 5, 3,   3, 3, 4,   6, 4, 4,   5, 5, 4,
      2, 6, 4,   5, 3, 5,   11, 4, 5,   10, 5, 5,   3, 6, 5,
      5, 3, 6,   11, 4, 6,   10, 5, 6,   3, 6, 6,   3, 3, 7,
      6, 4, 7,   5, 5, 7,   2, 6, 7,   1, 4, 8,   1, 5, 8 },
    // index 15: 20 texels
    { 1, 6, 3,   1, 7, 3,   2, 5, 4,   5, 6, 4,   6, 7, 4,
      3, 8, 4,   3, 5, 5,   10, 6, 5,   11, 7, 5,   5, 8, 5,
      3, 5, 6,   10, 6, 6,   11, 7, 6,   5, 8, 6,   2, 5, 7,
      5, 6, 7,   6, 7, 7,   3, 8, 7,   1, 6, 8,   1, 7, 8 },
    // index 16: 20 texels
    { 1, 8, 3,   1, 9, 3,   1, 7, 4,   4, 8, 4,   7, 9, 4,
      3, 10, 4,   2, 7, 5,   8, 8, 5,   12, 9, 5,   6, 10, 5,
      2, 7, 6,   8, 8, 6,   12, 9, 6,   6, 10, 6,   1, 7, 7,
      4, 8, 7,   7, 9, 7,   3, 10, 7,   1, 8, 8,   1, 9, 8 },
    // index 17: 14 texels
    { 1, 10, 3,   1, 11, 3,   4, 10, 4,   7, 11, 4,   1, 9, 5,
      7, 10, 5,   13, 11, 5,   1, 9, 6,   7, 10, 6,   13, 11, 6,
      4, 10, 7,   7, 11, 7,   1, 10, 8,   1, 11, 8,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 18: 13 texels
    { 3, 0, 6,   2, 1, 6,   9, 0, 7,   5, 1, 7,   1, 2, 7,
      15, 0, 8,   8, 1, 8,   1, 2, 8,   12, 0, 9,   7, 1, 9,
      1, 2, 9,   6, 0, 10,   3, 1, 10,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 19: 20 texels
    { 1, 1, 6,   3, 2, 6,   2, 3, 6,   1, 4, 6,   4, 1, 7,
      8, 2, 7,   6, 3, 7,   2, 4, 7,   7, 1, 8,   14, 2, 8,
      9, 3, 8,   3, 4, 8,   5, 1, 9,   11, 2, 9,   8, 3, 9,
      2, 4, 9,   3, 1, 10,   6, 2, 10,   4, 3, 10,   1, 4, 10 },
    // index 20: 20 texels
    { 1, 3, 6,   2, 4, 6,   2, 5, 6,   1, 6, 6,   3, 3, 7,
      7, 4, 7,   7, 5, 7,   2, 6, 7,   6, 3, 8,   12, 4, 8,
      11, 5, 8,   4, 6, 8,   4, 3, 9,   10, 4, 9,   9, 5, 9,
      3, 6, 9,   2, 3, 10,   5, 4, 10,   5, 5, 10,   2, 6, 10 },
    // index 21: 20 texels
    { 1, 5, 6,   2, 6, 6,   2, 7, 6,   1, 8, 6,   2, 5, 7,
      7, 6, 7,   7, 7, 7,   3, 8, 7,   4, 5, 8,   11, 6, 8,
      12, 7, 8,   6, 8, 8,   3, 5, 9,   9, 6, 9,   10, 7, 9,
      5, 8, 9,   1, 5, 10,   4, 6, 10,   5, 7, 10,   2, 8, 10 },
    // index 22: 20 texels
    { 1, 7, 6,   2, 8, 6,   3, 9, 6,   1, 10, 6,   2, 7, 7,
      6, 8, 7,   8, 9, 7,   4, 10, 7,   3, 7, 8,   9, 8, 8,
      14, 9, 8,   7, 10, 8,   2, 7, 9,   7, 8, 9,   11, 9, 9,
      5, 10, 9,   1, 7, 10,   4, 8, 10,   6, 9, 10,   3, 10, 10 },
    // index 23: 13 texels
    { 2, 10, 6,   3, 11, 6,   1, 9, 7,   5, 10, 7,   9, 11, 7,
      1, 9, 8,   8, 10, 8,   15, 11, 8,   1, 9, 9,   7, 10, 9,
      12, 11, 9,   3, 10, 10,   6, 11, 10,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 24: 8 texels
    { 4, 0, 9,   2, 1, 9,   10, 0, 10,   6, 1, 10,   1, 2, 10,
      16, 0, 11,   9, 1, 11,   1, 2, 11,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 25: 12 texels
    { 2, 1, 9,   4, 2, 9,   2, 3, 9,   1, 4, 9,   4, 1, 10,
      9, 2, 10,   6, 3, 10,   2, 4, 10,   7, 1, 11,   15, 2, 11,
      10, 3, 11,   3, 4, 11,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 26: 12 texels
    { 2, 3, 9,   3, 4, 9,   3, 5, 9,   1, 6, 9,   4, 3, 10,
      8, 4, 10,   7, 5, 10,   2, 6, 10,   6, 3, 11,   13, 4, 11,
      12, 5, 11,   4, 6, 11,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 27: 12 texels
    { 1, 5, 9,   3, 6, 9,   3, 7, 9,   1, 8, 9,   3, 5, 10,
      8, 6, 10,   8, 7, 10,   4, 8, 10,   4, 5, 11,   12, 6, 11,
      13, 7, 11,   6, 8, 11,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 28: 12 texels
    { 1, 7, 9,   3, 8, 9,   4, 9, 9,   2, 10, 9,   2, 7, 10,
      6, 8, 10,   9, 9, 10,   4, 10, 10,   3, 7, 11,   10, 8, 11,
      15, 9, 11,   7, 10, 11,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 },
    // index 29: 8 texels
    { 2, 10, 9,   4, 11, 9,   1, 9, 10,   6, 10, 10,   10, 11, 10,
      1, 9, 11,   9, 10, 11,   16, 11, 11,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,
      -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0,   -1, 0, 0 }
};
134
135// Returns the alpha value of a texel at position (x, y) from src.
136// (x, y) are assumed to be in the range [0, 12).
137inline uint8_t GetAlpha(const uint8_t *src, int rowBytes, int x, int y) {
138    SkASSERT(x >= 0 && x < 12);
139    SkASSERT(y >= 0 && y < 12);
140    SkASSERT(rowBytes >= 12);
141    return *(src + y*rowBytes + x);
142}
143
144inline uint8_t GetAlphaTranspose(const uint8_t *src, int rowBytes, int x, int y) {
145    return GetAlpha(src, rowBytes, y, x);
146}
147
// Writes the 16 bytes held in top and bottom to *dst (top first, then bottom)
// and advances *dst past them. The bytes are stored exactly as the integers are
// represented in memory, so callers must swap them beforehand if necessary.
//
// The stores go through memcpy rather than a uint64_t* cast because *dst is a
// plain byte pointer that carries no alignment guarantee.
static inline void send_packing(uint8_t** dst, const uint64_t top, const uint64_t bottom) {
    uint8_t* out = *dst;
    memcpy(out, &top, sizeof(top));
    memcpy(out + sizeof(top), &bottom, sizeof(bottom));
    *dst = out + sizeof(top) + sizeof(bottom);
}
157
158// Compresses an ASTC block, by looking up the proper contributions from
159// k6x5To12x12Table and computing an index from the associated values.
160typedef uint8_t (*GetAlphaProc)(const uint8_t* src, int rowBytes, int x, int y);
161
162template<GetAlphaProc getAlphaProc>
163static void compress_a8_astc_block(uint8_t** dst, const uint8_t* src, int rowBytes) {
164    // Check for single color
165    bool constant = true;
166    const uint32_t firstInt = *(reinterpret_cast<const uint32_t*>(src));
167    for (int i = 0; i < 12; ++i) {
168        const uint32_t *rowInt = reinterpret_cast<const uint32_t *>(src + i*rowBytes);
169        constant = constant && (rowInt[0] == firstInt);
170        constant = constant && (rowInt[1] == firstInt);
171        constant = constant && (rowInt[2] == firstInt);
172    }
173
174    if (constant) {
175        if (0 == firstInt) {
176            // All of the indices are set to zero, and the colors are
177            // v0 = 0, v1 = 255, so everything will be transparent.
178            send_packing(dst, SkTEndian_SwapLE64(0x0000000001FE000173ULL), 0);
179            return;
180        } else if (0xFFFFFFFF == firstInt) {
181            // All of the indices are set to zero, and the colors are
182            // v0 = 255, v1 = 0, so everything will be opaque.
183            send_packing(dst, SkTEndian_SwapLE64(0x000000000001FE0173ULL), 0);
184            return;
185        }
186    }
187
188    uint8_t indices[30]; // 6x5 index grid
189    for (int idx = 0; idx < 30; ++idx) {
190        int weightTot = 0;
191        int alphaTot = 0;
192        for (int w = 0; w < 20; ++w) {
193            const int8_t weight = k6x5To12x12Table[idx][w*3];
194            if (weight > 0) {
195                const int x = k6x5To12x12Table[idx][w*3 + 1];
196                const int y = k6x5To12x12Table[idx][w*3 + 2];
197                weightTot += weight;
198                alphaTot += weight * getAlphaProc(src, rowBytes, x, y);
199            } else {
200                // In our table, not every entry has 20 weights, and all
201                // of them are nonzero. Once we hit a negative weight, we
202                // know that all of the other weights are not valid either.
203                break;
204            }
205        }
206
207        indices[idx] = (alphaTot / weightTot) >> 5;
208    }
209
210    // Pack indices... The ASTC block layout is fairly complicated. An extensive
211    // description can be found here:
212    // https://www.opengl.org/registry/specs/KHR/texture_compression_astc_hdr.txt
213    //
214    // Here is a summary of the options that we've chosen:
215    // 1. Block mode: 0b00101110011
216    //     - 6x5 texel grid
217    //     - Single plane
218    //     - Low-precision index values
219    //     - Index range 0-7 (three bits per index)
220    // 2. Partitions: 0b00
221    //     - One partition
222    // 3. Color Endpoint Mode: 0b0000
223    //     - Direct luminance -- e0=(v0,v0,v0,0xFF); e1=(v1,v1,v1,0xFF);
224    // 4. 8-bit endpoints:
225    //     v0 = 0, v1 = 255
226    //
227    // The rest of the block contains the 30 index values from before, which
228    // are currently stored in the indices variable.
229
230    uint64_t top = 0x0000000001FE000173ULL;
231    uint64_t bottom = 0;
232
233    for (int idx = 0; idx <= 20; ++idx) {
234        const uint8_t index = indices[idx];
235        bottom |= static_cast<uint64_t>(index) << (61-(idx*3));
236    }
237
238    // index 21 straddles top and bottom
239    {
240        const uint8_t index = indices[21];
241        bottom |= index & 1;
242        top |= static_cast<uint64_t>((index >> 2) | (index & 2)) << 62;
243    }
244
245    for (int idx = 22; idx < 30; ++idx) {
246        const uint8_t index = indices[idx];
247        top |= static_cast<uint64_t>(index) << (59-(idx-22)*3);
248    }
249
250    // Reverse each 3-bit index since indices are read in reverse order...
251    uint64_t t = (bottom ^ (bottom >> 2)) & 0x2492492492492492ULL;
252    bottom = bottom ^ t ^ (t << 2);
253
254    t = (top ^ (top >> 2)) & 0x0924924000000000ULL;
255    top = top ^ t ^ (t << 2);
256
257    send_packing(dst, SkEndian_SwapLE64(top), SkEndian_SwapLE64(bottom));
258}
259
260inline void CompressA8ASTCBlockVertical(uint8_t* dst, const uint8_t* src) {
261    compress_a8_astc_block<GetAlphaTranspose>(&dst, src, 12);
262}
263
264////////////////////////////////////////////////////////////////////////////////
265//
266// ASTC Decoder
267//
268// Full details available in the spec:
269// http://www.khronos.org/registry/gles/extensions/OES/OES_texture_compression_astc.txt
270//
271////////////////////////////////////////////////////////////////////////////////
272
273// Enable this to assert whenever a decoded block has invalid ASTC values. Otherwise,
274// each invalid block will result in a disgusting magenta color.
275#define ASSERT_ASTC_DECODE_ERROR 0
276
277// Reverse 64-bit integer taken from TAOCP 4a, although it's better
278// documented at this site:
279// http://matthewarcus.wordpress.com/2012/11/18/reversing-a-64-bit-word/
280
// For every bit set in the mask m, exchanges the bit of p at that position with
// the bit k positions above it, and returns the result.
template <typename T, T m, int k>
static inline T swap_bits(T p) {
    // Bits that differ between each selected position and its partner.
    const T diff = (p ^ (p >> k)) & m;
    // Flipping both members of a differing pair swaps them.
    return (p ^ diff) ^ (diff << k);
}
286
287static inline uint64_t reverse64(uint64_t n) {
288    static const uint64_t m0 = 0x5555555555555555ULL;
289    static const uint64_t m1 = 0x0300c0303030c303ULL;
290    static const uint64_t m2 = 0x00c0300c03f0003fULL;
291    static const uint64_t m3 = 0x00000ffc00003fffULL;
292    n = ((n>>1)&m0) | (n&m0)<<1;
293    n = swap_bits<uint64_t, m1, 4>(n);
294    n = swap_bits<uint64_t, m2, 8>(n);
295    n = swap_bits<uint64_t, m3, 20>(n);
296    n = (n >> 34) | (n << 30);
297    return n;
298}
299
300// An ASTC block is 128 bits. We represent it as two 64-bit integers in order
301// to efficiently operate on the block using bitwise operations.
302struct ASTCBlock {
303    uint64_t fLow;
304    uint64_t fHigh;
305
306    // Reverses the bits of an ASTC block, making the LSB of the
307    // 128 bit block the MSB.
308    inline void reverse() {
309        const uint64_t newLow = reverse64(this->fHigh);
310        this->fHigh = reverse64(this->fLow);
311        this->fLow = newLow;
312    }
313};
314
315// Writes the given color to every pixel in the block. This is used by void-extent
316// blocks (a special constant-color encoding of a block) and by the error function.
317static inline void write_constant_color(uint8_t* dst, int blockDimX, int blockDimY,
318                                        int dstRowBytes, SkColor color) {
319    for (int y = 0; y < blockDimY; ++y) {
320        SkColor *dstColors = reinterpret_cast<SkColor*>(dst);
321        for (int x = 0; x < blockDimX; ++x) {
322            dstColors[x] = color;
323        }
324        dst += dstRowBytes;
325    }
326}
327
328// Sets the entire block to the ASTC "error" color, a disgusting magenta
329// that's not supposed to appear in natural images.
330static inline void write_error_color(uint8_t* dst, int blockDimX, int blockDimY,
331                                     int dstRowBytes) {
332    static const SkColor kASTCErrorColor = SkColorSetRGB(0xFF, 0, 0xFF);
333
334#if ASSERT_ASTC_DECODE_ERROR
335    SkDEBUGFAIL("ASTC decoding error!\n");
336#endif
337
338    write_constant_color(dst, blockDimX, blockDimY, dstRowBytes, kASTCErrorColor);
339}
340
341// Reads up to 64 bits of the ASTC block starting from bit
342// 'from' and going up to but not including bit 'to'. 'from' starts
343// counting from the LSB, counting up to the MSB. Returns -1 on
344// error.
345static uint64_t read_astc_bits(const ASTCBlock &block, int from, int to) {
346    SkASSERT(0 <= from && from <= 128);
347    SkASSERT(0 <= to && to <= 128);
348
349    const int nBits = to - from;
350    if (0 == nBits) {
351        return 0;
352    }
353
354    if (nBits < 0 || 64 <= nBits) {
355        SkDEBUGFAIL("ASTC -- shouldn't read more than 64 bits");
356        return -1;
357    }
358
359    // Remember, the 'to' bit isn't read.
360    uint64_t result = 0;
361    if (to <= 64) {
362        // All desired bits are in the low 64-bits.
363        result = (block.fLow >> from) & ((1ULL << nBits) - 1);
364    } else if (from >= 64) {
365        // All desired bits are in the high 64-bits.
366        result = (block.fHigh >> (from - 64)) & ((1ULL << nBits) - 1);
367    } else {
368        // from < 64 && to > 64
369        SkASSERT(nBits > (64 - from));
370        const int nLow = 64 - from;
371        const int nHigh = nBits - nLow;
372        result =
373            ((block.fLow >> from) & ((1ULL << nLow) - 1)) |
374            ((block.fHigh & ((1ULL << nHigh) - 1)) << nLow);
375    }
376
377    return result;
378}
379
380// Returns the number of bits needed to represent a number
381// in the given power-of-two range (excluding the power of two itself).
382static inline int bits_for_range(int x) {
383    SkASSERT(SkIsPow2(x));
384    SkASSERT(0 != x);
385    // Since we know it's a power of two, there should only be one bit set,
386    // meaning the number of trailing zeros is 31 minus the number of leading
387    // zeros.
388    return 31 - SkCLZ(x);
389}
390
391// Clamps an integer to the range [0, 255]
392static inline int clamp_byte(int x) {
393    return SkClampMax(x, 255);
394}
395
// Helper function defined in the ASTC spec, section C.2.14.
// It transfers a few bits of precision from one value to another: *b is halved
// and receives bit 7 of *a as its own bit 7, while *a is reduced to bits 6..1 of
// its old value, interpreted as a signed 6-bit number in [-32, 31].
static inline void bit_transfer_signed(int *a, int *b) {
    *b = *b >> 1;
    *b = *b | (*a & 0x80);

    const int sixBits = (*a >> 1) & 0x3F;
    // Bit 5 is the sign bit of the 6-bit value.
    *a = (sixBits & 0x20) ? (sixBits - 0x40) : sixBits;
}
407
408// Helper function defined in the ASTC spec, section C.2.14
409// It uses the value in the blue channel to tint the red and green
410static inline SkColor blue_contract(int a, int r, int g, int b) {
411    return SkColorSetARGB(a, (r + b) >> 1, (g + b) >> 1, b);
412}
413
414// Helper function that decodes two colors from eight values. If isRGB is true,
415// then the pointer 'v' contains six values and the last two are considered to be
416// 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This
417// corresponds to the decode procedure for the following endpoint modes:
418//   kLDR_RGB_Direct_ColorEndpointMode
419//   kLDR_RGBA_Direct_ColorEndpointMode
420static inline void decode_rgba_direct(const int *v, SkColor *endpoints, bool isRGB) {
421
422    int v6 = 0xFF;
423    int v7 = 0xFF;
424    if (!isRGB) {
425        v6 = v[6];
426        v7 = v[7];
427    }
428
429    const int s0 = v[0] + v[2] + v[4];
430    const int s1 = v[1] + v[3] + v[5];
431
432    if (s1 >= s0) {
433        endpoints[0] = SkColorSetARGB(v6, v[0], v[2], v[4]);
434        endpoints[1] = SkColorSetARGB(v7, v[1], v[3], v[5]);
435    } else {
436        endpoints[0] = blue_contract(v7, v[1], v[3], v[5]);
437        endpoints[1] = blue_contract(v6, v[0], v[2], v[4]);
438    }
439}
440
441// Helper function that decodes two colors from six values. If isRGB is true,
442// then the pointer 'v' contains four values and the last two are considered to be
443// 0xFF. If isRGB is false, then all six values come from the pointer 'v'. This
444// corresponds to the decode procedure for the following endpoint modes:
445//   kLDR_RGB_BaseScale_ColorEndpointMode
446//   kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode
447static inline void decode_rgba_basescale(const int *v, SkColor *endpoints, bool isRGB) {
448
449    int v4 = 0xFF;
450    int v5 = 0xFF;
451    if (!isRGB) {
452        v4 = v[4];
453        v5 = v[5];
454    }
455
456    endpoints[0] = SkColorSetARGB(v4,
457                                  (v[0]*v[3]) >> 8,
458                                  (v[1]*v[3]) >> 8,
459                                  (v[2]*v[3]) >> 8);
460    endpoints[1] = SkColorSetARGB(v5, v[0], v[1], v[2]);
461}
462
463// Helper function that decodes two colors from eight values. If isRGB is true,
464// then the pointer 'v' contains six values and the last two are considered to be
465// 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This
466// corresponds to the decode procedure for the following endpoint modes:
467//   kLDR_RGB_BaseOffset_ColorEndpointMode
468//   kLDR_RGBA_BaseOffset_ColorEndpointMode
469//
470// If isRGB is true, then treat this as if v6 and v7 are meant to encode full alpha values.
471static inline void decode_rgba_baseoffset(const int *v, SkColor *endpoints, bool isRGB) {
472    int v0 = v[0];
473    int v1 = v[1];
474    int v2 = v[2];
475    int v3 = v[3];
476    int v4 = v[4];
477    int v5 = v[5];
478    int v6 = isRGB ? 0xFF : v[6];
479    // The 0 is here because this is an offset, not a direct value
480    int v7 = isRGB ? 0 : v[7];
481
482    bit_transfer_signed(&v1, &v0);
483    bit_transfer_signed(&v3, &v2);
484    bit_transfer_signed(&v5, &v4);
485    if (!isRGB) {
486        bit_transfer_signed(&v7, &v6);
487    }
488
489    int c[2][4];
490    if ((v1 + v3 + v5) >= 0) {
491        c[0][0] = v6;
492        c[0][1] = v0;
493        c[0][2] = v2;
494        c[0][3] = v4;
495
496        c[1][0] = v6 + v7;
497        c[1][1] = v0 + v1;
498        c[1][2] = v2 + v3;
499        c[1][3] = v4 + v5;
500    } else {
501        c[0][0] = v6 + v7;
502        c[0][1] = (v0 + v1 + v4 + v5) >> 1;
503        c[0][2] = (v2 + v3 + v4 + v5) >> 1;
504        c[0][3] = v4 + v5;
505
506        c[1][0] = v6;
507        c[1][1] = (v0 + v4) >> 1;
508        c[1][2] = (v2 + v4) >> 1;
509        c[1][3] = v4;
510    }
511
512    endpoints[0] = SkColorSetARGB(clamp_byte(c[0][0]),
513                                  clamp_byte(c[0][1]),
514                                  clamp_byte(c[0][2]),
515                                  clamp_byte(c[0][3]));
516
517    endpoints[1] = SkColorSetARGB(clamp_byte(c[1][0]),
518                                  clamp_byte(c[1][1]),
519                                  clamp_byte(c[1][2]),
520                                  clamp_byte(c[1][3]));
521}
522
523
524// A helper class used to decode bit values from standard integer values.
525// We can't use this class with ASTCBlock because then it would need to
526// handle multi-value ranges, and it's non-trivial to lookup a range of bits
527// that splits across two different ints.
528template <typename T>
529class SkTBits {
530public:
531    SkTBits(const T val) : fVal(val) { }
532
533    // Returns the bit at the given position
534    T operator [](const int idx) const {
535        return (fVal >> idx) & 1;
536    }
537
538    // Returns the bits in the given range, inclusive
539    T operator ()(const int end, const int start) const {
540        SkASSERT(end >= start);
541        return (fVal >> start) & ((1ULL << ((end - start) + 1)) - 1);
542    }
543
544private:
545    const T fVal;
546};
547
548// This algorithm matches the trit block decoding in the spec (Table C.2.14)
549static void decode_trit_block(int* dst, int nBits, const uint64_t &block) {
550
551    SkTBits<uint64_t> blockBits(block);
552
553    // According to the spec, a trit block, which contains five values,
554    // has the following layout:
555    //
556    // 27  26  25  24  23  22  21  20  19  18  17  16
557    //  -----------------------------------------------
558    // |T7 |     m4        |T6  T5 |     m3        |T4 |
559    //  -----------------------------------------------
560    //
561    // 15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
562    //  --------------------------------------------------------------
563    // |    m2        |T3  T2 |      m1       |T1  T0 |      m0       |
564    //  --------------------------------------------------------------
565    //
566    // Where the m's are variable width depending on the number of bits used
567    // to encode the values (anywhere from 0 to 6). Since 3^5 = 243, the extra
568    // byte labeled T (whose bits are interleaved where 0 is the LSB and 7 is
569    // the MSB), contains five trit values. To decode the trit values, the spec
570    // says that we need to follow the following algorithm:
571    //
572    // if T[4:2] = 111
573    //     C = { T[7:5], T[1:0] }; t4 = t3 = 2
574    // else
575    //     C = T[4:0]
576    //
577    // if T[6:5] = 11
578    //     t4 = 2; t3 = T[7]
579    // else
580    //     t4 = T[7]; t3 = T[6:5]
581    //
582    // if C[1:0] = 11
583    //     t2 = 2; t1 = C[4]; t0 = { C[3], C[2]&~C[3] }
584    // else if C[3:2] = 11
585    //     t2 = 2; t1 = 2; t0 = C[1:0]
586    // else
587    //     t2 = C[4]; t1 = C[3:2]; t0 = { C[1], C[0]&~C[1] }
588    //
589    // The following C++ code is meant to mirror this layout and algorithm as
590    // closely as possible.
591
592    int m[5];
593    if (0 == nBits) {
594        memset(m, 0, sizeof(m));
595    } else {
596        SkASSERT(nBits < 8);
597        m[0] = static_cast<int>(blockBits(nBits - 1, 0));
598        m[1] = static_cast<int>(blockBits(2*nBits - 1 + 2, nBits + 2));
599        m[2] = static_cast<int>(blockBits(3*nBits - 1 + 4, 2*nBits + 4));
600        m[3] = static_cast<int>(blockBits(4*nBits - 1 + 5, 3*nBits + 5));
601        m[4] = static_cast<int>(blockBits(5*nBits - 1 + 7, 4*nBits + 7));
602    }
603
604    int T =
605        static_cast<int>(blockBits(nBits + 1, nBits)) |
606        (static_cast<int>(blockBits(2*nBits + 2 + 1, 2*nBits + 2)) << 2) |
607        (static_cast<int>(blockBits[3*nBits + 4] << 4)) |
608        (static_cast<int>(blockBits(4*nBits + 5 + 1, 4*nBits + 5)) << 5) |
609        (static_cast<int>(blockBits[5*nBits + 7] << 7));
610
611    int t[5];
612
613    int C;
614    SkTBits<int> Tbits(T);
615    if (0x7 == Tbits(4, 2)) {
616        C = (Tbits(7, 5) << 2) | Tbits(1, 0);
617        t[3] = t[4] = 2;
618    } else {
619        C = Tbits(4, 0);
620        if (Tbits(6, 5) == 0x3) {
621            t[4] = 2; t[3] = Tbits[7];
622        } else {
623            t[4] = Tbits[7]; t[3] = Tbits(6, 5);
624        }
625    }
626
627    SkTBits<int> Cbits(C);
628    if (Cbits(1, 0) == 0x3) {
629        t[2] = 2;
630        t[1] = Cbits[4];
631        t[0] = (Cbits[3] << 1) | (Cbits[2] & (0x1 & ~(Cbits[3])));
632    } else if (Cbits(3, 2) == 0x3) {
633        t[2] = 2;
634        t[1] = 2;
635        t[0] = Cbits(1, 0);
636    } else {
637        t[2] = Cbits[4];
638        t[1] = Cbits(3, 2);
639        t[0] = (Cbits[1] << 1) | (Cbits[0] & (0x1 & ~(Cbits[1])));
640    }
641
642#ifdef SK_DEBUG
643    // Make sure all of the decoded values have a trit less than three
644    // and a bit value within the range of the allocated bits.
645    for (int i = 0; i < 5; ++i) {
646        SkASSERT(t[i] < 3);
647        SkASSERT(m[i] < (1 << nBits));
648    }
649#endif
650
651    for (int i = 0; i < 5; ++i) {
652        *dst = (t[i] << nBits) + m[i];
653        ++dst;
654    }
655}
656
657// This algorithm matches the quint block decoding in the spec (Table C.2.15)
658static void decode_quint_block(int* dst, int nBits, const uint64_t &block) {
659    SkTBits<uint64_t> blockBits(block);
660
661    // According to the spec, a quint block, which contains three values,
662    // has the following layout:
663    //
664    //
665    // 18  17  16  15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
666    //  --------------------------------------------------------------------------
667    // |Q6  Q5 |     m2       |Q4  Q3 |     m1        |Q2  Q1  Q0 |      m0       |
668    //  --------------------------------------------------------------------------
669    //
670    // Where the m's are variable width depending on the number of bits used
671    // to encode the values (anywhere from 0 to 4). Since 5^3 = 125, the extra
672    // 7-bit value labeled Q (whose bits are interleaved where 0 is the LSB and 6 is
673    // the MSB), contains three quint values. To decode the quint values, the spec
674    // says that we need to follow the following algorithm:
675    //
676    // if Q[2:1] = 11 and Q[6:5] = 00
677    //     q2 = { Q[0], Q[4]&~Q[0], Q[3]&~Q[0] }; q1 = q0 = 4
678    // else
679    //     if Q[2:1] = 11
680    //         q2 = 4; C = { Q[4:3], ~Q[6:5], Q[0] }
681    //     else
682    //         q2 = T[6:5]; C = Q[4:0]
683    //
684    //     if C[2:0] = 101
685    //         q1 = 4; q0 = C[4:3]
686    //     else
687    //         q1 = C[4:3]; q0 = C[2:0]
688    //
689    // The following C++ code is meant to mirror this layout and algorithm as
690    // closely as possible.
691
692    int m[3];
693    if (0 == nBits) {
694        memset(m, 0, sizeof(m));
695    } else {
696        SkASSERT(nBits < 8);
697        m[0] = static_cast<int>(blockBits(nBits - 1, 0));
698        m[1] = static_cast<int>(blockBits(2*nBits - 1 + 3, nBits + 3));
699        m[2] = static_cast<int>(blockBits(3*nBits - 1 + 5, 2*nBits + 5));
700    }
701
702    int Q =
703        static_cast<int>(blockBits(nBits + 2, nBits)) |
704        (static_cast<int>(blockBits(2*nBits + 3 + 1, 2*nBits + 3)) << 3) |
705        (static_cast<int>(blockBits(3*nBits + 5 + 1, 3*nBits + 5)) << 5);
706
707    int q[3];
708    SkTBits<int> Qbits(Q); // quantum?
709
710    if (Qbits(2, 1) == 0x3 && Qbits(6, 5) == 0) {
711        const int notBitZero = (0x1 & ~(Qbits[0]));
712        q[2] = (Qbits[0] << 2) | ((Qbits[4] & notBitZero) << 1) | (Qbits[3] & notBitZero);
713        q[1] = 4;
714        q[0] = 4;
715    } else {
716        int C;
717        if (Qbits(2, 1) == 0x3) {
718            q[2] = 4;
719            C = (Qbits(4, 3) << 3) | ((0x3 & ~(Qbits(6, 5))) << 1) | Qbits[0];
720        } else {
721            q[2] = Qbits(6, 5);
722            C = Qbits(4, 0);
723        }
724
725        SkTBits<int> Cbits(C);
726        if (Cbits(2, 0) == 0x5) {
727            q[1] = 4;
728            q[0] = Cbits(4, 3);
729        } else {
730            q[1] = Cbits(4, 3);
731            q[0] = Cbits(2, 0);
732        }
733    }
734
735#ifdef SK_DEBUG
736    for (int i = 0; i < 3; ++i) {
737        SkASSERT(q[i] < 5);
738        SkASSERT(m[i] < (1 << nBits));
739    }
740#endif
741
742    for (int i = 0; i < 3; ++i) {
743        *dst = (q[i] << nBits) + m[i];
744        ++dst;
745    }
746}
747
748// Function that decodes a sequence of integers stored as an ISE (Integer
749// Sequence Encoding) bit stream. The full details of this function are outlined
750// in section C.2.12 of the ASTC spec. A brief overview is as follows:
751//
752// - Each integer in the sequence is bounded by a specific range r.
753// - The range of each value determines the way the bit stream is interpreted,
754// - If the range is a power of two, then the sequence is a sequence of bits
755// - If the range is of the form 3*2^n, then the sequence is stored as a
756//   sequence of blocks, each block contains 5 trits and 5 bit sequences, which
757//   decodes into 5 values.
758// - Similarly, if the range is of the form 5*2^n, then the sequence is stored as a
759//   sequence of blocks, each block contains 3 quints and 3 bit sequences, which
760//   decodes into 3 values.
761static bool decode_integer_sequence(
762    int* dst,                 // The array holding the destination bits
763    int dstSize,              // The maximum size of the array
764    int nVals,                // The number of values that we'd like to decode
765    const ASTCBlock &block,   // The block that we're decoding from
766    int startBit,             // The bit from which we're going to do the reading
767    int endBit,               // The bit at which we stop reading (not inclusive)
768    bool bReadForward,        // If true, then read LSB -> MSB, else read MSB -> LSB
769    int nBits,                // The number of bits representing this encoding
770    int nTrits,               // The number of trits representing this encoding
771    int nQuints               // The number of quints representing this encoding
772) {
773    // If we want more values than we have, then fail.
774    if (nVals > dstSize) {
775        return false;
776    }
777
778    ASTCBlock src = block;
779
780    if (!bReadForward) {
781        src.reverse();
782        startBit = 128 - startBit;
783        endBit = 128 - endBit;
784    }
785
786    while (nVals > 0) {
787
788        if (nTrits > 0) {
789            SkASSERT(0 == nQuints);
790
791            int endBlockBit = startBit + 8 + 5*nBits;
792            if (endBlockBit > endBit) {
793                endBlockBit = endBit;
794            }
795
796            // Trit blocks are three values large.
797            int trits[5];
798            decode_trit_block(trits, nBits, read_astc_bits(src, startBit, endBlockBit));
799            memcpy(dst, trits, SkMin32(nVals, 5)*sizeof(int));
800
801            dst += 5;
802            nVals -= 5;
803            startBit = endBlockBit;
804
805        } else if (nQuints > 0) {
806            SkASSERT(0 == nTrits);
807
808            int endBlockBit = startBit + 7 + 3*nBits;
809            if (endBlockBit > endBit) {
810                endBlockBit = endBit;
811            }
812
813            // Quint blocks are three values large
814            int quints[3];
815            decode_quint_block(quints, nBits, read_astc_bits(src, startBit, endBlockBit));
816            memcpy(dst, quints, SkMin32(nVals, 3)*sizeof(int));
817
818            dst += 3;
819            nVals -= 3;
820            startBit = endBlockBit;
821
822        } else {
823            // Just read the bits, but don't read more than we have...
824            int endValBit = startBit + nBits;
825            if (endValBit > endBit) {
826                endValBit = endBit;
827            }
828
829            SkASSERT(endValBit - startBit < 31);
830            *dst = static_cast<int>(read_astc_bits(src, startBit, endValBit));
831            ++dst;
832            --nVals;
833            startBit = endValBit;
834        }
835    }
836
837    return true;
838}
839
840// Helper function that unquantizes some (seemingly random) generated
841// numbers... meant to match the ASTC hardware. This function is used
842// to unquantize both colors (Table C.2.16) and weights (Table C.2.26)
843static inline int unquantize_value(unsigned mask, int A, int B, int C, int D) {
844    int T = D * C + B;
845    T = T ^ A;
846    T = (A & mask) | (T >> 2);
847    SkASSERT(T < 256);
848    return T;
849}
850
851// Helper function to replicate the bits in x that represents an oldPrec
852// precision integer into a prec precision integer. For example:
853//   255 == replicate_bits(7, 3, 8);
854static inline int replicate_bits(int x, int oldPrec, int prec) {
855    while (oldPrec < prec) {
856        const int toShift = SkMin32(prec-oldPrec, oldPrec);
857        x = (x << toShift) | (x >> (oldPrec - toShift));
858        oldPrec += toShift;
859    }
860
861    // Make sure that no bits are set outside the desired precision.
862    SkASSERT((-(1 << prec) & x) == 0);
863    return x;
864}
865
866// Returns the unquantized value of a color that's represented only as
867// a set of bits.
868static inline int unquantize_bits_color(int val, int nBits) {
869    return replicate_bits(val, nBits, 8);
870}
871
872// Returns the unquantized value of a color that's represented as a
873// trit followed by nBits bits. This algorithm follows the sequence
874// defined in section C.2.13 of the ASTC spec.
875static inline int unquantize_trit_color(int val, int nBits) {
876    SkASSERT(nBits > 0);
877    SkASSERT(nBits < 7);
878
879    const int D = (val >> nBits) & 0x3;
880    SkASSERT(D < 3);
881
882    const int A = -(val & 0x1) & 0x1FF;
883
884    static const int Cvals[6] = { 204, 93, 44, 22, 11, 5 };
885    const int C = Cvals[nBits - 1];
886
887    int B = 0;
888    const SkTBits<int> valBits(val);
889    switch (nBits) {
890        case 1:
891            B = 0;
892            break;
893
894        case 2: {
895            const int b = valBits[1];
896            B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
897        }
898        break;
899
900        case 3: {
901            const int cb = valBits(2, 1);
902            B = cb | (cb << 2) | (cb << 7);
903        }
904        break;
905
906        case 4: {
907            const int dcb = valBits(3, 1);
908            B = dcb | (dcb << 6);
909        }
910        break;
911
912        case 5: {
913            const int edcb = valBits(4, 1);
914            B = (edcb << 5) | (edcb >> 2);
915        }
916        break;
917
918        case 6: {
919            const int fedcb = valBits(5, 1);
920            B = (fedcb << 4) | (fedcb >> 4);
921        }
922        break;
923    }
924
925    return unquantize_value(0x80, A, B, C, D);
926}
927
928// Returns the unquantized value of a color that's represented as a
929// quint followed by nBits bits. This algorithm follows the sequence
930// defined in section C.2.13 of the ASTC spec.
931static inline int unquantize_quint_color(int val, int nBits) {
932    const int D = (val >> nBits) & 0x7;
933    SkASSERT(D < 5);
934
935    const int A = -(val & 0x1) & 0x1FF;
936
937    static const int Cvals[5] = { 113, 54, 26, 13, 6 };
938    SkASSERT(nBits > 0);
939    SkASSERT(nBits < 6);
940
941    const int C = Cvals[nBits - 1];
942
943    int B = 0;
944    const SkTBits<int> valBits(val);
945    switch (nBits) {
946        case 1:
947            B = 0;
948            break;
949
950        case 2: {
951            const int b = valBits[1];
952            B = (b << 2) | (b << 3) | (b << 8);
953        }
954        break;
955
956        case 3: {
957            const int cb = valBits(2, 1);
958            B = (cb >> 1) | (cb << 1) | (cb << 7);
959        }
960        break;
961
962        case 4: {
963            const int dcb = valBits(3, 1);
964            B = (dcb >> 1) | (dcb << 6);
965        }
966        break;
967
968        case 5: {
969            const int edcb = valBits(4, 1);
970            B = (edcb << 5) | (edcb >> 3);
971        }
972        break;
973    }
974
975    return unquantize_value(0x80, A, B, C, D);
976}
977
978// This algorithm takes a list of integers, stored in vals, and unquantizes them
979// in place. This follows the algorithm laid out in section C.2.13 of the ASTC spec.
980static void unquantize_colors(int *vals, int nVals, int nBits, int nTrits, int nQuints) {
981    for (int i = 0; i < nVals; ++i) {
982        if (nTrits > 0) {
983            SkASSERT(nQuints == 0);
984            vals[i] = unquantize_trit_color(vals[i], nBits);
985        } else if (nQuints > 0) {
986            SkASSERT(nTrits == 0);
987            vals[i] = unquantize_quint_color(vals[i], nBits);
988        } else {
989            SkASSERT(nQuints == 0 && nTrits == 0);
990            vals[i] = unquantize_bits_color(vals[i], nBits);
991        }
992    }
993}
994
995// Returns an interpolated value between c0 and c1 based on the weight. This
996// follows the algorithm laid out in section C.2.19 of the ASTC spec.
997static int interpolate_channel(int c0, int c1, int weight) {
998    SkASSERT(0 <= c0 && c0 < 256);
999    SkASSERT(0 <= c1 && c1 < 256);
1000
1001    c0 = (c0 << 8) | c0;
1002    c1 = (c1 << 8) | c1;
1003
1004    const int result = ((c0*(64 - weight) + c1*weight + 32) / 64) >> 8;
1005
1006    if (result > 255) {
1007        return 255;
1008    }
1009
1010    SkASSERT(result >= 0);
1011    return result;
1012}
1013
1014// Returns an interpolated color between the two endpoints based on the weight.
1015static SkColor interpolate_endpoints(const SkColor endpoints[2], int weight) {
1016    return SkColorSetARGB(
1017        interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight),
1018        interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight),
1019        interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight),
1020        interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight));
1021}
1022
1023// Returns an interpolated color between the two endpoints based on the weight.
1024// It uses separate weights for the channel depending on the value of the 'plane'
1025// variable. By default, all channels will use weight 0, and the value of plane
1026// means that weight1 will be used for:
1027// 0: red
1028// 1: green
1029// 2: blue
1030// 3: alpha
1031static SkColor interpolate_dual_endpoints(
1032    const SkColor endpoints[2], int weight0, int weight1, int plane) {
1033    int a = interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight0);
1034    int r = interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight0);
1035    int g = interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight0);
1036    int b = interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight0);
1037
1038    switch (plane) {
1039
1040        case 0:
1041            r = interpolate_channel(
1042                SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight1);
1043            break;
1044
1045        case 1:
1046            g = interpolate_channel(
1047                SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight1);
1048            break;
1049
1050        case 2:
1051            b = interpolate_channel(
1052                SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight1);
1053            break;
1054
1055        case 3:
1056            a = interpolate_channel(
1057                SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight1);
1058            break;
1059
1060        default:
1061            SkDEBUGFAIL("Plane should be 0-3");
1062            break;
1063    }
1064
1065    return SkColorSetARGB(a, r, g, b);
1066}
1067
1068// A struct of decoded values that we use to carry around information
1069// about the block. dimX and dimY are the dimension in texels of the block,
1070// for which there is only a limited subset of valid values:
1071//
1072// 4x4, 5x4, 5x5, 6x5, 6x6, 8x5, 8x6, 8x8, 10x5, 10x6, 10x8, 10x10, 12x10, 12x12
1073
1074struct ASTCDecompressionData {
1075    ASTCDecompressionData(int dimX, int dimY) : fDimX(dimX), fDimY(dimY) { }
1076    const int   fDimX;      // the X dimension of the decompressed block
1077    const int   fDimY;      // the Y dimension of the decompressed block
1078    ASTCBlock   fBlock;     // the block data
1079    int         fBlockMode; // the block header that contains the block mode.
1080
1081    bool fDualPlaneEnabled; // is this block compressing dual weight planes?
1082    int  fDualPlane;        // the independent plane in dual plane mode.
1083
1084    bool fVoidExtent;       // is this block a single color?
1085    bool fError;            // does this block have an error encoding?
1086
1087    int  fWeightDimX;       // the x dimension of the weight grid
1088    int  fWeightDimY;       // the y dimension of the weight grid
1089
1090    int  fWeightBits;       // the number of bits used for each weight value
1091    int  fWeightTrits;      // the number of trits used for each weight value
1092    int  fWeightQuints;     // the number of quints used for each weight value
1093
1094    int  fPartCount;        // the number of partitions in this block
1095    int  fPartIndex;        // the partition index: only relevant if fPartCount > 0
1096
1097    // CEM values can be anything in the range 0-15, and each corresponds to a different
1098    // mode that represents the color data. We only support LDR modes.
1099    enum ColorEndpointMode {
1100        kLDR_Luminance_Direct_ColorEndpointMode          = 0,
1101        kLDR_Luminance_BaseOffset_ColorEndpointMode      = 1,
1102        kHDR_Luminance_LargeRange_ColorEndpointMode      = 2,
1103        kHDR_Luminance_SmallRange_ColorEndpointMode      = 3,
1104        kLDR_LuminanceAlpha_Direct_ColorEndpointMode     = 4,
1105        kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode = 5,
1106        kLDR_RGB_BaseScale_ColorEndpointMode             = 6,
1107        kHDR_RGB_BaseScale_ColorEndpointMode             = 7,
1108        kLDR_RGB_Direct_ColorEndpointMode                = 8,
1109        kLDR_RGB_BaseOffset_ColorEndpointMode            = 9,
1110        kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode    = 10,
1111        kHDR_RGB_ColorEndpointMode                       = 11,
1112        kLDR_RGBA_Direct_ColorEndpointMode               = 12,
1113        kLDR_RGBA_BaseOffset_ColorEndpointMode           = 13,
1114        kHDR_RGB_LDRAlpha_ColorEndpointMode              = 14,
1115        kHDR_RGB_HDRAlpha_ColorEndpointMode              = 15
1116    };
1117    static const int kMaxColorEndpointModes = 16;
1118
1119    // the color endpoint modes for this block.
1120    static const int kMaxPartitions = 4;
1121    ColorEndpointMode fCEM[kMaxPartitions];
1122
1123    int  fColorStartBit;    // The bit position of the first bit of the color data
1124    int  fColorEndBit;      // The bit position of the last *possible* bit of the color data
1125
1126    // Returns the number of partitions for this block.
1127    int numPartitions() const {
1128        return fPartCount;
1129    }
1130
1131    // Returns the total number of weight values that are stored in this block
1132    int numWeights() const {
1133        return fWeightDimX * fWeightDimY * (fDualPlaneEnabled ? 2 : 1);
1134    }
1135
#ifdef SK_DEBUG
    // Debug-only helper: the largest value a quantized weight can take given
    // the current bit/trit/quint weight encoding.
    int maxWeightValue() const {
        // A trit multiplies the number of representable values by three, a
        // quint by five; plain bits leave it as a power of two.
        int multiplier = 1;
        if (fWeightTrits > 0) {
            SkASSERT(0 == fWeightQuints);
            multiplier = 3;
        } else if (fWeightQuints > 0) {
            SkASSERT(0 == fWeightTrits);
            multiplier = 5;
        }
        return (1 << fWeightBits) * multiplier - 1;
    }
#endif
1151
1152    // The number of bits needed to represent the texel weight data. This
1153    // comes from the 'data size determination' section of the ASTC spec (C.2.22)
1154    int numWeightBits() const {
1155        const int nWeights = this->numWeights();
1156        return
1157            ((nWeights*8*fWeightTrits + 4) / 5) +
1158            ((nWeights*7*fWeightQuints + 2) / 3) +
1159            (nWeights*fWeightBits);
1160    }
1161
1162    // Returns the number of color values stored in this block. The number of
1163    // values stored is directly a function of the color endpoint modes.
1164    int numColorValues() const {
1165        int numValues = 0;
1166        for (int i = 0; i < this->numPartitions(); ++i) {
1167            int cemInt = static_cast<int>(fCEM[i]);
1168            numValues += ((cemInt >> 2) + 1) * 2;
1169        }
1170
1171        return numValues;
1172    }
1173
1174    // Figures out the number of bits available for color values, and fills
1175    // in the maximum encoding that will fit the number of color values that
1176    // we need. Returns false on error. (See section C.2.22 of the spec)
1177    bool getColorValueEncoding(int *nBits, int *nTrits, int *nQuints) const {
1178        if (NULL == nBits || NULL == nTrits || NULL == nQuints) {
1179            return false;
1180        }
1181
1182        const int nColorVals = this->numColorValues();
1183        if (nColorVals <= 0) {
1184            return false;
1185        }
1186
1187        const int colorBits = fColorEndBit - fColorStartBit;
1188        SkASSERT(colorBits > 0);
1189
1190        // This is the minimum amount of accuracy required by the spec.
1191        if (colorBits < ((13 * nColorVals + 4) / 5)) {
1192            return false;
1193        }
1194
1195        // Values can be represented as at most 8-bit values.
1196        // !SPEED! place this in a lookup table based on colorBits and nColorVals
1197        for (int i = 255; i > 0; --i) {
1198            int range = i + 1;
1199            int bits = 0, trits = 0, quints = 0;
1200            bool valid = false;
1201            if (SkIsPow2(range)) {
1202                bits = bits_for_range(range);
1203                valid = true;
1204            } else if ((range % 3) == 0 && SkIsPow2(range/3)) {
1205                trits = 1;
1206                bits = bits_for_range(range/3);
1207                valid = true;
1208            } else if ((range % 5) == 0 && SkIsPow2(range/5)) {
1209                quints = 1;
1210                bits = bits_for_range(range/5);
1211                valid = true;
1212            }
1213
1214            if (valid) {
1215                const int actualColorBits =
1216                    ((nColorVals*8*trits + 4) / 5) +
1217                    ((nColorVals*7*quints + 2) / 3) +
1218                    (nColorVals*bits);
1219                if (actualColorBits <= colorBits) {
1220                    *nTrits = trits;
1221                    *nQuints = quints;
1222                    *nBits = bits;
1223                    return true;
1224                }
1225            }
1226        }
1227
1228        return false;
1229    }
1230
1231    // Converts the sequence of color values into endpoints. The algorithm here
1232    // corresponds to the values determined by section C.2.14 of the ASTC spec
1233    void colorEndpoints(SkColor endpoints[4][2], const int* colorValues) const {
1234        for (int i = 0; i < this->numPartitions(); ++i) {
1235            switch (fCEM[i]) {
1236                case kLDR_Luminance_Direct_ColorEndpointMode: {
1237                    const int* v = colorValues;
1238                    endpoints[i][0] = SkColorSetARGB(0xFF, v[0], v[0], v[0]);
1239                    endpoints[i][1] = SkColorSetARGB(0xFF, v[1], v[1], v[1]);
1240
1241                    colorValues += 2;
1242                }
1243                break;
1244
1245                case kLDR_Luminance_BaseOffset_ColorEndpointMode: {
1246                    const int* v = colorValues;
1247                    const int L0 = (v[0] >> 2) | (v[1] & 0xC0);
1248                    const int L1 = clamp_byte(L0 + (v[1] & 0x3F));
1249
1250                    endpoints[i][0] = SkColorSetARGB(0xFF, L0, L0, L0);
1251                    endpoints[i][1] = SkColorSetARGB(0xFF, L1, L1, L1);
1252
1253                    colorValues += 2;
1254                }
1255                break;
1256
1257                case kLDR_LuminanceAlpha_Direct_ColorEndpointMode: {
1258                    const int* v = colorValues;
1259
1260                    endpoints[i][0] = SkColorSetARGB(v[2], v[0], v[0], v[0]);
1261                    endpoints[i][1] = SkColorSetARGB(v[3], v[1], v[1], v[1]);
1262
1263                    colorValues += 4;
1264                }
1265                break;
1266
1267                case kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode: {
1268                    int v0 = colorValues[0];
1269                    int v1 = colorValues[1];
1270                    int v2 = colorValues[2];
1271                    int v3 = colorValues[3];
1272
1273                    bit_transfer_signed(&v1, &v0);
1274                    bit_transfer_signed(&v3, &v2);
1275
1276                    endpoints[i][0] = SkColorSetARGB(v2, v0, v0, v0);
1277                    endpoints[i][1] = SkColorSetARGB(
1278                        clamp_byte(v3+v2),
1279                        clamp_byte(v1+v0),
1280                        clamp_byte(v1+v0),
1281                        clamp_byte(v1+v0));
1282
1283                    colorValues += 4;
1284                }
1285                break;
1286
1287                case kLDR_RGB_BaseScale_ColorEndpointMode: {
1288                    decode_rgba_basescale(colorValues, endpoints[i], true);
1289                    colorValues += 4;
1290                }
1291                break;
1292
1293                case kLDR_RGB_Direct_ColorEndpointMode: {
1294                    decode_rgba_direct(colorValues, endpoints[i], true);
1295                    colorValues += 6;
1296                }
1297                break;
1298
1299                case kLDR_RGB_BaseOffset_ColorEndpointMode: {
1300                    decode_rgba_baseoffset(colorValues, endpoints[i], true);
1301                    colorValues += 6;
1302                }
1303                break;
1304
1305                case kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode: {
1306                    decode_rgba_basescale(colorValues, endpoints[i], false);
1307                    colorValues += 6;
1308                }
1309                break;
1310
1311                case kLDR_RGBA_Direct_ColorEndpointMode: {
1312                    decode_rgba_direct(colorValues, endpoints[i], false);
1313                    colorValues += 8;
1314                }
1315                break;
1316
1317                case kLDR_RGBA_BaseOffset_ColorEndpointMode: {
1318                    decode_rgba_baseoffset(colorValues, endpoints[i], false);
1319                    colorValues += 8;
1320                }
1321                break;
1322
1323                default:
1324                    SkDEBUGFAIL("HDR mode unsupported! This should be caught sooner.");
1325                    break;
1326            }
1327        }
1328    }
1329
1330    // Follows the procedure from section C.2.17 of the ASTC specification
1331    int unquantizeWeight(int x) const {
1332        SkASSERT(x <= this->maxWeightValue());
1333
1334        const int D = (x >> fWeightBits) & 0x7;
1335        const int A = -(x & 0x1) & 0x7F;
1336
1337        SkTBits<int> xbits(x);
1338
1339        int T = 0;
1340        if (fWeightTrits > 0) {
1341            SkASSERT(0 == fWeightQuints);
1342            switch (fWeightBits) {
1343                case 0: {
1344                    // x is a single trit
1345                    SkASSERT(x < 3);
1346
1347                    static const int kUnquantizationTable[3] = { 0, 32, 63 };
1348                    T = kUnquantizationTable[x];
1349                }
1350                break;
1351
1352                case 1: {
1353                    const int B = 0;
1354                    const int C = 50;
1355                    T = unquantize_value(0x20, A, B, C, D);
1356                }
1357                break;
1358
1359                case 2: {
1360                    const int b = xbits[1];
1361                    const int B = b | (b << 2) | (b << 6);
1362                    const int C = 23;
1363                    T = unquantize_value(0x20, A, B, C, D);
1364                }
1365                break;
1366
1367                case 3: {
1368                    const int cb = xbits(2, 1);
1369                    const int B = cb | (cb << 5);
1370                    const int C = 11;
1371                    T = unquantize_value(0x20, A, B, C, D);
1372                }
1373                break;
1374
1375                default:
1376                    SkDEBUGFAIL("Too many bits for trit encoding");
1377                    break;
1378            }
1379
1380        } else if (fWeightQuints > 0) {
1381            SkASSERT(0 == fWeightTrits);
1382            switch (fWeightBits) {
1383                case 0: {
1384                    // x is a single quint
1385                    SkASSERT(x < 5);
1386
1387                    static const int kUnquantizationTable[5] = { 0, 16, 32, 47, 63 };
1388                    T = kUnquantizationTable[x];
1389                }
1390                break;
1391
1392                case 1: {
1393                    const int B = 0;
1394                    const int C = 28;
1395                    T = unquantize_value(0x20, A, B, C, D);
1396                }
1397                break;
1398
1399                case 2: {
1400                    const int b = xbits[1];
1401                    const int B = (b << 1) | (b << 6);
1402                    const int C = 13;
1403                    T = unquantize_value(0x20, A, B, C, D);
1404                }
1405                break;
1406
1407                default:
1408                    SkDEBUGFAIL("Too many bits for quint encoding");
1409                    break;
1410            }
1411        } else {
1412            SkASSERT(0 == fWeightTrits);
1413            SkASSERT(0 == fWeightQuints);
1414
1415            T = replicate_bits(x, fWeightBits, 6);
1416        }
1417
1418        // This should bring the value within [0, 63]..
1419        SkASSERT(T <= 63);
1420
1421        if (T > 32) {
1422            T += 1;
1423        }
1424
1425        SkASSERT(T <= 64);
1426
1427        return T;
1428    }
1429
1430    // Returns the weight at the associated index. If the index is out of bounds, it
1431    // returns zero. It also chooses the weight appropriately based on the given dual
1432    // plane.
1433    int getWeight(const int* unquantizedWeights, int idx, bool dualPlane) const {
1434        const int maxIdx = (fDualPlaneEnabled ? 2 : 1) * fWeightDimX * fWeightDimY - 1;
1435        if (fDualPlaneEnabled) {
1436            const int effectiveIdx = 2*idx + (dualPlane ? 1 : 0);
1437            if (effectiveIdx > maxIdx) {
1438                return 0;
1439            }
1440            return unquantizedWeights[effectiveIdx];
1441        }
1442
1443        SkASSERT(!dualPlane);
1444
1445        if (idx > maxIdx) {
1446            return 0;
1447        } else {
1448            return unquantizedWeights[idx];
1449        }
1450    }
1451
1452    // This computes the effective weight at location (s, t) of the block. This
1453    // weight is computed by sampling the texel weight grid (it's usually not 1-1), and
1454    // then applying a bilerp. The algorithm outlined here follows the algorithm
1455    // defined in section C.2.18 of the ASTC spec.
1456    int infillWeight(const int* unquantizedValues, int s, int t, bool dualPlane) const {
1457        const int Ds = (1024 + fDimX/2) / (fDimX - 1);
1458        const int Dt = (1024 + fDimY/2) / (fDimY - 1);
1459
1460        const int cs = Ds * s;
1461        const int ct = Dt * t;
1462
1463        const int gs = (cs*(fWeightDimX - 1) + 32) >> 6;
1464        const int gt = (ct*(fWeightDimY - 1) + 32) >> 6;
1465
1466        const int js = gs >> 4;
1467        const int jt = gt >> 4;
1468
1469        const int fs = gs & 0xF;
1470        const int ft = gt & 0xF;
1471
1472        const int idx = js + jt*fWeightDimX;
1473        const int p00 = this->getWeight(unquantizedValues, idx, dualPlane);
1474        const int p01 = this->getWeight(unquantizedValues, idx + 1, dualPlane);
1475        const int p10 = this->getWeight(unquantizedValues, idx + fWeightDimX, dualPlane);
1476        const int p11 = this->getWeight(unquantizedValues, idx + fWeightDimX + 1, dualPlane);
1477
1478        const int w11 = (fs*ft + 8) >> 4;
1479        const int w10 = ft - w11;
1480        const int w01 = fs - w11;
1481        const int w00 = 16 - fs - ft + w11;
1482
1483        const int weight = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1484        SkASSERT(weight <= 64);
1485        return weight;
1486    }
1487
1488    // Unquantizes the decoded texel weights as described in section C.2.17 of
1489    // the ASTC specification. Additionally, it populates texelWeights with
1490    // the expanded weight grid, which is computed according to section C.2.18
1491    void texelWeights(int texelWeights[2][12][12], const int* texelValues) const {
1492        // Unquantized texel weights...
1493        int unquantizedValues[144*2]; // 12x12 blocks with dual plane decoding...
1494        SkASSERT(this->numWeights() <= 144*2);
1495
1496        // Unquantize the weights and cache them
1497        for (int j = 0; j < this->numWeights(); ++j) {
1498            unquantizedValues[j] = this->unquantizeWeight(texelValues[j]);
1499        }
1500
1501        // Do weight infill...
1502        for (int y = 0; y < fDimY; ++y) {
1503            for (int x = 0; x < fDimX; ++x) {
1504                texelWeights[0][x][y] = this->infillWeight(unquantizedValues, x, y, false);
1505                if (fDualPlaneEnabled) {
1506                    texelWeights[1][x][y] = this->infillWeight(unquantizedValues, x, y, true);
1507                }
1508            }
1509        }
1510    }
1511
1512    // Returns the partition for the texel located at position (x, y).
1513    // Adapted from C.2.21 of the ASTC specification
1514    int getPartition(int x, int y) const {
1515        const int partitionCount = this->numPartitions();
1516        int seed = fPartIndex;
1517        if ((fDimX * fDimY) < 31) {
1518            x <<= 1;
1519            y <<= 1;
1520        }
1521
1522        seed += (partitionCount - 1) * 1024;
1523
1524        uint32_t p = seed;
1525        p ^= p >> 15;  p -= p << 17;  p += p << 7; p += p <<  4;
1526        p ^= p >>  5;  p += p << 16;  p ^= p >> 7; p ^= p >> 3;
1527        p ^= p <<  6;  p ^= p >> 17;
1528
1529        uint32_t rnum = p;
1530        uint8_t seed1  =  rnum        & 0xF;
1531        uint8_t seed2  = (rnum >>  4) & 0xF;
1532        uint8_t seed3  = (rnum >>  8) & 0xF;
1533        uint8_t seed4  = (rnum >> 12) & 0xF;
1534        uint8_t seed5  = (rnum >> 16) & 0xF;
1535        uint8_t seed6  = (rnum >> 20) & 0xF;
1536        uint8_t seed7  = (rnum >> 24) & 0xF;
1537        uint8_t seed8  = (rnum >> 28) & 0xF;
1538        uint8_t seed9  = (rnum >> 18) & 0xF;
1539        uint8_t seed10 = (rnum >> 22) & 0xF;
1540        uint8_t seed11 = (rnum >> 26) & 0xF;
1541        uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
1542
1543        seed1 *= seed1;     seed2 *= seed2;
1544        seed3 *= seed3;     seed4 *= seed4;
1545        seed5 *= seed5;     seed6 *= seed6;
1546        seed7 *= seed7;     seed8 *= seed8;
1547        seed9 *= seed9;     seed10 *= seed10;
1548        seed11 *= seed11;   seed12 *= seed12;
1549
1550        int sh1, sh2, sh3;
1551        if (0 != (seed & 1)) {
1552            sh1 = (0 != (seed & 2))? 4 : 5;
1553            sh2 = (partitionCount == 3)? 6 : 5;
1554        } else {
1555            sh1 = (partitionCount==3)? 6 : 5;
1556            sh2 = (0 != (seed & 2))? 4 : 5;
1557        }
1558        sh3 = (0 != (seed & 0x10))? sh1 : sh2;
1559
1560        seed1 >>= sh1; seed2  >>= sh2; seed3  >>= sh1; seed4  >>= sh2;
1561        seed5 >>= sh1; seed6  >>= sh2; seed7  >>= sh1; seed8  >>= sh2;
1562        seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
1563
1564        const int z = 0;
1565        int a = seed1*x + seed2*y + seed11*z + (rnum >> 14);
1566        int b = seed3*x + seed4*y + seed12*z + (rnum >> 10);
1567        int c = seed5*x + seed6*y + seed9 *z + (rnum >>  6);
1568        int d = seed7*x + seed8*y + seed10*z + (rnum >>  2);
1569
1570        a &= 0x3F;
1571        b &= 0x3F;
1572        c &= 0x3F;
1573        d &= 0x3F;
1574
1575        if (partitionCount < 4) {
1576            d = 0;
1577        }
1578
1579        if (partitionCount < 3) {
1580            c = 0;
1581        }
1582
1583        if (a >= b && a >= c && a >= d) {
1584            return 0;
1585        } else if (b >= c && b >= d) {
1586            return 1;
1587        } else if (c >= d) {
1588            return 2;
1589        } else {
1590            return 3;
1591        }
1592    }
1593
1594    // Performs the proper interpolation of the texel based on the
1595    // endpoints and weights.
1596    SkColor getTexel(const SkColor endpoints[4][2],
1597                     const int weights[2][12][12],
1598                     int x, int y) const {
1599        int part = 0;
1600        if (this->numPartitions() > 1) {
1601            part = this->getPartition(x, y);
1602        }
1603
1604        SkColor result;
1605        if (fDualPlaneEnabled) {
1606            result = interpolate_dual_endpoints(
1607                endpoints[part], weights[0][x][y], weights[1][x][y], fDualPlane);
1608        } else {
1609            result = interpolate_endpoints(endpoints[part], weights[0][x][y]);
1610        }
1611
1612#if 1
1613        // !FIXME! if we're writing directly to a bitmap, then we don't need
1614        // to swap the red and blue channels, but since we're usually being used
1615        // by the SkImageDecoder_astc module, the results are expected to be in RGBA.
1616        result = SkColorSetARGB(
1617            SkColorGetA(result), SkColorGetB(result), SkColorGetG(result), SkColorGetR(result));
1618#endif
1619
1620        return result;
1621    }
1622
1623    void decode() {
1624        // First decode the block mode.
1625        this->decodeBlockMode();
1626
1627        // Now we can decode the partition information.
1628        fPartIndex = static_cast<int>(read_astc_bits(fBlock, 11, 23));
1629        fPartCount = (fPartIndex & 0x3) + 1;
1630        fPartIndex >>= 2;
1631
1632        // This is illegal
1633        if (fDualPlaneEnabled && this->numPartitions() == 4) {
1634            fError = true;
1635            return;
1636        }
1637
1638        // Based on the partition info, we can decode the color information.
1639        this->decodeColorData();
1640    }
1641
1642    // Decodes the dual plane based on the given bit location. The final
1643    // location, if the dual plane is enabled, is also the end of our color data.
1644    // This function is only meant to be used from this->decodeColorData()
1645    void decodeDualPlane(int bitLoc) {
1646        if (fDualPlaneEnabled) {
1647            fDualPlane = static_cast<int>(read_astc_bits(fBlock, bitLoc - 2, bitLoc));
1648            fColorEndBit = bitLoc - 2;
1649        } else {
1650            fColorEndBit = bitLoc;
1651        }
1652    }
1653
1654    // Decodes the color information based on the ASTC spec.
1655    void decodeColorData() {
1656
1657        // By default, the last color bit is at the end of the texel weights
1658        const int lastWeight = 128 - this->numWeightBits();
1659
1660        // If we have a dual plane then it will be at this location, too.
1661        int dualPlaneBitLoc = lastWeight;
1662
1663        // If there's only one partition, then our job is (relatively) easy.
1664        if (this->numPartitions() == 1) {
1665            fCEM[0] = static_cast<ColorEndpointMode>(read_astc_bits(fBlock, 13, 17));
1666            fColorStartBit = 17;
1667
1668            // Handle dual plane mode...
1669            this->decodeDualPlane(dualPlaneBitLoc);
1670
1671            return;
1672        }
1673
1674        // If we have more than one partition, then we need to make
1675        // room for the partition index.
1676        fColorStartBit = 29;
1677
1678        // Read the base CEM. If it's zero, then we have no additional
1679        // CEM data and the endpoints for each partition share the same CEM.
1680        const int baseCEM = static_cast<int>(read_astc_bits(fBlock, 23, 25));
1681        if (0 == baseCEM) {
1682
1683            const ColorEndpointMode sameCEM =
1684                static_cast<ColorEndpointMode>(read_astc_bits(fBlock, 25, 29));
1685
1686            for (int i = 0; i < kMaxPartitions; ++i) {
1687                fCEM[i] = sameCEM;
1688            }
1689
1690            // Handle dual plane mode...
1691            this->decodeDualPlane(dualPlaneBitLoc);
1692
1693            return;
1694        }
1695
1696        // Move the dual plane selector bits down based on how many
1697        // partitions the block contains.
1698        switch (this->numPartitions()) {
1699            case 2:
1700                dualPlaneBitLoc -= 2;
1701                break;
1702
1703            case 3:
1704                dualPlaneBitLoc -= 5;
1705                break;
1706
1707            case 4:
1708                dualPlaneBitLoc -= 8;
1709                break;
1710
1711            default:
1712                SkDEBUGFAIL("Internal ASTC decoding error.");
1713                break;
1714        }
1715
1716        // The rest of the CEM config will be between the dual plane bit selector
1717        // and the texel weight grid.
1718        const int lowCEM = static_cast<int>(read_astc_bits(fBlock, 23, 29));
1719        SkASSERT(lastWeight >= dualPlaneBitLoc);
1720        SkASSERT(lastWeight - dualPlaneBitLoc < 31);
1721        int fullCEM = static_cast<int>(read_astc_bits(fBlock, dualPlaneBitLoc, lastWeight));
1722
1723        // Attach the config at the end of the weight grid to the CEM values
1724        // in the beginning of the block.
1725        fullCEM = (fullCEM << 6) | lowCEM;
1726
1727        // Ignore the two least significant bits, since those are our baseCEM above.
1728        fullCEM = fullCEM >> 2;
1729
1730        int C[kMaxPartitions]; // Next, decode C and M from the spec (Table C.2.12)
1731        for (int i = 0; i < this->numPartitions(); ++i) {
1732            C[i] = fullCEM & 1;
1733            fullCEM = fullCEM >> 1;
1734        }
1735
1736        int M[kMaxPartitions];
1737        for (int i = 0; i < this->numPartitions(); ++i) {
1738            M[i] = fullCEM & 0x3;
1739            fullCEM = fullCEM >> 2;
1740        }
1741
1742        // Construct our CEMs..
1743        SkASSERT(baseCEM > 0);
1744        for (int i = 0; i < this->numPartitions(); ++i) {
1745            int cem = (baseCEM - 1) * 4;
1746            cem += (0 == C[i])? 0 : 4;
1747            cem += M[i];
1748
1749            SkASSERT(cem < 16);
1750            fCEM[i] = static_cast<ColorEndpointMode>(cem);
1751        }
1752
1753        // Finally, if we have dual plane mode, then read the plane selector.
1754        this->decodeDualPlane(dualPlaneBitLoc);
1755    }
1756
1757    // Decodes the block mode. This function determines whether or not we use
1758    // dual plane encoding, the size of the texel weight grid, and the number of
1759    // bits, trits and quints that are used to encode it. For more information,
1760    // see section C.2.10 of the ASTC spec.
1761    //
1762    // For 2D blocks, the Block Mode field is laid out as follows:
1763    //
1764    // -------------------------------------------------------------------------
1765    // 10  9   8   7   6   5   4   3   2   1   0   Width Height Notes
1766    // -------------------------------------------------------------------------
1767    // D   H     B       A     R0  0   0   R2  R1  B+4   A+2
1768    // D   H     B       A     R0  0   1   R2  R1  B+8   A+2
1769    // D   H     B       A     R0  1   0   R2  R1  A+2   B+8
1770    // D   H   0   B     A     R0  1   1   R2  R1  A+2   B+6
1771    // D   H   1   B     A     R0  1   1   R2  R1  B+2   A+2
1772    // D   H   0   0     A     R0  R2  R1  0   0   12    A+2
1773    // D   H   0   1     A     R0  R2  R1  0   0   A+2   12
1774    // D   H   1   1   0   0   R0  R2  R1  0   0   6     10
1775    // D   H   1   1   0   1   R0  R2  R1  0   0   10    6
1776    //   B     1   0     A     R0  R2  R1  0   0   A+6   B+6   D=0, H=0
1777    // x   x   1   1   1   1   1   1   1   0   0   -     -     Void-extent
1778    // x   x   1   1   1   x   x   x   x   0   0   -     -     Reserved*
1779    // x   x   x   x   x   x   x   0   0   0   0   -     -     Reserved
1780    // -------------------------------------------------------------------------
1781    //
1782    // D - dual plane enabled
1783    // H, R - used to determine the number of bits/trits/quints in texel weight encoding
1784    //        R is a three bit value whose LSB is R0 and MSB is R1
1785    // Width, Height - dimensions of the texel weight grid (determined by A and B)
1786
1787    void decodeBlockMode() {
1788        const int blockMode = static_cast<int>(read_astc_bits(fBlock, 0, 11));
1789
1790        // Check for special void extent encoding
1791        fVoidExtent = (blockMode & 0x1FF) == 0x1FC;
1792
1793        // Check for reserved block modes
1794        fError = ((blockMode & 0x1C3) == 0x1C0) || ((blockMode & 0xF) == 0);
1795
1796        // Neither reserved nor void-extent, decode as usual
1797        // This code corresponds to table C.2.8 of the ASTC spec
1798        bool highPrecision = false;
1799        int R = 0;
1800        if ((blockMode & 0x3) == 0) {
1801            R = ((0xC & blockMode) >> 1) | ((0x10 & blockMode) >> 4);
1802            const int bitsSevenAndEight = (blockMode & 0x180) >> 7;
1803            SkASSERT(0 <= bitsSevenAndEight && bitsSevenAndEight < 4);
1804
1805            const int A = (blockMode >> 5) & 0x3;
1806            const int B = (blockMode >> 9) & 0x3;
1807
1808            fDualPlaneEnabled = (blockMode >> 10) & 0x1;
1809            highPrecision = (blockMode >> 9) & 0x1;
1810
1811            switch (bitsSevenAndEight) {
1812                default:
1813                case 0:
1814                    fWeightDimX = 12;
1815                    fWeightDimY = A + 2;
1816                    break;
1817
1818                case 1:
1819                    fWeightDimX = A + 2;
1820                    fWeightDimY = 12;
1821                    break;
1822
1823                case 2:
1824                    fWeightDimX = A + 6;
1825                    fWeightDimY = B + 6;
1826                    fDualPlaneEnabled = false;
1827                    highPrecision = false;
1828                    break;
1829
1830                case 3:
1831                    if (0 == A) {
1832                        fWeightDimX = 6;
1833                        fWeightDimY = 10;
1834                    } else {
1835                        fWeightDimX = 10;
1836                        fWeightDimY = 6;
1837                    }
1838                    break;
1839            }
1840        } else { // (blockMode & 0x3) != 0
1841            R = ((blockMode & 0x3) << 1) | ((blockMode & 0x10) >> 4);
1842
1843            const int bitsTwoAndThree = (blockMode >> 2) & 0x3;
1844            SkASSERT(0 <= bitsTwoAndThree && bitsTwoAndThree < 4);
1845
1846            const int A = (blockMode >> 5) & 0x3;
1847            const int B = (blockMode >> 7) & 0x3;
1848
1849            fDualPlaneEnabled = (blockMode >> 10) & 0x1;
1850            highPrecision = (blockMode >> 9) & 0x1;
1851
1852            switch (bitsTwoAndThree) {
1853                case 0:
1854                    fWeightDimX = B + 4;
1855                    fWeightDimY = A + 2;
1856                    break;
1857                case 1:
1858                    fWeightDimX = B + 8;
1859                    fWeightDimY = A + 2;
1860                    break;
1861                case 2:
1862                    fWeightDimX = A + 2;
1863                    fWeightDimY = B + 8;
1864                    break;
1865                case 3:
1866                    if ((B & 0x2) == 0) {
1867                        fWeightDimX = A + 2;
1868                        fWeightDimY = (B & 1) + 6;
1869                    } else {
1870                        fWeightDimX = (B & 1) + 2;
1871                        fWeightDimY = A + 2;
1872                    }
1873                    break;
1874            }
1875        }
1876
1877        // We should have set the values of R and highPrecision
1878        // from decoding the block mode, these are used to determine
1879        // the proper dimensions of our weight grid.
1880        if ((R & 0x6) == 0) {
1881            fError = true;
1882        } else {
1883            static const int kBitAllocationTable[2][6][3] = {
1884                {
1885                    {  1, 0, 0 },
1886                    {  0, 1, 0 },
1887                    {  2, 0, 0 },
1888                    {  0, 0, 1 },
1889                    {  1, 1, 0 },
1890                    {  3, 0, 0 }
1891                },
1892                {
1893                    {  1, 0, 1 },
1894                    {  2, 1, 0 },
1895                    {  4, 0, 0 },
1896                    {  2, 0, 1 },
1897                    {  3, 1, 0 },
1898                    {  5, 0, 0 }
1899                }
1900            };
1901
1902            fWeightBits = kBitAllocationTable[highPrecision][R - 2][0];
1903            fWeightTrits = kBitAllocationTable[highPrecision][R - 2][1];
1904            fWeightQuints = kBitAllocationTable[highPrecision][R - 2][2];
1905        }
1906    }
1907};
1908
1909// Reads an ASTC block from the given pointer.
1910static inline void read_astc_block(ASTCDecompressionData *dst, const uint8_t* src) {
1911    const uint64_t* qword = reinterpret_cast<const uint64_t*>(src);
1912    dst->fBlock.fLow = SkEndian_SwapLE64(qword[0]);
1913    dst->fBlock.fHigh = SkEndian_SwapLE64(qword[1]);
1914    dst->decode();
1915}
1916
1917// Take a known void-extent block, and write out the values as a constant color.
1918static void decompress_void_extent(uint8_t* dst, int dstRowBytes,
1919                                   const ASTCDecompressionData &data) {
1920    // The top 64 bits contain 4 16-bit RGBA values.
1921    int a = (static_cast<int>(read_astc_bits(data.fBlock, 112, 128)) + 255) >> 8;
1922    int b = (static_cast<int>(read_astc_bits(data.fBlock, 96, 112)) + 255) >> 8;
1923    int g = (static_cast<int>(read_astc_bits(data.fBlock, 80, 96)) + 255) >> 8;
1924    int r = (static_cast<int>(read_astc_bits(data.fBlock, 64, 80)) + 255) >> 8;
1925
1926    write_constant_color(dst, data.fDimX, data.fDimY, dstRowBytes, SkColorSetARGB(a, r, g, b));
1927}
1928
1929// Decompresses a single ASTC block. It's assumed that data.fDimX and data.fDimY are
1930// set and that the block has already been decoded (i.e. data.decode() has been called)
1931static void decompress_astc_block(uint8_t* dst, int dstRowBytes,
1932                                  const ASTCDecompressionData &data) {
1933    if (data.fError) {
1934        write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1935        return;
1936    }
1937
1938    if (data.fVoidExtent) {
1939        decompress_void_extent(dst, dstRowBytes, data);
1940        return;
1941    }
1942
1943    // According to the spec, any more than 64 values is illegal. (C.2.24)
1944    static const int kMaxTexelValues = 64;
1945
1946    // Decode the texel weights.
1947    int texelValues[kMaxTexelValues];
1948    bool success = decode_integer_sequence(
1949        texelValues, kMaxTexelValues, data.numWeights(),
1950        // texel data goes to the end of the 128 bit block.
1951        data.fBlock, 128, 128 - data.numWeightBits(), false,
1952        data.fWeightBits, data.fWeightTrits, data.fWeightQuints);
1953
1954    if (!success) {
1955        write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1956        return;
1957    }
1958
1959    // Decode the color endpoints
1960    int colorBits, colorTrits, colorQuints;
1961    if (!data.getColorValueEncoding(&colorBits, &colorTrits, &colorQuints)) {
1962        write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1963        return;
1964    }
1965
1966    // According to the spec, any more than 18 color values is illegal. (C.2.24)
1967    static const int kMaxColorValues = 18;
1968
1969    int colorValues[kMaxColorValues];
1970    success = decode_integer_sequence(
1971        colorValues, kMaxColorValues, data.numColorValues(),
1972        data.fBlock, data.fColorStartBit, data.fColorEndBit, true,
1973        colorBits, colorTrits, colorQuints);
1974
1975    if (!success) {
1976        write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1977        return;
1978    }
1979
1980    // Unquantize the color values after they've been decoded.
1981    unquantize_colors(colorValues, data.numColorValues(), colorBits, colorTrits, colorQuints);
1982
1983    // Decode the colors into the appropriate endpoints.
1984    SkColor endpoints[4][2];
1985    data.colorEndpoints(endpoints, colorValues);
1986
1987    // Do texel infill and decode the texel values.
1988    int texelWeights[2][12][12];
1989    data.texelWeights(texelWeights, texelValues);
1990
1991    // Write the texels by interpolating them based on the information
1992    // stored in the block.
1993    dst += data.fDimY * dstRowBytes;
1994    for (int y = 0; y < data.fDimY; ++y) {
1995        dst -= dstRowBytes;
1996        SkColor* colorPtr = reinterpret_cast<SkColor*>(dst);
1997        for (int x = 0; x < data.fDimX; ++x) {
1998            colorPtr[x] = data.getTexel(endpoints, texelWeights, x, y);
1999        }
2000    }
2001}
2002
2003////////////////////////////////////////////////////////////////////////////////
2004//
// ASTC Compression Struct
2006//
2007////////////////////////////////////////////////////////////////////////////////
2008
2009// This is the type passed as the CompressorType argument of the compressed
2010// blitter for the ASTC format. The static functions required to be in this
2011// struct are documented in SkTextureCompressor_Blitter.h
2012struct CompressorASTC {
2013    static inline void CompressA8Vertical(uint8_t* dst, const uint8_t* src) {
2014        compress_a8_astc_block<GetAlphaTranspose>(&dst, src, 12);
2015    }
2016
2017    static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
2018                                            int srcRowBytes) {
2019        compress_a8_astc_block<GetAlpha>(&dst, src, srcRowBytes);
2020    }
2021
2022#if PEDANTIC_BLIT_RECT
2023    static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
2024                                   const uint8_t* mask) {
2025        // TODO: krajcevski
2026        // This is kind of difficult for ASTC because the weight values are calculated
2027        // as an average of the actual weights. The best we can do is decompress the
2028        // weights and recalculate them based on the new texel values. This should
2029        // be "not too bad" since we know that anytime we hit this function, we're
2030        // compressing 12x12 block dimension alpha-only, and we know the layout
2031        // of the block
2032        SkFAIL("Implement me!");
2033    }
2034#endif
2035};
2036
2037////////////////////////////////////////////////////////////////////////////////
2038
2039namespace SkTextureCompressor {
2040
2041bool CompressA8To12x12ASTC(uint8_t* dst, const uint8_t* src,
2042                           int width, int height, int rowBytes) {
2043    if (width < 0 || ((width % 12) != 0) || height < 0 || ((height % 12) != 0)) {
2044        return false;
2045    }
2046
2047    uint8_t** dstPtr = &dst;
2048    for (int y = 0; y < height; y += 12) {
2049        for (int x = 0; x < width; x += 12) {
2050            compress_a8_astc_block<GetAlpha>(dstPtr, src + y*rowBytes + x, rowBytes);
2051        }
2052    }
2053
2054    return true;
2055}
2056
2057SkBlitter* CreateASTCBlitter(int width, int height, void* outputBuffer,
2058                             SkTBlitterAllocator* allocator) {
2059    if ((width % 12) != 0 || (height % 12) != 0) {
2060        return NULL;
2061    }
2062
2063    // Memset the output buffer to an encoding that decodes to zero. We must do this
2064    // in order to avoid having uninitialized values in the buffer if the blitter
2065    // decides not to write certain scanlines (and skip entire rows of blocks).
2066    // In the case of ASTC, if everything index is zero, then the interpolated value
2067    // will decode to zero provided we have the right header. We use the encoding
2068    // from recognizing all zero blocks from above.
2069    const int nBlocks = (width * height / 144);
2070    uint8_t *dst = reinterpret_cast<uint8_t *>(outputBuffer);
2071    for (int i = 0; i < nBlocks; ++i) {
2072        send_packing(&dst, SkTEndian_SwapLE64(0x0000000001FE000173ULL), 0);
2073    }
2074
2075    return allocator->createT<
2076        SkTCompressedAlphaBlitter<12, 16, CompressorASTC>, int, int, void* >
2077        (width, height, outputBuffer);
2078}
2079
2080void DecompressASTC(uint8_t* dst, int dstRowBytes, const uint8_t* src,
2081                    int width, int height, int blockDimX, int blockDimY) {
2082    // ASTC is encoded in what they call "raster order", so that the first
2083    // block is the bottom-left block in the image, and the first pixel
2084    // is the bottom-left pixel of the image
2085    dst += height * dstRowBytes;
2086
2087    ASTCDecompressionData data(blockDimX, blockDimY);
2088    for (int y = 0; y < height; y += blockDimY) {
2089        dst -= blockDimY * dstRowBytes;
2090        SkColor *colorPtr = reinterpret_cast<SkColor*>(dst);
2091        for (int x = 0; x < width; x += blockDimX) {
2092            read_astc_block(&data, src);
2093            decompress_astc_block(reinterpret_cast<uint8_t*>(colorPtr + x), dstRowBytes, data);
2094
2095            // ASTC encoded blocks are 16 bytes (128 bits) large.
2096            src += 16;
2097        }
2098    }
2099}
2100
2101}  // SkTextureCompressor
2102