1/*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file texcompress_bptc.c
26 * GL_ARB_texture_compression_bptc support.
27 */
28
29#include <stdbool.h>
30#include "texcompress.h"
31#include "texcompress_bptc.h"
32#include "util/format_srgb.h"
33#include "util/half_float.h"
34#include "texstore.h"
35#include "macros.h"
36#include "image.h"
37
/* Width and height of a block in texels; the decoder uses
 * BLOCK_SIZE * BLOCK_SIZE as the number of per-texel indices in a block.
 */
#define BLOCK_SIZE 4
/* Number of entries in each partition table and each anchor-index row. */
#define N_PARTITIONS 64
/* NOTE(review): presumably the size in bytes of one compressed block (the
 * fetch functions here step through the data 16 bytes per block) — not used
 * in this part of the file, confirm against later uses.
 */
#define BLOCK_BYTES 16
41
/* Describes the bit layout of one of the UNORM block modes. The field order
 * matters: bptc_unorm_modes[] is filled in with positional initializers.
 */
struct bptc_unorm_mode {
   /* Number of endpoint pairs in the block (1, 2 or 3). */
   int n_subsets;
   /* Number of bits used to store the partition number (may be 0). */
   int n_partition_bits;
   /* Whether a 2-bit rotation field follows the partition number. */
   bool has_rotation_bits;
   /* Whether a 1-bit index selection field is present. */
   bool has_index_selection_bit;
   /* Bits stored per color component of each endpoint, excluding p-bits. */
   int n_color_bits;
   /* Bits stored for the alpha of each endpoint; 0 means alpha is 255. */
   int n_alpha_bits;
   /* One extra low-order bit is stored for every endpoint. */
   bool has_endpoint_pbits;
   /* One extra low-order bit is stored per subset and shared by both of
    * its endpoints.
    */
   bool has_shared_pbits;
   /* Width of each primary per-texel index (anchor texels store one bit
    * less).
    */
   int n_index_bits;
   /* Width of each secondary per-texel index, or 0 if the mode has none. */
   int n_secondary_index_bits;
};
54
/* Describes one run of consecutive bits in a float-format block and where
 * those bits land in the endpoint values.
 */
struct bptc_float_bitfield {
   /* Index of the endpoint that receives the bits; -1 terminates a list. */
   int8_t endpoint;
   /* Index of the component within that endpoint (0..2). */
   uint8_t component;
   /* Bit position within the component where the lowest bit is stored. */
   uint8_t offset;
   /* Number of bits in this run. */
   uint8_t n_bits;
   /* If true the bits are stored in the block in reversed order. */
   bool reverse;
};
62
/* Describes the layout of one of the float-format block modes. The field
 * order matters: bptc_float_modes[] uses positional initializers.
 */
struct bptc_float_mode {
   /* Mode number that must not be used; such blocks decode to (0, 0, 0, 1). */
   bool reserved;
   /* Endpoints other than the first are stored as signed deltas from the
    * first endpoint.
    */
   bool transformed_endpoints;
   /* Bits used for the partition number; non-zero implies two subsets
    * (four endpoints), zero implies one subset (two endpoints).
    */
   int n_partition_bits;
   /* Precision in bits of the endpoint values before unquantization. */
   int n_endpoint_bits;
   /* Width of each per-texel index (anchor texels store one bit less). */
   int n_index_bits;
   /* Width of the delta values for each of the three components, used to
    * sign-extend them when transformed_endpoints is set.
    */
   int n_delta_bits[3];
   /* Bit runs in the order they appear in the block, terminated by an
    * entry whose endpoint is -1.
    */
   struct bptc_float_bitfield bitfields[24];
};
72
/* NOTE(review): not referenced by the decoding code in this part of the
 * file; presumably state for writing a bit stream when compressing — confirm
 * against the users further down.
 */
struct bit_writer {
   /* presumably the partially filled byte being accumulated — TODO confirm */
   uint8_t buf;
   /* presumably the number of bits already placed in buf — TODO confirm */
   int pos;
   /* presumably where completed bytes are written — TODO confirm */
   uint8_t *dst;
};
78
79static const struct bptc_unorm_mode
80bptc_unorm_modes[] = {
81   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
82   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
83   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
84   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
85   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
86   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
87   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
88   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
89};
90
/* Layout of the float-format block modes, indexed by the mode number that
 * fetch_rgb_float_from_block() derives from the first byte of the block. The
 * comment in front of each entry gives the mode bits that select it.
 *
 * Each entry is, in order:
 *   { reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
 *     n_index_bits, { n_delta_bits for the three components },
 *     { bitfields... } }
 *
 * Each bitfield is { endpoint, component, offset, n_bits, reverse } and the
 * bitfields are listed in the order they are stored in the block, directly
 * after the mode bits. The list is terminated by { -1 }.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
241
/* This partition table is used when the mode has two subsets. It is indexed
 * by the partition number read from the block. Each partition is represented
 * by a 32-bit value which gives 2 bits per texel within the block, with
 * texel 0 in the two least-significant bits. The value of the two bits
 * represents which subset to use (0 or 1).
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
266
/* This partition table is used when the mode has three subsets. It uses the
 * same encoding as the two-subset table (2 bits per texel, texel 0 in the
 * least-significant bits) but in this case the values can be 0, 1 or 2.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
289
/* Texel numbers of the anchor texels other than texel 0 (which is always an
 * anchor). Each row is indexed by the partition number. Row 0 is used for
 * two-subset modes and rows 1 and 2 are used for three-subset modes. The
 * index of an anchor texel is stored with one bit less than the other
 * indices.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
318
/* Reads an n_bits wide little-endian bit field that starts offset bits into
 * block. Bit 0 of the field is the bit at position (offset % 8) of byte
 * (offset / 8) and the field may straddle any number of byte boundaries.
 *
 * The byte containing the first bit is always read, even when n_bits is 0
 * (in which case the result is 0).
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int src_byte = offset / 8;
   int src_shift = offset % 8;
   int dst_shift = 0;
   int value = 0;

   do {
      /* Take whatever is left of the current byte, but no more than is
       * still wanted.
       */
      int chunk = 8 - src_shift;

      if (chunk > n_bits)
         chunk = n_bits;

      value |= ((block[src_byte] >> src_shift) & ((1 << chunk) - 1))
         << dst_shift;

      n_bits -= chunk;
      dst_shift += chunk;

      /* Any following chunk starts at the bottom of the next byte */
      src_byte++;
      src_shift = 0;
   } while (n_bits > 0);

   return value;
}
345
/* Widens an n_bits wide value to a full byte. The value is moved to the top
 * of the byte and the low bits that this leaves empty are filled with a copy
 * of the value's most-significant bits.
 *
 * The second shift is by (2 * n_bits - 8), so n_bits must be at least 4.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int n_pad_bits = 8 - n_bits;
   const int high_part = byte << n_pad_bits;
   const int low_part = byte >> (n_bits - n_pad_bits);

   return high_part | low_part;
}
355
356static int
357extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
358                        const uint8_t *block,
359                        int bit_offset,
360                        uint8_t endpoints[][4])
361{
362   int component;
363   int subset;
364   int endpoint;
365   int pbit;
366   int n_components;
367
368   /* Extract each color component */
369   for (component = 0; component < 3; component++) {
370      for (subset = 0; subset < mode->n_subsets; subset++) {
371         for (endpoint = 0; endpoint < 2; endpoint++) {
372            endpoints[subset * 2 + endpoint][component] =
373               extract_bits(block, bit_offset, mode->n_color_bits);
374            bit_offset += mode->n_color_bits;
375         }
376      }
377   }
378
379   /* Extract the alpha values */
380   if (mode->n_alpha_bits > 0) {
381      for (subset = 0; subset < mode->n_subsets; subset++) {
382         for (endpoint = 0; endpoint < 2; endpoint++) {
383            endpoints[subset * 2 + endpoint][3] =
384               extract_bits(block, bit_offset, mode->n_alpha_bits);
385            bit_offset += mode->n_alpha_bits;
386         }
387      }
388
389      n_components = 4;
390   } else {
391      for (subset = 0; subset < mode->n_subsets; subset++)
392         for (endpoint = 0; endpoint < 2; endpoint++)
393            endpoints[subset * 2 + endpoint][3] = 255;
394
395      n_components = 3;
396   }
397
398   /* Add in the p-bits */
399   if (mode->has_endpoint_pbits) {
400      for (subset = 0; subset < mode->n_subsets; subset++) {
401         for (endpoint = 0; endpoint < 2; endpoint++) {
402            pbit = extract_bits(block, bit_offset, 1);
403            bit_offset += 1;
404
405            for (component = 0; component < n_components; component++) {
406               endpoints[subset * 2 + endpoint][component] <<= 1;
407               endpoints[subset * 2 + endpoint][component] |= pbit;
408            }
409         }
410      }
411   } else if (mode->has_shared_pbits) {
412      for (subset = 0; subset < mode->n_subsets; subset++) {
413         pbit = extract_bits(block, bit_offset, 1);
414         bit_offset += 1;
415
416         for (endpoint = 0; endpoint < 2; endpoint++) {
417            for (component = 0; component < n_components; component++) {
418               endpoints[subset * 2 + endpoint][component] <<= 1;
419               endpoints[subset * 2 + endpoint][component] |= pbit;
420            }
421         }
422      }
423   }
424
425   /* Expand the n-bit values to a byte */
426   for (subset = 0; subset < mode->n_subsets; subset++) {
427      for (endpoint = 0; endpoint < 2; endpoint++) {
428         for (component = 0; component < 3; component++) {
429            endpoints[subset * 2 + endpoint][component] =
430               expand_component(endpoints[subset * 2 + endpoint][component],
431                                mode->n_color_bits +
432                                mode->has_endpoint_pbits +
433                                mode->has_shared_pbits);
434         }
435
436         if (mode->n_alpha_bits > 0) {
437            endpoints[subset * 2 + endpoint][3] =
438               expand_component(endpoints[subset * 2 + endpoint][3],
439                                mode->n_alpha_bits +
440                                mode->has_endpoint_pbits +
441                                mode->has_shared_pbits);
442         }
443      }
444   }
445
446   return bit_offset;
447}
448
449static bool
450is_anchor(int n_subsets,
451          int partition_num,
452          int texel)
453{
454   if (texel == 0)
455      return true;
456
457   switch (n_subsets) {
458   case 1:
459      return false;
460   case 2:
461      return anchor_indices[0][partition_num] == texel;
462   case 3:
463      return (anchor_indices[1][partition_num] == texel ||
464              anchor_indices[2][partition_num] == texel);
465   default:
466      assert(false);
467      return false;
468   }
469}
470
471static int
472count_anchors_before_texel(int n_subsets,
473                           int partition_num,
474                           int texel)
475{
476   int count = 1;
477
478   if (texel == 0)
479      return 0;
480
481   switch (n_subsets) {
482   case 1:
483      break;
484   case 2:
485      if (texel > anchor_indices[0][partition_num])
486         count++;
487      break;
488   case 3:
489      if (texel > anchor_indices[1][partition_num])
490         count++;
491      if (texel > anchor_indices[2][partition_num])
492         count++;
493      break;
494   default:
495      assert(false);
496      return 0;
497   }
498
499   return count;
500}
501
/* Blends between the endpoint values a and b. index selects one of the
 * fixed 6-bit weights for an index that is index_bits wide (2, 3 or 4);
 * weight 0 yields a and weight 64 yields b. The result is rounded to the
 * nearest integer.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] =
      { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by the index width; widths 0 and 1 are never used */
   static const uint8_t *weight_tables[] = {
      NULL,
      NULL,
      two_bit_weights,
      three_bit_weights,
      four_bit_weights
   };
   const int w = weight_tables[index_bits][index];

   return ((64 - w) * a + w * b + 32) >> 6;
}
520
/* Undoes the component rotation of a decoded RGBA texel in place. A
 * rotation of 0 leaves the texel alone; 1, 2 or 3 swaps the alpha value
 * (result[3]) with component 0, 1 or 2 respectively.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *swapped = &result[rotation - 1];
      const uint8_t alpha = result[3];

      result[3] = *swapped;
      *swapped = alpha;
   }
}
536
537static void
538fetch_rgba_unorm_from_block(const uint8_t *block,
539                            uint8_t *result,
540                            int texel)
541{
542   int mode_num = ffs(block[0]);
543   const struct bptc_unorm_mode *mode;
544   int bit_offset, secondary_bit_offset;
545   int partition_num;
546   int subset_num;
547   int rotation;
548   int index_selection;
549   int index_bits;
550   int indices[2];
551   int index;
552   int anchors_before_texel;
553   bool anchor;
554   uint8_t endpoints[3 * 2][4];
555   uint32_t subsets;
556   int component;
557
558   if (mode_num == 0) {
559      /* According to the spec this mode is reserved and shouldn't be used. */
560      memset(result, 0, 3);
561      result[3] = 0xff;
562      return;
563   }
564
565   mode = bptc_unorm_modes + mode_num - 1;
566   bit_offset = mode_num;
567
568   partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
569   bit_offset += mode->n_partition_bits;
570
571   switch (mode->n_subsets) {
572   case 1:
573      subsets = 0;
574      break;
575   case 2:
576      subsets = partition_table1[partition_num];
577      break;
578   case 3:
579      subsets = partition_table2[partition_num];
580      break;
581   default:
582      assert(false);
583      return;
584   }
585
586   if (mode->has_rotation_bits) {
587      rotation = extract_bits(block, bit_offset, 2);
588      bit_offset += 2;
589   } else {
590      rotation = 0;
591   }
592
593   if (mode->has_index_selection_bit) {
594      index_selection = extract_bits(block, bit_offset, 1);
595      bit_offset++;
596   } else {
597      index_selection = 0;
598   }
599
600   bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
601
602   anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
603                                                     partition_num, texel);
604
605   /* Calculate the offset to the secondary index */
606   secondary_bit_offset = (bit_offset +
607                           BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
608                           mode->n_subsets +
609                           mode->n_secondary_index_bits * texel -
610                           anchors_before_texel);
611
612   /* Calculate the offset to the primary index for this texel */
613   bit_offset += mode->n_index_bits * texel - anchors_before_texel;
614
615   subset_num = (subsets >> (texel * 2)) & 3;
616
617   anchor = is_anchor(mode->n_subsets, partition_num, texel);
618
619   index_bits = mode->n_index_bits;
620   if (anchor)
621      index_bits--;
622   indices[0] = extract_bits(block, bit_offset, index_bits);
623
624   if (mode->n_secondary_index_bits) {
625      index_bits = mode->n_secondary_index_bits;
626      if (anchor)
627         index_bits--;
628      indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
629   }
630
631   index = indices[index_selection];
632   index_bits = (index_selection ?
633                 mode->n_secondary_index_bits :
634                 mode->n_index_bits);
635
636   for (component = 0; component < 3; component++)
637      result[component] = interpolate(endpoints[subset_num * 2][component],
638                                      endpoints[subset_num * 2 + 1][component],
639                                      index,
640                                      index_bits);
641
642   /* Alpha uses the opposite index from the color components */
643   if (mode->n_secondary_index_bits && !index_selection) {
644      index = indices[1];
645      index_bits = mode->n_secondary_index_bits;
646   } else {
647      index = indices[0];
648      index_bits = mode->n_index_bits;
649   }
650
651   result[3] = interpolate(endpoints[subset_num * 2][3],
652                           endpoints[subset_num * 2 + 1][3],
653                           index,
654                           index_bits);
655
656   apply_rotation(rotation, result);
657}
658
659static void
660fetch_bptc_rgba_unorm_bytes(const GLubyte *map,
661                            GLint rowStride, GLint i, GLint j,
662                            GLubyte *texel)
663{
664   const GLubyte *block;
665
666   block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
667
668   fetch_rgba_unorm_from_block(block, texel, (i % 4) + (j % 4) * 4);
669}
670
671static void
672fetch_bptc_rgba_unorm(const GLubyte *map,
673                      GLint rowStride, GLint i, GLint j,
674                      GLfloat *texel)
675{
676   GLubyte texel_bytes[4];
677
678   fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
679
680   texel[RCOMP] = UBYTE_TO_FLOAT(texel_bytes[0]);
681   texel[GCOMP] = UBYTE_TO_FLOAT(texel_bytes[1]);
682   texel[BCOMP] = UBYTE_TO_FLOAT(texel_bytes[2]);
683   texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
684}
685
686static void
687fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
688                            GLint rowStride, GLint i, GLint j,
689                            GLfloat *texel)
690{
691   GLubyte texel_bytes[4];
692
693   fetch_bptc_rgba_unorm_bytes(map, rowStride, i, j, texel_bytes);
694
695   texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[0]);
696   texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[1]);
697   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(texel_bytes[2]);
698   texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
699}
700
/* Sign-extends the n_bits wide two's complement number held in the low bits
 * of value to a full int32_t.
 *
 * If bit (n_bits - 1) is set, all of the bits above it are set as well;
 * otherwise value is returned unchanged. n_bits must be in the range 1..32.
 */
static int32_t
sign_extend(int32_t value,
            int n_bits)
{
   /* The mask is built with unsigned arithmetic because left-shifting a
    * negative signed value (as in ~0 << n_bits) is undefined behavior in C.
    */
   const uint32_t sign_bit = (uint32_t) 1 << (n_bits - 1);
   uint32_t bits = (uint32_t) value;

   if (bits & sign_bit) {
      /* (sign_bit << 1) - 1 covers the low n_bits bits, so its complement
       * is every bit above the field. This also works for n_bits == 32,
       * where the shift wraps to 0 and the complement is empty.
       */
      bits |= ~((sign_bit << 1) - 1);
   }

   /* Converts back to the two's complement value with the same bits */
   return (int32_t) bits;
}
711
/* Expands a signed endpoint value that was stored with n_endpoint_bits bits
 * of precision to the range -0x7fff..0x7fff. Values stored with 16 or more
 * bits are returned as they are. The magnitude is scaled and the sign is
 * reapplied afterwards, so the mapping is symmetric around zero.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   bool negative;
   int magnitude;

   /* Full precision values and zero need no scaling */
   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   negative = value < 0;
   magnitude = negative ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1) {
      /* The largest magnitude maps exactly to the top of the range */
      magnitude = 0x7fff;
   } else {
      magnitude = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);
   }

   return negative ? -magnitude : magnitude;
}
740
/* Expands an unsigned endpoint value that was stored with n_endpoint_bits
 * bits of precision to the range 0..0xffff. Values stored with 15 or more
 * bits are returned as they are. Zero and the maximum value map exactly to
 * the ends of the range.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   if (value == (1 << n_endpoint_bits) - 1)
      return 0xffff;

   return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
}
755
756static int
757extract_float_endpoints(const struct bptc_float_mode *mode,
758                        const uint8_t *block,
759                        int bit_offset,
760                        int32_t endpoints[][3],
761                        bool is_signed)
762{
763   const struct bptc_float_bitfield *bitfield;
764   int endpoint, component;
765   int n_endpoints;
766   int value;
767   int i;
768
769   if (mode->n_partition_bits)
770      n_endpoints = 4;
771   else
772      n_endpoints = 2;
773
774   memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
775
776   for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
777      value = extract_bits(block, bit_offset, bitfield->n_bits);
778      bit_offset += bitfield->n_bits;
779
780      if (bitfield->reverse) {
781         for (i = 0; i < bitfield->n_bits; i++) {
782            if (value & (1 << i))
783               endpoints[bitfield->endpoint][bitfield->component] |=
784                  1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
785         }
786      } else {
787         endpoints[bitfield->endpoint][bitfield->component] |=
788            value << bitfield->offset;
789      }
790   }
791
792   if (mode->transformed_endpoints) {
793      /* The endpoints are specified as signed offsets from e0 */
794      for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
795         for (component = 0; component < 3; component++) {
796            value = sign_extend(endpoints[endpoint][component],
797                                mode->n_delta_bits[component]);
798            endpoints[endpoint][component] =
799               ((endpoints[0][component] + value) &
800                ((1 << mode->n_endpoint_bits) - 1));
801         }
802      }
803   }
804
805   if (is_signed) {
806      for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
807         for (component = 0; component < 3; component++) {
808            value = sign_extend(endpoints[endpoint][component],
809                                mode->n_endpoint_bits);
810            endpoints[endpoint][component] =
811               signed_unquantize(value, mode->n_endpoint_bits);
812         }
813      }
814   } else {
815      for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
816         for (component = 0; component < 3; component++) {
817            endpoints[endpoint][component] =
818               unsigned_unquantize(endpoints[endpoint][component],
819                                   mode->n_endpoint_bits);
820         }
821      }
822   }
823
824   return bit_offset;
825}
826
/* Final scaling step for the unsigned float format: multiplies the
 * interpolated value by 31/64 (using integer division) to produce the bits
 * that are then interpreted as a half float.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
832
/* Final scaling step for the signed float format: multiplies the magnitude
 * of the interpolated value by 31/32 (using integer division) and, for
 * negative values, sets bit 15 which is the sign bit of a half float.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const int32_t magnitude = (value < 0 ? -value : value) * 31 / 32;

   return value < 0 ? (magnitude | 0x8000) : magnitude;
}
841
842static void
843fetch_rgb_float_from_block(const uint8_t *block,
844                           float *result,
845                           int texel,
846                           bool is_signed)
847{
848   int mode_num;
849   const struct bptc_float_mode *mode;
850   int bit_offset;
851   int partition_num;
852   int subset_num;
853   int index_bits;
854   int index;
855   int anchors_before_texel;
856   int32_t endpoints[2 * 2][3];
857   uint32_t subsets;
858   int n_subsets;
859   int component;
860   int32_t value;
861
862   if (block[0] & 0x2) {
863      mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
864      bit_offset = 5;
865   } else {
866      mode_num = block[0] & 3;
867      bit_offset = 2;
868   }
869
870   mode = bptc_float_modes + mode_num;
871
872   if (mode->reserved) {
873      memset(result, 0, sizeof result[0] * 3);
874      result[3] = 1.0f;
875      return;
876   }
877
878   bit_offset = extract_float_endpoints(mode, block, bit_offset,
879                                        endpoints, is_signed);
880
881   if (mode->n_partition_bits) {
882      partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
883      bit_offset += mode->n_partition_bits;
884
885      subsets = partition_table1[partition_num];
886      n_subsets = 2;
887   } else {
888      partition_num = 0;
889      subsets = 0;
890      n_subsets = 1;
891   }
892
893   anchors_before_texel =
894      count_anchors_before_texel(n_subsets, partition_num, texel);
895
896   /* Calculate the offset to the primary index for this texel */
897   bit_offset += mode->n_index_bits * texel - anchors_before_texel;
898
899   subset_num = (subsets >> (texel * 2)) & 3;
900
901   index_bits = mode->n_index_bits;
902   if (is_anchor(n_subsets, partition_num, texel))
903      index_bits--;
904   index = extract_bits(block, bit_offset, index_bits);
905
906   for (component = 0; component < 3; component++) {
907      value = interpolate(endpoints[subset_num * 2][component],
908                          endpoints[subset_num * 2 + 1][component],
909                          index,
910                          mode->n_index_bits);
911
912      if (is_signed)
913         value = finish_signed_unquantize(value);
914      else
915         value = finish_unsigned_unquantize(value);
916
917      result[component] = _mesa_half_to_float(value);
918   }
919
920   result[3] = 1.0f;
921}
922
923static void
924fetch_bptc_rgb_float(const GLubyte *map,
925                     GLint rowStride, GLint i, GLint j,
926                     GLfloat *texel,
927                     bool is_signed)
928{
929   const GLubyte *block;
930
931   block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
932
933   fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
934}
935
936static void
937fetch_bptc_rgb_signed_float(const GLubyte *map,
938                            GLint rowStride, GLint i, GLint j,
939                            GLfloat *texel)
940{
941   fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
942}
943
944static void
945fetch_bptc_rgb_unsigned_float(const GLubyte *map,
946                              GLint rowStride, GLint i, GLint j,
947                              GLfloat *texel)
948{
949   fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
950}
951
952compressed_fetch_func
953_mesa_get_bptc_fetch_func(mesa_format format)
954{
955   switch (format) {
956   case MESA_FORMAT_BPTC_RGBA_UNORM:
957      return fetch_bptc_rgba_unorm;
958   case MESA_FORMAT_BPTC_SRGB_ALPHA_UNORM:
959      return fetch_bptc_srgb_alpha_unorm;
960   case MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT:
961      return fetch_bptc_rgb_signed_float;
962   case MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT:
963      return fetch_bptc_rgb_unsigned_float;
964   default:
965      return NULL;
966   }
967}
968
969static void
970write_bits(struct bit_writer *writer, int n_bits, int value)
971{
972   do {
973      if (n_bits + writer->pos >= 8) {
974         *(writer->dst++) = writer->buf | (value << writer->pos);
975         writer->buf = 0;
976         value >>= (8 - writer->pos);
977         n_bits -= (8 - writer->pos);
978         writer->pos = 0;
979      } else {
980         writer->buf |= value << writer->pos;
981         writer->pos += n_bits;
982         break;
983      }
984   } while (n_bits > 0);
985}
986
/**
 * Computes the average luminance and average alpha of a block of texels.
 *
 * Texels are 4 bytes each; the luminance of a texel is the plain sum of its
 * first three bytes and the alpha is the fourth byte. Rows start
 * \p src_rowstride bytes apart. Both averages use truncating integer
 * division by width * height.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int total_luminance = 0;
   int total_alpha = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         total_luminance += texel[0] + texel[1] + texel[2];
         total_alpha += texel[3];
      }
   }

   *average_luminance = total_luminance / n_texels;
   *average_alpha = total_alpha / n_texels;
}
1007
/**
 * Chooses the two RGBA endpoints used to encode a block of 8-bit texels.
 *
 * Colour and alpha are handled independently. Texels whose R+G+B sum is
 * below \p average_luminance contribute to colour endpoint 0 and the others
 * to colour endpoint 1. Likewise texels whose alpha is below
 * \p average_alpha contribute to alpha endpoint 0 and the others to alpha
 * endpoint 1. Each endpoint is the truncated integer average of its
 * contributors; if one of the two groups is empty, both endpoints are set to
 * the average over the whole block.
 *
 * Afterwards the colour endpoints (and separately the alpha endpoints) are
 * swapped if the first texel and endpoint 0 fall on different sides of the
 * endpoint midpoint.
 *
 * \param width          block width in texels
 * \param height         block height in texels
 * \param src            first texel; 4 bytes per texel, bytes 0-2 are the
 *                       colour channels and byte 3 is alpha
 * \param src_rowstride  bytes between the starts of consecutive rows
 * \param average_luminance  threshold for the colour split
 * \param average_alpha      threshold for the alpha split
 * \param endpoints      receives endpoints[0] and endpoints[1]
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         /* Assign the colour of this texel to one of the two groups */
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Assign the alpha of this texel to one of the two groups. The
          * decision has to be based on the alpha byte itself (p[3]) so that
          * it matches the average_alpha threshold and the value being
          * summed */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   /* If every texel landed in the same group, use the block average for
    * both endpoints; this also avoids dividing by a zero count */
   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1104
1105static void
1106write_rgb_indices_unorm(struct bit_writer *writer,
1107                        int src_width, int src_height,
1108                        const uint8_t *src, int src_rowstride,
1109                        uint8_t endpoints[][4])
1110{
1111   int luminance;
1112   int endpoint_luminances[2];
1113   int endpoint;
1114   int index;
1115   int y, x;
1116
1117   for (endpoint = 0; endpoint < 2; endpoint++) {
1118      endpoint_luminances[endpoint] =
1119         endpoints[endpoint][0] +
1120         endpoints[endpoint][1] +
1121         endpoints[endpoint][2];
1122   }
1123
1124   /* If the endpoints have the same luminance then we'll just use index 0 for
1125    * all of the texels */
1126   if (endpoint_luminances[0] == endpoint_luminances[1]) {
1127      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1128      return;
1129   }
1130
1131   for (y = 0; y < src_height; y++) {
1132      for (x = 0; x < src_width; x++) {
1133         luminance = src[0] + src[1] + src[2];
1134
1135         index = ((luminance - endpoint_luminances[0]) * 3 /
1136                  (endpoint_luminances[1] - endpoint_luminances[0]));
1137         if (index < 0)
1138            index = 0;
1139         else if (index > 3)
1140            index = 3;
1141
1142         assert(x != 0 || y != 0 || index < 2);
1143
1144         write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1145
1146         src += 4;
1147      }
1148
1149      /* Pad the indices out to the block size */
1150      if (src_width < BLOCK_SIZE)
1151         write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1152
1153      src += src_rowstride - src_width * 4;
1154   }
1155
1156   /* Pad the indices out to the block size */
1157   if (src_height < BLOCK_SIZE)
1158      write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1159}
1160
1161static void
1162write_alpha_indices_unorm(struct bit_writer *writer,
1163                          int src_width, int src_height,
1164                          const uint8_t *src, int src_rowstride,
1165                          uint8_t endpoints[][4])
1166{
1167   int index;
1168   int y, x;
1169
1170   /* If the endpoints have the same alpha then we'll just use index 0 for
1171    * all of the texels */
1172   if (endpoints[0][3] == endpoints[1][3]) {
1173      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1174      return;
1175   }
1176
1177   for (y = 0; y < src_height; y++) {
1178      for (x = 0; x < src_width; x++) {
1179         index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1180                  ((int) endpoints[1][3] - endpoints[0][3]));
1181         if (index < 0)
1182            index = 0;
1183         else if (index > 7)
1184            index = 7;
1185
1186         assert(x != 0 || y != 0 || index < 4);
1187
1188         /* The first index has one less bit */
1189         write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1190
1191         src += 4;
1192      }
1193
1194      /* Pad the indices out to the block size */
1195      if (src_width < BLOCK_SIZE)
1196         write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1197
1198      src += src_rowstride - src_width * 4;
1199   }
1200
1201   /* Pad the indices out to the block size */
1202   if (src_height < BLOCK_SIZE)
1203      write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1204}
1205
1206static void
1207compress_rgba_unorm_block(int src_width, int src_height,
1208                          const uint8_t *src, int src_rowstride,
1209                          uint8_t *dst)
1210{
1211   int average_luminance, average_alpha;
1212   uint8_t endpoints[2][4];
1213   struct bit_writer writer;
1214   int component, endpoint;
1215
1216   get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1217                                     &average_luminance, &average_alpha);
1218   get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1219                            average_luminance, average_alpha,
1220                            endpoints);
1221
1222   writer.dst = dst;
1223   writer.pos = 0;
1224   writer.buf = 0;
1225
1226   write_bits(&writer, 5, 0x10); /* mode 4 */
1227   write_bits(&writer, 2, 0); /* rotation 0 */
1228   write_bits(&writer, 1, 0); /* index selection bit */
1229
1230   /* Write the color endpoints */
1231   for (component = 0; component < 3; component++)
1232      for (endpoint = 0; endpoint < 2; endpoint++)
1233         write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1234
1235   /* Write the alpha endpoints */
1236   for (endpoint = 0; endpoint < 2; endpoint++)
1237      write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1238
1239   write_rgb_indices_unorm(&writer,
1240                           src_width, src_height,
1241                           src, src_rowstride,
1242                           endpoints);
1243   write_alpha_indices_unorm(&writer,
1244                             src_width, src_height,
1245                             src, src_rowstride,
1246                             endpoints);
1247}
1248
1249static void
1250compress_rgba_unorm(int width, int height,
1251                    const uint8_t *src, int src_rowstride,
1252                    uint8_t *dst, int dst_rowstride)
1253{
1254   int dst_row_diff;
1255   int y, x;
1256
1257   if (dst_rowstride >= width * 4)
1258      dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1259   else
1260      dst_row_diff = 0;
1261
1262   for (y = 0; y < height; y += BLOCK_SIZE) {
1263      for (x = 0; x < width; x += BLOCK_SIZE) {
1264         compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1265                                   MIN2(height - y, BLOCK_SIZE),
1266                                   src + x * 4 + y * src_rowstride,
1267                                   src_rowstride,
1268                                   dst);
1269         dst += BLOCK_BYTES;
1270      }
1271      dst += dst_row_diff;
1272   }
1273}
1274
1275GLboolean
1276_mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
1277{
1278   const GLubyte *pixels;
1279   const GLubyte *tempImage = NULL;
1280   int rowstride;
1281
1282   if (srcFormat != GL_RGBA ||
1283       srcType != GL_UNSIGNED_BYTE ||
1284       ctx->_ImageTransferState ||
1285       srcPacking->SwapBytes) {
1286      /* convert image to RGBA/ubyte */
1287      GLubyte *tempImageSlices[1];
1288      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
1289      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
1290      if (!tempImage)
1291         return GL_FALSE; /* out of memory */
1292      tempImageSlices[0] = (GLubyte *) tempImage;
1293      _mesa_texstore(ctx, dims,
1294                     baseInternalFormat,
1295                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
1296                                           : MESA_FORMAT_A8B8G8R8_UNORM,
1297                     rgbaRowStride, tempImageSlices,
1298                     srcWidth, srcHeight, srcDepth,
1299                     srcFormat, srcType, srcAddr,
1300                     srcPacking);
1301
1302      pixels = tempImage;
1303      rowstride = srcWidth * 4;
1304   } else {
1305      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1306                                     srcFormat, srcType, 0, 0);
1307      rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1308                                         srcFormat, srcType);
1309   }
1310
1311   compress_rgba_unorm(srcWidth, srcHeight,
1312                       pixels, rowstride,
1313                       dstSlices[0], dstRowStride);
1314
1315   free((void *) tempImage);
1316
1317   return GL_TRUE;
1318}
1319
/**
 * Returns the average luminance of a block of RGB float texels, where the
 * luminance of a texel is the plain sum of its three components.
 *
 * \param src_rowstride  distance between the starts of consecutive rows, in
 *                       bytes
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Step over whatever lies between the end of this row's texels and
       * the start of the next row; the stride is in bytes */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1337
/**
 * Clamps \p value to at most 65504. The lower bound is -65504 when
 * \p is_signed is set and 0 otherwise.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1356
/**
 * Chooses the two RGB endpoints used to encode a block of float texels.
 *
 * Texels whose R+G+B sum is below \p average_luminance contribute to
 * endpoint 0 and the others to endpoint 1; each endpoint is the average of
 * its contributors. If one of the two groups is empty, both endpoints are
 * set to the average over the whole block. The endpoints are then clamped
 * with clamp_value() and finally swapped if the first texel and endpoint 0
 * fall on different sides of the endpoint midpoint.
 *
 * \param src_rowstride  distance between the starts of consecutive rows, in
 *                       bytes
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float sums[2][3];
   float group_luminances[2];
   float texel_luminance;
   float midpoint;
   float saved[3];
   int n_below_average = 0;
   int group, c;
   int row, col;

   memset(sums, 0, sizeof sums);

   /* Accumulate every texel into the group matching its luminance */
   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++) {
         texel_luminance = texel[0] + texel[1] + texel[2];

         if (texel_luminance < average_luminance) {
            group = 0;
            n_below_average++;
         } else {
            group = 1;
         }

         for (c = 0; c < 3; c++)
            sums[group][c] += texel[c];

         texel += 3;
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   if (n_below_average == 0 || n_below_average == n_texels) {
      /* Everything ended up in one group: share the block average */
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = sums[0][c] / n_below_average;
         endpoints[1][c] = sums[1][c] / (n_texels - n_below_average);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (group = 0; group < 2; group++)
      for (c = 0; c < 3; c++)
         endpoints[group][c] = clamp_value(endpoints[group][c], is_signed);

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */
   for (group = 0; group < 2; group++) {
      group_luminances[group] =
         endpoints[group][0] +
         endpoints[group][1] +
         endpoints[group][2];
   }
   midpoint = (group_luminances[0] + group_luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (group_luminances[0] <= midpoint)) {
      memcpy(saved, endpoints[0], sizeof saved);
      memcpy(endpoints[0], endpoints[1], sizeof saved);
      memcpy(endpoints[1], saved, sizeof saved);
   }
}
1432
1433static void
1434write_rgb_indices_float(struct bit_writer *writer,
1435                        int src_width, int src_height,
1436                        const float *src, int src_rowstride,
1437                        float endpoints[][3])
1438{
1439   float luminance;
1440   float endpoint_luminances[2];
1441   int endpoint;
1442   int index;
1443   int y, x;
1444
1445   for (endpoint = 0; endpoint < 2; endpoint++) {
1446      endpoint_luminances[endpoint] =
1447         endpoints[endpoint][0] +
1448         endpoints[endpoint][1] +
1449         endpoints[endpoint][2];
1450   }
1451
1452   /* If the endpoints have the same luminance then we'll just use index 0 for
1453    * all of the texels */
1454   if (endpoint_luminances[0] == endpoint_luminances[1]) {
1455      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1456      return;
1457   }
1458
1459   for (y = 0; y < src_height; y++) {
1460      for (x = 0; x < src_width; x++) {
1461         luminance = src[0] + src[1] + src[2];
1462
1463         index = ((luminance - endpoint_luminances[0]) * 15 /
1464                  (endpoint_luminances[1] - endpoint_luminances[0]));
1465         if (index < 0)
1466            index = 0;
1467         else if (index > 15)
1468            index = 15;
1469
1470         assert(x != 0 || y != 0 || index < 8);
1471
1472         write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1473
1474         src += 3;
1475      }
1476
1477      /* Pad the indices out to the block size */
1478      if (src_width < BLOCK_SIZE)
1479         write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1480
1481      src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1482   }
1483
1484   /* Pad the indices out to the block size */
1485   if (src_height < BLOCK_SIZE)
1486      write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1487}
1488
/**
 * Converts a float endpoint component into the value stored in the block.
 *
 * Unsigned: non-positive inputs give 0; otherwise the half-float bit
 * pattern is scaled by 64/31 and shifted right by 6.
 *
 * Signed: the sign bit of the half-float pattern is removed, the remaining
 * magnitude is scaled by 32/31 and shifted right by 6, and for negative
 * inputs the result is negated and masked to the low 10 bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   bool negative = false;
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   /* Split the pattern into sign and magnitude */
   if (half & 0x8000) {
      negative = true;
      half &= 0x7fff;
   }

   half = (32 * half / 31) >> 6;

   return negative ? (-half & ((1 << 10) - 1)) : half;
}
1518
1519static void
1520compress_rgb_float_block(int src_width, int src_height,
1521                         const float *src, int src_rowstride,
1522                         uint8_t *dst,
1523                         bool is_signed)
1524{
1525   float average_luminance;
1526   float endpoints[2][3];
1527   struct bit_writer writer;
1528   int component, endpoint;
1529   int endpoint_value;
1530
1531   average_luminance =
1532      get_average_luminance_float(src_width, src_height, src, src_rowstride);
1533   get_endpoints_float(src_width, src_height, src, src_rowstride,
1534                       average_luminance, endpoints, is_signed);
1535
1536   writer.dst = dst;
1537   writer.pos = 0;
1538   writer.buf = 0;
1539
1540   write_bits(&writer, 5, 3); /* mode 3 */
1541
1542   /* Write the endpoints */
1543   for (endpoint = 0; endpoint < 2; endpoint++) {
1544      for (component = 0; component < 3; component++) {
1545         endpoint_value =
1546            get_endpoint_value(endpoints[endpoint][component], is_signed);
1547         write_bits(&writer, 10, endpoint_value);
1548      }
1549   }
1550
1551   write_rgb_indices_float(&writer,
1552                           src_width, src_height,
1553                           src, src_rowstride,
1554                           endpoints);
1555}
1556
1557static void
1558compress_rgb_float(int width, int height,
1559                   const float *src, int src_rowstride,
1560                   uint8_t *dst, int dst_rowstride,
1561                   bool is_signed)
1562{
1563   int dst_row_diff;
1564   int y, x;
1565
1566   if (dst_rowstride >= width * 4)
1567      dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1568   else
1569      dst_row_diff = 0;
1570
1571   for (y = 0; y < height; y += BLOCK_SIZE) {
1572      for (x = 0; x < width; x += BLOCK_SIZE) {
1573         compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1574                                  MIN2(height - y, BLOCK_SIZE),
1575                                  src + x * 3 +
1576                                  y * src_rowstride / sizeof (float),
1577                                  src_rowstride,
1578                                  dst,
1579                                  is_signed);
1580         dst += BLOCK_BYTES;
1581      }
1582      dst += dst_row_diff;
1583   }
1584}
1585
1586static GLboolean
1587texstore_bptc_rgb_float(TEXSTORE_PARAMS,
1588                        bool is_signed)
1589{
1590   const float *pixels;
1591   const float *tempImage = NULL;
1592   int rowstride;
1593
1594   if (srcFormat != GL_RGB ||
1595       srcType != GL_FLOAT ||
1596       ctx->_ImageTransferState ||
1597       srcPacking->SwapBytes) {
1598      /* convert image to RGB/float */
1599      GLfloat *tempImageSlices[1];
1600      int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
1601      tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
1602      if (!tempImage)
1603         return GL_FALSE; /* out of memory */
1604      tempImageSlices[0] = (GLfloat *) tempImage;
1605      _mesa_texstore(ctx, dims,
1606                     baseInternalFormat,
1607                     MESA_FORMAT_RGB_FLOAT32,
1608                     rgbRowStride, (GLubyte **)tempImageSlices,
1609                     srcWidth, srcHeight, srcDepth,
1610                     srcFormat, srcType, srcAddr,
1611                     srcPacking);
1612
1613      pixels = tempImage;
1614      rowstride = srcWidth * sizeof(float) * 3;
1615   } else {
1616      pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
1617                                     srcFormat, srcType, 0, 0);
1618      rowstride = _mesa_image_row_stride(srcPacking, srcWidth,
1619                                         srcFormat, srcType);
1620   }
1621
1622   compress_rgb_float(srcWidth, srcHeight,
1623                      pixels, rowstride,
1624                      dstSlices[0], dstRowStride,
1625                      is_signed);
1626
1627   free((void *) tempImage);
1628
1629   return GL_TRUE;
1630}
1631
1632GLboolean
1633_mesa_texstore_bptc_rgb_signed_float(TEXSTORE_PARAMS)
1634{
1635   assert(dstFormat == MESA_FORMAT_BPTC_RGB_SIGNED_FLOAT);
1636
1637   return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1638                                  dstFormat, dstRowStride, dstSlices,
1639                                  srcWidth, srcHeight, srcDepth,
1640                                  srcFormat, srcType,
1641                                  srcAddr, srcPacking,
1642                                  true /* signed */);
1643}
1644
1645GLboolean
1646_mesa_texstore_bptc_rgb_unsigned_float(TEXSTORE_PARAMS)
1647{
1648   assert(dstFormat == MESA_FORMAT_BPTC_RGB_UNSIGNED_FLOAT);
1649
1650   return texstore_bptc_rgb_float(ctx, dims, baseInternalFormat,
1651                                  dstFormat, dstRowStride, dstSlices,
1652                                  srcWidth, srcHeight, srcDepth,
1653                                  srcFormat, srcType,
1654                                  srcAddr, srcPacking,
1655                                  false /* unsigned */);
1656}
1657