1/*
2 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28#include "radeon_tile.h"
29
30#include <stdint.h>
31#include <string.h>
32
33#include "main/macros.h"
34#include "radeon_debug.h"
35
36#define MICRO_TILE_SIZE 32
37
38static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
39                                  void * const dst, unsigned dst_pitch,
40                                  unsigned width, unsigned height)
41{
42    unsigned row; /* current source row */
43    unsigned col; /* current source column */
44    unsigned k; /* number of processed tiles */
45    const unsigned tile_width = 8, tile_height = 4;
46    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
47
48    k = 0;
49    for (row = 0; row < height; row += tile_height)
50    {
51        for (col = 0; col < width; col += tile_width, ++k)
52        {
53            uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
54            uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
55                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
56            unsigned j;
57
58            for (j = 0; j < MIN2(tile_height, height - row); ++j)
59            {
60                unsigned columns = MIN2(tile_width, width - col);
61                memcpy(dst2, src2, columns * sizeof(uint8_t));
62                dst2 += tile_width;
63                src2 += src_pitch;
64            }
65        }
66    }
67}
68
69static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
70                                   void * const dst, unsigned dst_pitch,
71                                   unsigned width, unsigned height)
72{
73    unsigned row; /* current source row */
74    unsigned col; /* current source column */
75    unsigned k; /* number of processed tiles */
76    const unsigned tile_width = 4, tile_height = 4;
77    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
78
79    k = 0;
80    for (row = 0; row < height; row += tile_height)
81    {
82        for (col = 0; col < width; col += tile_width, ++k)
83        {
84            uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
85            uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
86                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
87            unsigned j;
88
89            for (j = 0; j < MIN2(tile_height, height - row); ++j)
90            {
91                unsigned columns = MIN2(tile_width, width - col);
92                memcpy(dst2, src2, columns * sizeof(uint16_t));
93                dst2 += tile_width;
94                src2 += src_pitch;
95            }
96        }
97    }
98}
99
100static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
101                                   void * const dst, unsigned dst_pitch,
102                                   unsigned width, unsigned height)
103{
104    unsigned row; /* current source row */
105    unsigned col; /* current source column */
106    unsigned k; /* number of processed tiles */
107    const unsigned tile_width = 8, tile_height = 2;
108    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
109
110    k = 0;
111    for (row = 0; row < height; row += tile_height)
112    {
113        for (col = 0; col < width; col += tile_width, ++k)
114        {
115            uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
116            uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
117                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
118            unsigned j;
119
120            for (j = 0; j < MIN2(tile_height, height - row); ++j)
121            {
122                unsigned columns = MIN2(tile_width, width - col);
123                memcpy(dst2, src2, columns * sizeof(uint16_t));
124                dst2 += tile_width;
125                src2 += src_pitch;
126            }
127        }
128    }
129}
130
131static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
132                                   void * const dst, unsigned dst_pitch,
133                                   unsigned width, unsigned height)
134{
135    unsigned row; /* current source row */
136    unsigned col; /* current source column */
137    unsigned k; /* number of processed tiles */
138    const unsigned tile_width = 4, tile_height = 2;
139    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
140
141    k = 0;
142    for (row = 0; row < height; row += tile_height)
143    {
144        for (col = 0; col < width; col += tile_width, ++k)
145        {
146            uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
147            uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
148                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
149            unsigned j;
150
151            for (j = 0; j < MIN2(tile_height, height - row); ++j)
152            {
153                unsigned columns = MIN2(tile_width, width - col);
154                memcpy(dst2, src2, columns * sizeof(uint32_t));
155                dst2 += tile_width;
156                src2 += src_pitch;
157            }
158        }
159    }
160}
161
162static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
163                                   void * const dst, unsigned dst_pitch,
164                                   unsigned width, unsigned height)
165{
166    unsigned row; /* current source row */
167    unsigned col; /* current source column */
168    unsigned k; /* number of processed tiles */
169    const unsigned tile_width = 2, tile_height = 2;
170    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
171
172    k = 0;
173    for (row = 0; row < height; row += tile_height)
174    {
175        for (col = 0; col < width; col += tile_width, ++k)
176        {
177            uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
178            uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
179                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
180            unsigned j;
181
182            for (j = 0; j < MIN2(tile_height, height - row); ++j)
183            {
184                unsigned columns = MIN2(tile_width, width - col);
185                memcpy(dst2, src2, columns * sizeof(uint64_t));
186                dst2 += tile_width;
187                src2 += src_pitch;
188            }
189        }
190    }
191}
192
/*
 * Tile an image of 128-bit (16-byte) elements using 1x1 micro tiles.
 *
 * With a one-element tile there is nothing to reorder inside a row, so
 * every row is transferred with a single pitched copy.
 *
 * src        - linear source image
 * src_pitch  - distance between source rows, in elements
 * dst        - destination image
 * dst_pitch  - distance between destination rows, in elements
 * width      - number of elements copied per row
 * height     - number of rows copied
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const size_t elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *src_bytes = src;
    uint8_t *dst_bytes = dst;
    unsigned j;

    /*
     * Exactly one memcpy per row: each call already moves all `width`
     * elements of that row, so iterating over the columns as well would
     * run past the end of both images.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_bytes + (size_t)j * dst_pitch * elem_size,
               src_bytes + (size_t)j * src_pitch * elem_size,
               width * elem_size);
    }
}
210
211void tile_image(const void * src, unsigned src_pitch,
212                void *dst, unsigned dst_pitch,
213                gl_format format, unsigned width, unsigned height)
214{
215    assert(src_pitch >= width);
216    assert(dst_pitch >= width);
217
218    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
219                 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
220                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
221
222    switch (_mesa_get_format_bytes(format))
223    {
224        case 16:
225            micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
226            break;
227        case 8:
228            micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
229            break;
230        case 4:
231            micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
232            break;
233        case 2:
234            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
235            {
236                micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
237            }
238            else
239            {
240                micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
241            }
242            break;
243        case 1:
244            micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
245            break;
246        default:
247            assert(0);
248            break;
249    }
250}
251
252static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
253                                    void * const dst, unsigned dst_pitch,
254                                    unsigned width, unsigned height)
255{
256    unsigned row; /* current destination row */
257    unsigned col; /* current destination column */
258    unsigned k; /* current tile number */
259    const unsigned tile_width = 8, tile_height = 4;
260    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
261
262    assert(src_pitch % tile_width == 0);
263
264    k = 0;
265    for (row = 0; row < height; row += tile_height)
266    {
267        for (col = 0; col < width; col += tile_width, ++k)
268        {
269            uint8_t *src2 = (uint8_t *)src + row * src_pitch +
270                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
271            uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
272            unsigned j;
273
274            for (j = 0; j < MIN2(tile_height, height - row); ++j)
275            {
276                unsigned columns = MIN2(tile_width, width - col);
277                memcpy(dst2, src2, columns * sizeof(uint8_t));
278                dst2 += dst_pitch;
279                src2 += tile_width;
280            }
281        }
282    }
283}
284
285static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
286                                     void * const dst, unsigned dst_pitch,
287                                     unsigned width, unsigned height)
288{
289    unsigned row; /* current destination row */
290    unsigned col; /* current destination column */
291    unsigned k; /* current tile number */
292    const unsigned tile_width = 8, tile_height = 2;
293    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
294
295    assert(src_pitch % tile_width == 0);
296
297    k = 0;
298    for (row = 0; row < height; row += tile_height)
299    {
300        for (col = 0; col < width; col += tile_width, ++k)
301        {
302            uint16_t *src2 = (uint16_t *)src + row * src_pitch +
303                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
304            uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
305            unsigned j;
306
307            for (j = 0; j < MIN2(tile_height, height - row); ++j)
308            {
309                unsigned columns = MIN2(tile_width, width - col);
310                memcpy(dst2, src2, columns * sizeof(uint16_t));
311                dst2 += dst_pitch;
312                src2 += tile_width;
313            }
314        }
315    }
316}
317
318static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
319                                     void * const dst, unsigned dst_pitch,
320                                     unsigned width, unsigned height)
321{
322    unsigned row; /* current destination row */
323    unsigned col; /* current destination column */
324    unsigned k; /* current tile number */
325    const unsigned tile_width = 4, tile_height = 4;
326    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
327
328    assert(src_pitch % tile_width == 0);
329
330    k = 0;
331    for (row = 0; row < height; row += tile_height)
332    {
333        for (col = 0; col < width; col += tile_width, ++k)
334        {
335            uint16_t *src2 = (uint16_t *)src + row * src_pitch +
336                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
337            uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
338            unsigned j;
339
340            for (j = 0; j < MIN2(tile_height, height - row); ++j)
341            {
342                unsigned columns = MIN2(tile_width, width - col);
343                memcpy(dst2, src2, columns * sizeof(uint16_t));
344                dst2 += dst_pitch;
345                src2 += tile_width;
346            }
347        }
348    }
349}
350
351static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
352                                     void * const dst, unsigned dst_pitch,
353                                     unsigned width, unsigned height)
354{
355    unsigned row; /* current destination row */
356    unsigned col; /* current destination column */
357    unsigned k; /* current tile number */
358    const unsigned tile_width = 4, tile_height = 2;
359    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
360
361    assert(src_pitch % tile_width == 0);
362
363    k = 0;
364    for (row = 0; row < height; row += tile_height)
365    {
366        for (col = 0; col < width; col += tile_width, ++k)
367        {
368            uint32_t *src2 = (uint32_t *)src + row * src_pitch +
369                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
370            uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
371            unsigned j;
372
373            for (j = 0; j < MIN2(tile_height, height - row); ++j)
374            {
375                unsigned columns = MIN2(tile_width, width - col);
376                memcpy(dst2, src2, columns * sizeof(uint32_t));
377                dst2 += dst_pitch;
378                src2 += tile_width;
379            }
380        }
381    }
382}
383
384static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
385                                     void * const dst, unsigned dst_pitch,
386                                     unsigned width, unsigned height)
387{
388    unsigned row; /* current destination row */
389    unsigned col; /* current destination column */
390    unsigned k; /* current tile number */
391    const unsigned tile_width = 2, tile_height = 2;
392    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
393
394    assert(src_pitch % tile_width == 0);
395
396    k = 0;
397    for (row = 0; row < height; row += tile_height)
398    {
399        for (col = 0; col < width; col += tile_width, ++k)
400        {
401            uint64_t *src2 = (uint64_t *)src + row * src_pitch +
402                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
403            uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
404            unsigned j;
405
406            for (j = 0; j < MIN2(tile_height, height - row); ++j)
407            {
408                unsigned columns = MIN2(tile_width, width - col);
409                memcpy(dst2, src2, columns * sizeof(uint64_t));
410                dst2 += dst_pitch;
411                src2 += tile_width;
412            }
413        }
414    }
415}
416
/*
 * Untile an image of 128-bit (16-byte) elements stored as 1x1 micro tiles.
 *
 * With a one-element tile there is nothing to reorder inside a row, so
 * every row is transferred with a single pitched copy.
 *
 * src        - source image
 * src_pitch  - distance between source rows, in elements
 * dst        - linear destination image
 * dst_pitch  - distance between destination rows, in elements
 * width      - number of elements copied per row
 * height     - number of rows copied
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const size_t elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *src_bytes = src;
    uint8_t *dst_bytes = dst;
    unsigned j;

    /*
     * Exactly one memcpy per row: each call already moves all `width`
     * elements of that row, so iterating over the columns as well would
     * run past the end of both images.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_bytes + (size_t)j * dst_pitch * elem_size,
               src_bytes + (size_t)j * src_pitch * elem_size,
               width * elem_size);
    }
}
434
435void untile_image(const void * src, unsigned src_pitch,
436                  void *dst, unsigned dst_pitch,
437                  gl_format format, unsigned width, unsigned height)
438{
439    assert(src_pitch >= width);
440    assert(dst_pitch >= width);
441
442    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
443                 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
444                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
445
446    switch (_mesa_get_format_bytes(format))
447    {
448        case 16:
449            micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
450            break;
451        case 8:
452            micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
453            break;
454        case 4:
455            micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
456            break;
457        case 2:
458            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
459            {
460                micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
461            }
462            else
463            {
464                micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
465            }
466            break;
467        case 1:
468            micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
469            break;
470        default:
471            assert(0);
472            break;
473    }
474}
475
476void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height)
477{
478    switch (_mesa_get_format_bytes(format))
479    {
480        case 16:
481            *block_width = 1;
482            *block_height = 1;
483            break;
484        case 8:
485            *block_width = 2;
486            *block_height = 2;
487            break;
488        case 4:
489            *block_width = 4;
490            *block_height = 2;
491            break;
492        case 2:
493            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
494            {
495                *block_width = 4;
496                *block_height = 4;
497            }
498            else
499            {
500                *block_width = 8;
501                *block_height = 2;
502            }
503            break;
504        case 1:
505            *block_width = 8;
506            *block_height = 4;
507            break;
508        default:
509            assert(0);
510            break;
511    }
512}
513