1/**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.  All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27
28/**
29 * Code to convert images from tiled to linear and back.
30 * XXX there are quite a few assumptions about color and z/stencil being
31 * 32bpp.
32 */
33
34
35#include "util/u_format.h"
36#include "util/u_memory.h"
37#include "lp_tile_soa.h"
38#include "lp_tile_image.h"
39
40
/**
 * Size in bytes of one TILE_SIZE x TILE_SIZE tile.  The factor of 4 is
 * the per-pixel byte count, matching the 32bpp assumption noted in the
 * file comment above.
 * NOTE(review): not referenced in the visible part of this file; the
 * conversion routines below compute the same product locally.
 */
#define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)
42
43
/**
 * Copy one tiled 4x4 block of 32-bit words into a linear image.
 *
 * \param src         the block's 16 contiguous words in tiled order
 * \param dst         address of the block's top-left word in the linear image
 * \param dst_stride  distance between consecutive linear rows, in 32-bit words
 *
 * In tiled order the block is stored as four 2x2 quads (top-left,
 * top-right, bottom-left, bottom-right), each quad row by row.
 */
static void
untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
{
   /* tiled_index[r][c] is the position in src of linear element (row r, col c) */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   uint32_t *rows[4];
   unsigned r, c;

   /* Step from row to row with pointer additions only. */
   rows[0] = dst;
   for (r = 1; r < 4; r++) {
      rows[r] = rows[r - 1] + dst_stride;
   }

   for (r = 0; r < 4; r++) {
      for (c = 0; c < 4; c++) {
         rows[r][c] = src[tiled_index[r][c]];
      }
   }
}
61
62
63
/**
 * Copy one tiled 4x4 block of 16-bit words into a linear image.
 *
 * \param src         the block's 16 contiguous words in tiled order
 * \param dst         address of the block's top-left word in the linear image
 * \param dst_stride  distance between consecutive linear rows, in 16-bit words
 *
 * In tiled order the block is stored as four 2x2 quads (top-left,
 * top-right, bottom-left, bottom-right), each quad row by row.
 */
static void
untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
{
   /* tiled_index[r][c] is the position in src of linear element (row r, col c) */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   uint16_t *rows[4];
   unsigned r, c;

   /* Step from row to row with pointer additions only. */
   rows[0] = dst;
   for (r = 1; r < 4; r++) {
      rows[r] = rows[r - 1] + dst_stride;
   }

   for (r = 0; r < 4; r++) {
      for (c = 0; c < 4; c++) {
         rows[r][c] = src[tiled_index[r][c]];
      }
   }
}
81
82
83
/**
 * Gather a 4x4 rectangle of 32-bit words from a linear image into one
 * tiled block.
 *
 * \param src         address of the rectangle's top-left word in the linear image
 * \param dst         receives the block's 16 contiguous words in tiled order
 * \param src_stride  distance between consecutive linear rows, in 32-bit words
 *
 * In tiled order the block is stored as four 2x2 quads (top-left,
 * top-right, bottom-left, bottom-right), each quad row by row.
 */
static void
tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
{
   /* tiled_index[r][c] is the position in dst of linear element (row r, col c) */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   const uint32_t *rows[4];
   unsigned r, c;

   /* Step from row to row with pointer additions only. */
   rows[0] = src;
   for (r = 1; r < 4; r++) {
      rows[r] = rows[r - 1] + src_stride;
   }

   for (r = 0; r < 4; r++) {
      for (c = 0; c < 4; c++) {
         dst[tiled_index[r][c]] = rows[r][c];
      }
   }
}
101
102
103
/**
 * Gather a 4x4 rectangle of 16-bit words from a linear image into one
 * tiled block.
 *
 * \param src         address of the rectangle's top-left word in the linear image
 * \param dst         receives the block's 16 contiguous words in tiled order
 * \param src_stride  distance between consecutive linear rows, in 16-bit words
 *
 * In tiled order the block is stored as four 2x2 quads (top-left,
 * top-right, bottom-left, bottom-right), each quad row by row.
 */
static void
tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
{
   /* tiled_index[r][c] is the position in dst of linear element (row r, col c) */
   static const unsigned char tiled_index[4][4] = {
      {  0,  1,  4,  5 },
      {  2,  3,  6,  7 },
      {  8,  9, 12, 13 },
      { 10, 11, 14, 15 }
   };
   const uint16_t *rows[4];
   unsigned r, c;

   /* Step from row to row with pointer additions only. */
   rows[0] = src;
   for (r = 1; r < 4; r++) {
      rows[r] = rows[r - 1] + src_stride;
   }

   for (r = 0; r < 4; r++) {
      for (c = 0; c < 4; c++) {
         dst[tiled_index[r][c]] = rows[r][c];
      }
   }
}
121
122
123
124/**
125 * Convert a tiled image into a linear image.
126 * \param dst_stride  dest row stride in bytes
127 */
128void
129lp_tiled_to_linear(const void *src, void *dst,
130                   unsigned x, unsigned y,
131                   unsigned width, unsigned height,
132                   enum pipe_format format,
133                   unsigned dst_stride,
134                   unsigned tiles_per_row)
135{
136   assert(x % TILE_SIZE == 0);
137   assert(y % TILE_SIZE == 0);
138   /*assert(width % TILE_SIZE == 0);
139     assert(height % TILE_SIZE == 0);*/
140
141   /* Note that Z/stencil surfaces use a different tiling size than
142    * color surfaces.
143    */
144   if (util_format_is_depth_or_stencil(format)) {
145      const uint bpp = util_format_get_blocksize(format);
146      const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
147      const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
148      const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
149
150      dst_stride /= bpp;   /* convert from bytes to words */
151
152      if (bpp == 4) {
153         const uint32_t *src32 = (const uint32_t *) src;
154         uint32_t *dst32 = (uint32_t *) dst;
155         uint i, j;
156
157         for (j = 0; j < height; j += tile_h) {
158            for (i = 0; i < width; i += tile_w) {
159               /* compute offsets in 32-bit words */
160               uint ii = i + x, jj = j + y;
161               uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
162                  * (tile_w * tile_h);
163               uint dst_offset = jj * dst_stride + ii;
164               untile_4_4_uint32(src32 + src_offset,
165                                 dst32 + dst_offset,
166                                 dst_stride);
167            }
168         }
169      }
170      else {
171         const uint16_t *src16 = (const uint16_t *) src;
172         uint16_t *dst16 = (uint16_t *) dst;
173         uint i, j;
174
175         assert(bpp == 2);
176
177         for (j = 0; j < height; j += tile_h) {
178            for (i = 0; i < width; i += tile_w) {
179               /* compute offsets in 16-bit words */
180               uint ii = i + x, jj = j + y;
181               uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
182                  * (tile_w * tile_h);
183               uint dst_offset = jj * dst_stride + ii;
184               untile_4_4_uint16(src16 + src_offset,
185                                 dst16 + dst_offset,
186                                 dst_stride);
187            }
188         }
189      }
190   }
191   else {
192      /* color image */
193      const uint bpp = 4;
194      const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
195      const uint bytes_per_tile = tile_w * tile_h * bpp;
196      uint i, j;
197
198      for (j = 0; j < height; j += tile_h) {
199         for (i = 0; i < width; i += tile_w) {
200            uint ii = i + x, jj = j + y;
201            uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
202            uint byte_offset = tile_offset * bytes_per_tile;
203            const uint8_t *src_tile = (uint8_t *) src + byte_offset;
204
205            lp_tile_unswizzle_4ub(format,
206                              src_tile,
207                              dst, dst_stride,
208                              ii, jj);
209         }
210      }
211   }
212}
213
214
215/**
216 * Convert a linear image into a tiled image.
217 * \param src_stride  source row stride in bytes
218 */
219void
220lp_linear_to_tiled(const void *src, void *dst,
221                   unsigned x, unsigned y,
222                   unsigned width, unsigned height,
223                   enum pipe_format format,
224                   unsigned src_stride,
225                   unsigned tiles_per_row)
226{
227   assert(x % TILE_SIZE == 0);
228   assert(y % TILE_SIZE == 0);
229   /*
230   assert(width % TILE_SIZE == 0);
231   assert(height % TILE_SIZE == 0);
232   */
233
234   if (util_format_is_depth_or_stencil(format)) {
235      const uint bpp = util_format_get_blocksize(format);
236      const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
237      const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
238      const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
239
240      src_stride /= bpp;   /* convert from bytes to words */
241
242      if (bpp == 4) {
243         const uint32_t *src32 = (const uint32_t *) src;
244         uint32_t *dst32 = (uint32_t *) dst;
245         uint i, j;
246
247         for (j = 0; j < height; j += tile_h) {
248            for (i = 0; i < width; i += tile_w) {
249               /* compute offsets in 32-bit words */
250               uint ii = i + x, jj = j + y;
251               uint src_offset = jj * src_stride + ii;
252               uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
253                  * (tile_w * tile_h);
254               tile_4_4_uint32(src32 + src_offset,
255                               dst32 + dst_offset,
256                               src_stride);
257            }
258         }
259      }
260      else {
261         const uint16_t *src16 = (const uint16_t *) src;
262         uint16_t *dst16 = (uint16_t *) dst;
263         uint i, j;
264
265         assert(bpp == 2);
266
267         for (j = 0; j < height; j += tile_h) {
268            for (i = 0; i < width; i += tile_w) {
269               /* compute offsets in 16-bit words */
270               uint ii = i + x, jj = j + y;
271               uint src_offset = jj * src_stride + ii;
272               uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
273                  * (tile_w * tile_h);
274               tile_4_4_uint16(src16 + src_offset,
275                               dst16 + dst_offset,
276                               src_stride);
277            }
278         }
279      }
280   }
281   else {
282      const uint bpp = 4;
283      const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
284      const uint bytes_per_tile = tile_w * tile_h * bpp;
285      uint i, j;
286
287      for (j = 0; j < height; j += TILE_SIZE) {
288         for (i = 0; i < width; i += TILE_SIZE) {
289            uint ii = i + x, jj = j + y;
290            uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
291            uint byte_offset = tile_offset * bytes_per_tile;
292            uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
293
294            lp_tile_swizzle_4ub(format,
295                             dst_tile,
296                             src, src_stride,
297                             ii, jj);
298         }
299      }
300   }
301}
302
303
304/**
305 * For testing only.
306 */
307void
308test_tiled_linear_conversion(void *data,
309                             enum pipe_format format,
310                             unsigned width, unsigned height,
311                             unsigned stride)
312{
313   /* size in tiles */
314   unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
315   unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
316
317   uint8_t *tiled = MALLOC(wt * ht * TILE_SIZE * TILE_SIZE * 4);
318
319   /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
320
321   lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
322                      stride, wt);
323
324   lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
325                      stride, wt);
326
327   FREE(tiled);
328}
329
330