lp_bld_format_soa.c revision eb20c57f03f7f6a43dedb9c317f3648087e6d1d7
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#include "pipe/p_defines.h"
30
31#include "util/u_format.h"
32#include "util/u_memory.h"
33#include "util/u_string.h"
34
35#include "lp_bld_type.h"
36#include "lp_bld_const.h"
37#include "lp_bld_conv.h"
38#include "lp_bld_swizzle.h"
39#include "lp_bld_gather.h"
40#include "lp_bld_format.h"
41
42
43void
44lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
45                            struct lp_build_context *bld,
46                            const LLVMValueRef *unswizzled,
47                            LLVMValueRef swizzled_out[4])
48{
49   assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
50   assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
51
52   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
53      /*
54       * Return zzz1 for depth-stencil formats.
55       *
56       * XXX: Allow to control the depth swizzle with an additional parameter,
57       * as the caller may wish another depth swizzle, or retain the stencil
58       * value.
59       */
60      enum util_format_swizzle swizzle = format_desc->swizzle[0];
61      LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
62      swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
63      swizzled_out[3] = bld->one;
64   }
65   else {
66      unsigned chan;
67      for (chan = 0; chan < 4; ++chan) {
68         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
69         swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
70      }
71   }
72}
73
74
75/**
76 * Unpack several pixels in SoA.
77 *
78 * It takes a vector of packed pixels:
79 *
80 *   packed = {P0, P1, P2, P3, ..., Pn}
81 *
82 * And will produce four vectors:
83 *
84 *   red    = {R0, R1, R2, R3, ..., Rn}
85 *   green  = {G0, G1, G2, G3, ..., Gn}
86 *   blue   = {B0, B1, B2, B3, ..., Bn}
87 *   alpha  = {A0, A1, A2, A3, ..., An}
88 *
89 * It requires that a packed pixel fits into an element of the output
90 * channels. The common case is when converting pixel with a depth of 32 bit or
91 * less into floats.
92 *
93 * \param format_desc  the format of the 'packed' incoming pixel vector
94 * \param type  the desired type for rgba_out (type.length = n, above)
95 * \param packed  the incoming vector of packed pixels
96 * \param rgba_out  returns the SoA R,G,B,A vectors
97 */
98void
99lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
100                         const struct util_format_description *format_desc,
101                         struct lp_type type,
102                         LLVMValueRef packed,
103                         LLVMValueRef rgba_out[4])
104{
105   struct lp_build_context bld;
106   LLVMValueRef inputs[4];
107   unsigned start;
108   unsigned chan;
109
110   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
111   assert(format_desc->block.width == 1);
112   assert(format_desc->block.height == 1);
113   assert(format_desc->block.bits <= type.width);
114   /* FIXME: Support more output types */
115   assert(type.floating);
116   assert(type.width == 32);
117
118   lp_build_context_init(&bld, builder, type);
119
120   /* Decode the input vector components */
121   start = 0;
122   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
123      const unsigned width = format_desc->channel[chan].size;
124      const unsigned stop = start + width;
125      LLVMValueRef input;
126
127      input = packed;
128
129      switch(format_desc->channel[chan].type) {
130      case UTIL_FORMAT_TYPE_VOID:
131         input = lp_build_undef(type);
132         break;
133
134      case UTIL_FORMAT_TYPE_UNSIGNED:
135         /*
136          * Align the LSB
137          */
138
139         if (start) {
140            input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
141         }
142
143         /*
144          * Zero the MSBs
145          */
146
147         if (stop < format_desc->block.bits) {
148            unsigned mask = ((unsigned long long)1 << width) - 1;
149            input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
150         }
151
152         /*
153          * Type conversion
154          */
155
156         if (type.floating) {
157            if(format_desc->channel[chan].normalized)
158               input = lp_build_unsigned_norm_to_float(builder, width, type, input);
159            else
160               input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
161         }
162         else {
163            /* FIXME */
164            assert(0);
165            input = lp_build_undef(type);
166         }
167
168         break;
169
170      case UTIL_FORMAT_TYPE_SIGNED:
171         /*
172          * Align the sign bit first.
173          */
174
175         if (stop < type.width) {
176            unsigned bits = type.width - stop;
177            LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
178            input = LLVMBuildShl(builder, input, bits_val, "");
179         }
180
181         /*
182          * Align the LSB (with an arithmetic shift to preserve the sign)
183          */
184
185         if (format_desc->channel[chan].size < type.width) {
186            unsigned bits = type.width - format_desc->channel[chan].size;
187            LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
188            input = LLVMBuildAShr(builder, input, bits_val, "");
189         }
190
191         /*
192          * Type conversion
193          */
194
195         if (type.floating) {
196            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
197            if (format_desc->channel[chan].normalized) {
198               double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
199               LLVMValueRef scale_val = lp_build_const_vec(type, scale);
200               input = LLVMBuildMul(builder, input, scale_val, "");
201            }
202         }
203         else {
204            /* FIXME */
205            assert(0);
206            input = lp_build_undef(type);
207         }
208
209         break;
210
211      case UTIL_FORMAT_TYPE_FLOAT:
212         if (type.floating) {
213            assert(start == 0);
214            assert(stop == 32);
215            assert(type.width == 32);
216            input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
217         }
218         else {
219            /* FIXME */
220            assert(0);
221            input = lp_build_undef(type);
222         }
223         break;
224
225      case UTIL_FORMAT_TYPE_FIXED:
226         if (type.floating) {
227            double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
228            LLVMValueRef scale_val = lp_build_const_vec(type, scale);
229            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
230            input = LLVMBuildMul(builder, input, scale_val, "");
231         }
232         else {
233            /* FIXME */
234            assert(0);
235            input = lp_build_undef(type);
236         }
237         break;
238
239      default:
240         assert(0);
241         input = lp_build_undef(type);
242         break;
243      }
244
245      inputs[chan] = input;
246
247      start = stop;
248   }
249
250   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
251}
252
253
254void
255lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
256                          struct lp_type dst_type,
257                          LLVMValueRef packed,
258                          LLVMValueRef *rgba)
259{
260   LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
261   unsigned chan;
262
263   packed = LLVMBuildBitCast(builder, packed,
264                             lp_build_int_vec_type(dst_type), "");
265
266   /* Decode the input vector components */
267   for (chan = 0; chan < 4; ++chan) {
268      unsigned start = chan*8;
269      unsigned stop = start + 8;
270      LLVMValueRef input;
271
272      input = packed;
273
274      if (start)
275         input = LLVMBuildLShr(builder, input,
276                               lp_build_const_int_vec(dst_type, start), "");
277
278      if (stop < 32)
279         input = LLVMBuildAnd(builder, input, mask, "");
280
281      input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
282
283      rgba[chan] = input;
284   }
285}
286
287
288
289/**
290 * Fetch a texels from a texture, returning them in SoA layout.
291 *
292 * \param type  the desired return type for 'rgba'.  The vector length
293 *              is the number of texels to fetch
294 *
295 * \param base_ptr  points to start of the texture image block.  For non-
296 *                  compressed formats, this simply points to the texel.
297 *                  For compressed formats, it points to the start of the
298 *                  compressed data block.
299 *
300 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
301 *              these will always be (0,0).  For compressed formats, i will
302 *              be in [0, block_width-1] and j will be in [0, block_height-1].
303 */
304void
305lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
306                        const struct util_format_description *format_desc,
307                        struct lp_type type,
308                        LLVMValueRef base_ptr,
309                        LLVMValueRef offset,
310                        LLVMValueRef i,
311                        LLVMValueRef j,
312                        LLVMValueRef rgba_out[4])
313{
314
315   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
316       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
317        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
318       format_desc->block.width == 1 &&
319       format_desc->block.height == 1 &&
320       format_desc->block.bits <= type.width &&
321       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
322        format_desc->channel[0].size == 32))
323   {
324      /*
325       * The packed pixel fits into an element of the destination format. Put
326       * the packed pixels into a vector and extract each component for all
327       * vector elements in parallel.
328       */
329
330      LLVMValueRef packed;
331
332      /*
333       * gather the texels from the texture
334       * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
335       */
336      packed = lp_build_gather(builder,
337                               type.length,
338                               format_desc->block.bits,
339                               type.width,
340                               base_ptr, offset);
341
342      /*
343       * convert texels to float rgba
344       */
345      lp_build_unpack_rgba_soa(builder,
346                               format_desc,
347                               type,
348                               packed, rgba_out);
349   }
350   else {
351      /*
352       * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
353       *
354       * This is not the most efficient way of fetching pixels, as we
355       * miss some opportunities to do vectorization, but this is
356       * convenient for formats or scenarios for which there was no
357       * opportunity or incentive to optimize.
358       */
359
360      unsigned k, chan;
361
362      for (chan = 0; chan < 4; ++chan) {
363         rgba_out[chan] = lp_build_undef(type);
364      }
365
366      /* loop over number of pixels */
367      for(k = 0; k < type.length; ++k) {
368         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
369         LLVMValueRef offset_elem;
370         LLVMValueRef ptr;
371         LLVMValueRef i_elem, j_elem;
372         LLVMValueRef tmp;
373
374         offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
375         ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
376
377         i_elem = LLVMBuildExtractElement(builder, i, index, "");
378         j_elem = LLVMBuildExtractElement(builder, j, index, "");
379
380         /* Get a single float[4]={R,G,B,A} pixel */
381         tmp = lp_build_fetch_rgba_aos(builder, format_desc, type, ptr,
382                                       i_elem, j_elem);
383
384         /*
385          * Insert the AoS tmp value channels into the SoA result vectors at
386          * position = 'index'.
387          */
388         for (chan = 0; chan < 4; ++chan) {
389            LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
390            tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
391            rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
392                                                    tmp_chan, index, "");
393         }
394      }
395   }
396}
397