/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27
28
29#include "pipe/p_defines.h"
30
31#include "util/u_format.h"
32#include "util/u_memory.h"
33#include "util/u_string.h"
34
35#include "lp_bld_type.h"
36#include "lp_bld_const.h"
37#include "lp_bld_conv.h"
38#include "lp_bld_swizzle.h"
39#include "lp_bld_gather.h"
40#include "lp_bld_debug.h"
41#include "lp_bld_format.h"
42
43
44void
45lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
46                            struct lp_build_context *bld,
47                            const LLVMValueRef *unswizzled,
48                            LLVMValueRef swizzled_out[4])
49{
50   assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
51   assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
52
53   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
54      /*
55       * Return zzz1 for depth-stencil formats.
56       *
57       * XXX: Allow to control the depth swizzle with an additional parameter,
58       * as the caller may wish another depth swizzle, or retain the stencil
59       * value.
60       */
61      enum util_format_swizzle swizzle = format_desc->swizzle[0];
62      LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
63      swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
64      swizzled_out[3] = bld->one;
65   }
66   else {
67      unsigned chan;
68      for (chan = 0; chan < 4; ++chan) {
69         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
70         swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
71      }
72   }
73}
74
75
76/**
77 * Unpack several pixels in SoA.
78 *
79 * It takes a vector of packed pixels:
80 *
81 *   packed = {P0, P1, P2, P3, ..., Pn}
82 *
83 * And will produce four vectors:
84 *
85 *   red    = {R0, R1, R2, R3, ..., Rn}
86 *   green  = {G0, G1, G2, G3, ..., Gn}
87 *   blue   = {B0, B1, B2, B3, ..., Bn}
88 *   alpha  = {A0, A1, A2, A3, ..., An}
89 *
90 * It requires that a packed pixel fits into an element of the output
91 * channels. The common case is when converting pixel with a depth of 32 bit or
92 * less into floats.
93 *
94 * \param format_desc  the format of the 'packed' incoming pixel vector
95 * \param type  the desired type for rgba_out (type.length = n, above)
96 * \param packed  the incoming vector of packed pixels
97 * \param rgba_out  returns the SoA R,G,B,A vectors
98 */
99void
100lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
101                         const struct util_format_description *format_desc,
102                         struct lp_type type,
103                         LLVMValueRef packed,
104                         LLVMValueRef rgba_out[4])
105{
106   LLVMBuilderRef builder = gallivm->builder;
107   struct lp_build_context bld;
108   LLVMValueRef inputs[4];
109   unsigned start;
110   unsigned chan;
111
112   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
113   assert(format_desc->block.width == 1);
114   assert(format_desc->block.height == 1);
115   assert(format_desc->block.bits <= type.width);
116   /* FIXME: Support more output types */
117   assert(type.floating);
118   assert(type.width == 32);
119
120   lp_build_context_init(&bld, gallivm, type);
121
122   /* Decode the input vector components */
123   start = 0;
124   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
125      const unsigned width = format_desc->channel[chan].size;
126      const unsigned stop = start + width;
127      LLVMValueRef input;
128
129      input = packed;
130
131      switch(format_desc->channel[chan].type) {
132      case UTIL_FORMAT_TYPE_VOID:
133         input = lp_build_undef(gallivm, type);
134         break;
135
136      case UTIL_FORMAT_TYPE_UNSIGNED:
137         /*
138          * Align the LSB
139          */
140
141         if (start) {
142            input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
143         }
144
145         /*
146          * Zero the MSBs
147          */
148
149         if (stop < format_desc->block.bits) {
150            unsigned mask = ((unsigned long long)1 << width) - 1;
151            input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
152         }
153
154         /*
155          * Type conversion
156          */
157
158         if (type.floating) {
159            if(format_desc->channel[chan].normalized)
160               input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
161            else
162               input = LLVMBuildSIToFP(builder, input,
163                                       lp_build_vec_type(gallivm, type), "");
164         }
165         else {
166            /* FIXME */
167            assert(0);
168            input = lp_build_undef(gallivm, type);
169         }
170
171         break;
172
173      case UTIL_FORMAT_TYPE_SIGNED:
174         /*
175          * Align the sign bit first.
176          */
177
178         if (stop < type.width) {
179            unsigned bits = type.width - stop;
180            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
181            input = LLVMBuildShl(builder, input, bits_val, "");
182         }
183
184         /*
185          * Align the LSB (with an arithmetic shift to preserve the sign)
186          */
187
188         if (format_desc->channel[chan].size < type.width) {
189            unsigned bits = type.width - format_desc->channel[chan].size;
190            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
191            input = LLVMBuildAShr(builder, input, bits_val, "");
192         }
193
194         /*
195          * Type conversion
196          */
197
198         if (type.floating) {
199            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
200            if (format_desc->channel[chan].normalized) {
201               double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
202               LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
203               input = LLVMBuildFMul(builder, input, scale_val, "");
204            }
205         }
206         else {
207            /* FIXME */
208            assert(0);
209            input = lp_build_undef(gallivm, type);
210         }
211
212         break;
213
214      case UTIL_FORMAT_TYPE_FLOAT:
215         if (type.floating) {
216            assert(start == 0);
217            assert(stop == 32);
218            assert(type.width == 32);
219            input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
220         }
221         else {
222            /* FIXME */
223            assert(0);
224            input = lp_build_undef(gallivm, type);
225         }
226         break;
227
228      case UTIL_FORMAT_TYPE_FIXED:
229         if (type.floating) {
230            double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
231            LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
232            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
233            input = LLVMBuildFMul(builder, input, scale_val, "");
234         }
235         else {
236            /* FIXME */
237            assert(0);
238            input = lp_build_undef(gallivm, type);
239         }
240         break;
241
242      default:
243         assert(0);
244         input = lp_build_undef(gallivm, type);
245         break;
246      }
247
248      inputs[chan] = input;
249
250      start = stop;
251   }
252
253   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
254}
255
256
257void
258lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm,
259                          struct lp_type dst_type,
260                          LLVMValueRef packed,
261                          LLVMValueRef *rgba)
262{
263   LLVMBuilderRef builder = gallivm->builder;
264   LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
265   unsigned chan;
266
267   packed = LLVMBuildBitCast(builder, packed,
268                             lp_build_int_vec_type(gallivm, dst_type), "");
269
270   /* Decode the input vector components */
271   for (chan = 0; chan < 4; ++chan) {
272      unsigned start = chan*8;
273      unsigned stop = start + 8;
274      LLVMValueRef input;
275
276      input = packed;
277
278      if (start)
279         input = LLVMBuildLShr(builder, input,
280                               lp_build_const_int_vec(gallivm, dst_type, start), "");
281
282      if (stop < 32)
283         input = LLVMBuildAnd(builder, input, mask, "");
284
285      input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
286
287      rgba[chan] = input;
288   }
289}
290
291
292
293/**
294 * Fetch a texels from a texture, returning them in SoA layout.
295 *
296 * \param type  the desired return type for 'rgba'.  The vector length
297 *              is the number of texels to fetch
298 *
299 * \param base_ptr  points to start of the texture image block.  For non-
300 *                  compressed formats, this simply points to the texel.
301 *                  For compressed formats, it points to the start of the
302 *                  compressed data block.
303 *
304 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
305 *              these will always be (0,0).  For compressed formats, i will
306 *              be in [0, block_width-1] and j will be in [0, block_height-1].
307 */
308void
309lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
310                        const struct util_format_description *format_desc,
311                        struct lp_type type,
312                        LLVMValueRef base_ptr,
313                        LLVMValueRef offset,
314                        LLVMValueRef i,
315                        LLVMValueRef j,
316                        LLVMValueRef rgba_out[4])
317{
318   LLVMBuilderRef builder = gallivm->builder;
319
320   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
321       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
322        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
323       format_desc->block.width == 1 &&
324       format_desc->block.height == 1 &&
325       format_desc->block.bits <= type.width &&
326       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
327        format_desc->channel[0].size == 32))
328   {
329      /*
330       * The packed pixel fits into an element of the destination format. Put
331       * the packed pixels into a vector and extract each component for all
332       * vector elements in parallel.
333       */
334
335      LLVMValueRef packed;
336
337      /*
338       * gather the texels from the texture
339       * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
340       */
341      packed = lp_build_gather(gallivm,
342                               type.length,
343                               format_desc->block.bits,
344                               type.width,
345                               base_ptr, offset);
346
347      /*
348       * convert texels to float rgba
349       */
350      lp_build_unpack_rgba_soa(gallivm,
351                               format_desc,
352                               type,
353                               packed, rgba_out);
354      return;
355   }
356
357   /*
358    * Try calling lp_build_fetch_rgba_aos for all pixels.
359    */
360
361   if (util_format_fits_8unorm(format_desc) &&
362       type.floating && type.width == 32 &&
363       (type.length == 1 || (type.length % 4 == 0))) {
364      struct lp_type tmp_type;
365      LLVMValueRef tmp;
366
367      memset(&tmp_type, 0, sizeof tmp_type);
368      tmp_type.width = 8;
369      tmp_type.length = type.length * 4;
370      tmp_type.norm = TRUE;
371
372      tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
373                                    base_ptr, offset, i, j);
374
375      lp_build_rgba8_to_f32_soa(gallivm,
376                                type,
377                                tmp,
378                                rgba_out);
379
380      return;
381   }
382
383   /*
384    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
385    *
386    * This is not the most efficient way of fetching pixels, as we
387    * miss some opportunities to do vectorization, but this is
388    * convenient for formats or scenarios for which there was no
389    * opportunity or incentive to optimize.
390    */
391
392   {
393      unsigned k, chan;
394      struct lp_type tmp_type;
395
396      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
397         debug_printf("%s: scalar unpacking of %s\n",
398                      __FUNCTION__, format_desc->short_name);
399      }
400
401      tmp_type = type;
402      tmp_type.length = 4;
403
404      for (chan = 0; chan < 4; ++chan) {
405         rgba_out[chan] = lp_build_undef(gallivm, type);
406      }
407
408      /* loop over number of pixels */
409      for(k = 0; k < type.length; ++k) {
410         LLVMValueRef index = lp_build_const_int32(gallivm, k);
411         LLVMValueRef offset_elem;
412         LLVMValueRef i_elem, j_elem;
413         LLVMValueRef tmp;
414
415         offset_elem = LLVMBuildExtractElement(builder, offset,
416                                               index, "");
417
418         i_elem = LLVMBuildExtractElement(builder, i, index, "");
419         j_elem = LLVMBuildExtractElement(builder, j, index, "");
420
421         /* Get a single float[4]={R,G,B,A} pixel */
422         tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
423                                       base_ptr, offset_elem,
424                                       i_elem, j_elem);
425
426         /*
427          * Insert the AoS tmp value channels into the SoA result vectors at
428          * position = 'index'.
429          */
430         for (chan = 0; chan < 4; ++chan) {
431            LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
432            tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
433            rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
434                                                    tmp_chan, index, "");
435         }
436      }
437   }
438}
439