lp_bld_format_aos.c revision a18c210a95794c79c6f26dbf4c66d4a85e29169d
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * AoS pixel format manipulation.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36#include "util/u_format.h"
37#include "util/u_memory.h"
38#include "util/u_math.h"
39#include "util/u_string.h"
40
41#include "lp_bld_init.h"
42#include "lp_bld_type.h"
43#include "lp_bld_flow.h"
44#include "lp_bld_format.h"
45
46
47/**
48 * Unpack a single pixel into its RGBA components.
49 *
50 * @param packed integer.
51 *
52 * @return RGBA in a 4 floats vector.
53 */
54LLVMValueRef
55lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
56                         const struct util_format_description *desc,
57                         LLVMValueRef packed)
58{
59   LLVMValueRef shifted, casted, scaled, masked;
60   LLVMValueRef shifts[4];
61   LLVMValueRef masks[4];
62   LLVMValueRef scales[4];
63   LLVMValueRef swizzles[4];
64   LLVMValueRef aux[4];
65   bool normalized;
66   int empty_channel;
67   bool needs_uitofp;
68   unsigned shift;
69   unsigned i;
70
71   /* TODO: Support more formats */
72   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
73   assert(desc->block.width == 1);
74   assert(desc->block.height == 1);
75   assert(desc->block.bits <= 32);
76
77   /* Do the intermediate integer computations with 32bit integers since it
78    * matches floating point size */
79   if (desc->block.bits < 32)
80      packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
81
82   /* Broadcast the packed value to all four channels */
83   packed = LLVMBuildInsertElement(builder,
84                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
85                                   packed,
86                                   LLVMConstNull(LLVMInt32Type()),
87                                   "");
88   packed = LLVMBuildShuffleVector(builder,
89                                   packed,
90                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
91                                   LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
92                                   "");
93
94   /* Initialize vector constants */
95   normalized = FALSE;
96   needs_uitofp = FALSE;
97   empty_channel = -1;
98   shift = 0;
99   for (i = 0; i < 4; ++i) {
100      unsigned bits = desc->channel[i].size;
101
102      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
103         shifts[i] = LLVMGetUndef(LLVMInt32Type());
104         masks[i] = LLVMConstNull(LLVMInt32Type());
105         scales[i] =  LLVMConstNull(LLVMFloatType());
106         empty_channel = i;
107      }
108      else {
109         unsigned long long mask = (1ULL << bits) - 1;
110
111         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
112
113         if (bits == 32) {
114            needs_uitofp = TRUE;
115         }
116
117         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
118         masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
119
120         if (desc->channel[i].normalized) {
121            scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
122            normalized = TRUE;
123         }
124         else
125            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
126      }
127
128      shift += bits;
129   }
130
131   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
132   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
133   if (!needs_uitofp) {
134      /* UIToFP can't be expressed in SSE2 */
135      casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
136   } else {
137      casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
138   }
139
140   if (normalized)
141      scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
142   else
143      scaled = casted;
144
145   for (i = 0; i < 4; ++i)
146      aux[i] = LLVMGetUndef(LLVMFloatType());
147
148   for (i = 0; i < 4; ++i) {
149      enum util_format_swizzle swizzle;
150
151      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
152         /*
153          * For ZS formats do RGBA = ZZZ1
154          */
155         if (i == 3) {
156            swizzle = UTIL_FORMAT_SWIZZLE_1;
157         } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
158            swizzle = UTIL_FORMAT_SWIZZLE_0;
159         } else {
160            swizzle = desc->swizzle[0];
161         }
162      } else {
163         swizzle = desc->swizzle[i];
164      }
165
166      switch (swizzle) {
167      case UTIL_FORMAT_SWIZZLE_X:
168      case UTIL_FORMAT_SWIZZLE_Y:
169      case UTIL_FORMAT_SWIZZLE_Z:
170      case UTIL_FORMAT_SWIZZLE_W:
171         swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
172         break;
173      case UTIL_FORMAT_SWIZZLE_0:
174         assert(empty_channel >= 0);
175         swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
176         break;
177      case UTIL_FORMAT_SWIZZLE_1:
178         swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
179         aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
180         break;
181      case UTIL_FORMAT_SWIZZLE_NONE:
182         swizzles[i] = LLVMGetUndef(LLVMFloatType());
183         assert(0);
184         break;
185      }
186   }
187
188   return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
189}
190
191
192/**
193 * Pack a single pixel.
194 *
195 * @param rgba 4 float vector with the unpacked components.
196 *
197 * XXX: This is mostly for reference and testing -- operating a single pixel at
198 * a time is rarely if ever needed.
199 */
200LLVMValueRef
201lp_build_pack_rgba_aos(LLVMBuilderRef builder,
202                       const struct util_format_description *desc,
203                       LLVMValueRef rgba)
204{
205   LLVMTypeRef type;
206   LLVMValueRef packed = NULL;
207   LLVMValueRef swizzles[4];
208   LLVMValueRef shifted, casted, scaled, unswizzled;
209   LLVMValueRef shifts[4];
210   LLVMValueRef scales[4];
211   bool normalized;
212   unsigned shift;
213   unsigned i, j;
214
215   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
216   assert(desc->block.width == 1);
217   assert(desc->block.height == 1);
218
219   type = LLVMIntType(desc->block.bits);
220
221   /* Unswizzle the color components into the source vector. */
222   for (i = 0; i < 4; ++i) {
223      for (j = 0; j < 4; ++j) {
224         if (desc->swizzle[j] == i)
225            break;
226      }
227      if (j < 4)
228         swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
229      else
230         swizzles[i] = LLVMGetUndef(LLVMInt32Type());
231   }
232
233   unswizzled = LLVMBuildShuffleVector(builder, rgba,
234                                       LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
235                                       LLVMConstVector(swizzles, 4), "");
236
237   normalized = FALSE;
238   shift = 0;
239   for (i = 0; i < 4; ++i) {
240      unsigned bits = desc->channel[i].size;
241
242      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
243         shifts[i] = LLVMGetUndef(LLVMInt32Type());
244         scales[i] =  LLVMGetUndef(LLVMFloatType());
245      }
246      else {
247         unsigned mask = (1 << bits) - 1;
248
249         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
250         assert(bits < 32);
251
252         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
253
254         if (desc->channel[i].normalized) {
255            scales[i] = LLVMConstReal(LLVMFloatType(), mask);
256            normalized = TRUE;
257         }
258         else
259            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
260      }
261
262      shift += bits;
263   }
264
265   if (normalized)
266      scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
267   else
268      scaled = unswizzled;
269
270   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
271
272   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
273
274   /* Bitwise or all components */
275   for (i = 0; i < 4; ++i) {
276      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
277         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
278         if (packed)
279            packed = LLVMBuildOr(builder, packed, component, "");
280         else
281            packed = component;
282      }
283   }
284
285   if (!packed)
286      packed = LLVMGetUndef(LLVMInt32Type());
287
288   if (desc->block.bits < 32)
289      packed = LLVMBuildTrunc(builder, packed, type, "");
290
291   return packed;
292}
293
294
295/**
296 * Fetch a pixel into a 4 float AoS.
297 *
298 * i and j are the sub-block pixel coordinates.
299 */
300LLVMValueRef
301lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
302                        const struct util_format_description *format_desc,
303                        LLVMValueRef ptr,
304                        LLVMValueRef i,
305                        LLVMValueRef j)
306{
307
308   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
309       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
310        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
311       format_desc->block.width == 1 &&
312       format_desc->block.height == 1 &&
313       util_is_pot(format_desc->block.bits) &&
314       format_desc->block.bits <= 32 &&
315       format_desc->is_bitmask &&
316       !format_desc->is_mixed &&
317       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
318        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
319   {
320      LLVMValueRef packed;
321
322      ptr = LLVMBuildBitCast(builder, ptr,
323                             LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
324                             "");
325
326      packed = LLVMBuildLoad(builder, ptr, "packed");
327
328      return lp_build_unpack_rgba_aos(builder, format_desc, packed);
329   }
330   else if (format_desc->fetch_rgba_float) {
331      /*
332       * Fallback to calling util_format_description::fetch_rgba_float.
333       *
334       * This is definitely not the most efficient way of fetching pixels, as
335       * we miss the opportunity to do vectorization, but this it is a
336       * convenient for formats or scenarios for which there was no opportunity
337       * or incentive to optimize.
338       */
339
340      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
341      char name[256];
342      LLVMValueRef function;
343      LLVMValueRef tmp;
344      LLVMValueRef args[4];
345
346      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
347                    format_desc->short_name);
348
349      /*
350       * Declare and bind format_desc->fetch_rgba_float().
351       */
352
353      function = LLVMGetNamedFunction(module, name);
354      if (!function) {
355         LLVMTypeRef ret_type;
356         LLVMTypeRef arg_types[4];
357         LLVMTypeRef function_type;
358
359         ret_type = LLVMVoidType();
360         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
361         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
362         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
363         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
364         function = LLVMAddFunction(module, name, function_type);
365
366         LLVMSetFunctionCallConv(function, LLVMCCallConv);
367         LLVMSetLinkage(function, LLVMExternalLinkage);
368
369         assert(LLVMIsDeclaration(function));
370
371         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
372      }
373
374      tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
375
376      /*
377       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
378       * in the SoA vectors.
379       */
380
381      args[0] = LLVMBuildBitCast(builder, tmp,
382                                 LLVMPointerType(LLVMFloatType(), 0), "");
383      args[1] = ptr;
384      args[2] = i;
385      args[3] = j;
386
387      LLVMBuildCall(builder, function, args, 4, "");
388
389      return LLVMBuildLoad(builder, tmp, "");
390   }
391   else {
392      assert(0);
393      return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
394   }
395}
396