lp_bld_format_aos.c revision bd91f665a7c12f114619a4f6f1e00059e4f4cb5e
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * AoS pixel format manipulation.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36#include "util/u_format.h"
37#include "util/u_memory.h"
38#include "util/u_math.h"
39#include "util/u_string.h"
40
41#include "lp_bld_init.h"
42#include "lp_bld_type.h"
43#include "lp_bld_format.h"
44
45
46/**
47 * Unpack a single pixel into its RGBA components.
48 *
49 * @param packed integer.
50 *
51 * @return RGBA in a 4 floats vector.
52 */
53LLVMValueRef
54lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
55                         const struct util_format_description *desc,
56                         LLVMValueRef packed)
57{
58   LLVMValueRef shifted, casted, scaled, masked;
59   LLVMValueRef shifts[4];
60   LLVMValueRef masks[4];
61   LLVMValueRef scales[4];
62   LLVMValueRef swizzles[4];
63   LLVMValueRef aux[4];
64   bool normalized;
65   int empty_channel;
66   bool needs_uitofp;
67   unsigned shift;
68   unsigned i;
69
70   /* TODO: Support more formats */
71   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
72   assert(desc->block.width == 1);
73   assert(desc->block.height == 1);
74   assert(desc->block.bits <= 32);
75
76   /* Do the intermediate integer computations with 32bit integers since it
77    * matches floating point size */
78   if (desc->block.bits < 32)
79      packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
80
81   /* Broadcast the packed value to all four channels */
82   packed = LLVMBuildInsertElement(builder,
83                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
84                                   packed,
85                                   LLVMConstNull(LLVMInt32Type()),
86                                   "");
87   packed = LLVMBuildShuffleVector(builder,
88                                   packed,
89                                   LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)),
90                                   LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)),
91                                   "");
92
93   /* Initialize vector constants */
94   normalized = FALSE;
95   needs_uitofp = FALSE;
96   empty_channel = -1;
97   shift = 0;
98   for (i = 0; i < 4; ++i) {
99      unsigned bits = desc->channel[i].size;
100
101      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
102         shifts[i] = LLVMGetUndef(LLVMInt32Type());
103         masks[i] = LLVMConstNull(LLVMInt32Type());
104         scales[i] =  LLVMConstNull(LLVMFloatType());
105         empty_channel = i;
106      }
107      else {
108         unsigned long long mask = (1ULL << bits) - 1;
109
110         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
111
112         if (bits == 32) {
113            needs_uitofp = TRUE;
114         }
115
116         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
117         masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0);
118
119         if (desc->channel[i].normalized) {
120            scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask);
121            normalized = TRUE;
122         }
123         else
124            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
125      }
126
127      shift += bits;
128   }
129
130   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
131   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
132   if (!needs_uitofp) {
133      /* UIToFP can't be expressed in SSE2 */
134      casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
135   } else {
136      casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
137   }
138
139   if (normalized)
140      scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), "");
141   else
142      scaled = casted;
143
144   for (i = 0; i < 4; ++i)
145      aux[i] = LLVMGetUndef(LLVMFloatType());
146
147   for (i = 0; i < 4; ++i) {
148      enum util_format_swizzle swizzle;
149
150      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
151         /*
152          * For ZS formats do RGBA = ZZZ1
153          */
154         if (i == 3) {
155            swizzle = UTIL_FORMAT_SWIZZLE_1;
156         } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
157            swizzle = UTIL_FORMAT_SWIZZLE_0;
158         } else {
159            swizzle = desc->swizzle[0];
160         }
161      } else {
162         swizzle = desc->swizzle[i];
163      }
164
165      switch (swizzle) {
166      case UTIL_FORMAT_SWIZZLE_X:
167      case UTIL_FORMAT_SWIZZLE_Y:
168      case UTIL_FORMAT_SWIZZLE_Z:
169      case UTIL_FORMAT_SWIZZLE_W:
170         swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
171         break;
172      case UTIL_FORMAT_SWIZZLE_0:
173         assert(empty_channel >= 0);
174         swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
175         break;
176      case UTIL_FORMAT_SWIZZLE_1:
177         swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
178         aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
179         break;
180      case UTIL_FORMAT_SWIZZLE_NONE:
181         swizzles[i] = LLVMGetUndef(LLVMFloatType());
182         assert(0);
183         break;
184      }
185   }
186
187   return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), "");
188}
189
190
191/**
192 * Pack a single pixel.
193 *
194 * @param rgba 4 float vector with the unpacked components.
195 *
196 * XXX: This is mostly for reference and testing -- operating a single pixel at
197 * a time is rarely if ever needed.
198 */
199LLVMValueRef
200lp_build_pack_rgba_aos(LLVMBuilderRef builder,
201                       const struct util_format_description *desc,
202                       LLVMValueRef rgba)
203{
204   LLVMTypeRef type;
205   LLVMValueRef packed = NULL;
206   LLVMValueRef swizzles[4];
207   LLVMValueRef shifted, casted, scaled, unswizzled;
208   LLVMValueRef shifts[4];
209   LLVMValueRef scales[4];
210   bool normalized;
211   unsigned shift;
212   unsigned i, j;
213
214   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
215   assert(desc->block.width == 1);
216   assert(desc->block.height == 1);
217
218   type = LLVMIntType(desc->block.bits);
219
220   /* Unswizzle the color components into the source vector. */
221   for (i = 0; i < 4; ++i) {
222      for (j = 0; j < 4; ++j) {
223         if (desc->swizzle[j] == i)
224            break;
225      }
226      if (j < 4)
227         swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0);
228      else
229         swizzles[i] = LLVMGetUndef(LLVMInt32Type());
230   }
231
232   unswizzled = LLVMBuildShuffleVector(builder, rgba,
233                                       LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)),
234                                       LLVMConstVector(swizzles, 4), "");
235
236   normalized = FALSE;
237   shift = 0;
238   for (i = 0; i < 4; ++i) {
239      unsigned bits = desc->channel[i].size;
240
241      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
242         shifts[i] = LLVMGetUndef(LLVMInt32Type());
243         scales[i] =  LLVMGetUndef(LLVMFloatType());
244      }
245      else {
246         unsigned mask = (1 << bits) - 1;
247
248         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
249         assert(bits < 32);
250
251         shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0);
252
253         if (desc->channel[i].normalized) {
254            scales[i] = LLVMConstReal(LLVMFloatType(), mask);
255            normalized = TRUE;
256         }
257         else
258            scales[i] =  LLVMConstReal(LLVMFloatType(), 1.0);
259      }
260
261      shift += bits;
262   }
263
264   if (normalized)
265      scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
266   else
267      scaled = unswizzled;
268
269   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), "");
270
271   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
272
273   /* Bitwise or all components */
274   for (i = 0; i < 4; ++i) {
275      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
276         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), "");
277         if (packed)
278            packed = LLVMBuildOr(builder, packed, component, "");
279         else
280            packed = component;
281      }
282   }
283
284   if (!packed)
285      packed = LLVMGetUndef(LLVMInt32Type());
286
287   if (desc->block.bits < 32)
288      packed = LLVMBuildTrunc(builder, packed, type, "");
289
290   return packed;
291}
292
293
294/**
295 * Fetch a pixel into a 4 float AoS.
296 *
297 * i and j are the sub-block pixel coordinates.
298 */
299LLVMValueRef
300lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
301                        const struct util_format_description *format_desc,
302                        LLVMValueRef ptr,
303                        LLVMValueRef i,
304                        LLVMValueRef j)
305{
306
307   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
308       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
309        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
310       format_desc->block.width == 1 &&
311       format_desc->block.height == 1 &&
312       util_is_pot(format_desc->block.bits) &&
313       format_desc->block.bits <= 32 &&
314       format_desc->is_bitmask &&
315       !format_desc->is_mixed &&
316       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
317        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
318   {
319      LLVMValueRef packed;
320
321      ptr = LLVMBuildBitCast(builder, ptr,
322                             LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
323                             "");
324
325      packed = LLVMBuildLoad(builder, ptr, "packed");
326
327      return lp_build_unpack_rgba_aos(builder, format_desc, packed);
328   }
329   else if (format_desc->fetch_rgba_float) {
330      /*
331       * Fallback to calling util_format_description::fetch_rgba_float.
332       *
333       * This is definitely not the most efficient way of fetching pixels, as
334       * we miss the opportunity to do vectorization, but this it is a
335       * convenient for formats or scenarios for which there was no opportunity
336       * or incentive to optimize.
337       */
338
339      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
340      char name[256];
341      LLVMValueRef function;
342      LLVMValueRef tmp;
343      LLVMValueRef args[4];
344
345      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
346                    format_desc->short_name);
347
348      /*
349       * Declare and bind format_desc->fetch_rgba_float().
350       */
351
352      function = LLVMGetNamedFunction(module, name);
353      if (!function) {
354         LLVMTypeRef ret_type;
355         LLVMTypeRef arg_types[4];
356         LLVMTypeRef function_type;
357
358         ret_type = LLVMVoidType();
359         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
360         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
361         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
362         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
363         function = LLVMAddFunction(module, name, function_type);
364
365         LLVMSetFunctionCallConv(function, LLVMCCallConv);
366         LLVMSetLinkage(function, LLVMExternalLinkage);
367
368         assert(LLVMIsDeclaration(function));
369
370         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
371      }
372
373      /*
374       * XXX: this should better go to the first block in the function
375       */
376
377      tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
378
379      /*
380       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
381       * in the SoA vectors.
382       */
383
384      args[0] = LLVMBuildBitCast(builder, tmp,
385                                 LLVMPointerType(LLVMFloatType(), 0), "");
386      args[1] = ptr;
387      args[2] = i;
388      args[3] = j;
389
390      LLVMBuildCall(builder, function, args, 4, "");
391
392      return LLVMBuildLoad(builder, tmp, "");
393   }
394   else {
395      assert(0);
396      return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
397   }
398}
399