1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Helper functions for swizzling/shuffling.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36#include "util/u_debug.h"
37
38#include "lp_bld_type.h"
39#include "lp_bld_const.h"
40#include "lp_bld_init.h"
41#include "lp_bld_logic.h"
42#include "lp_bld_swizzle.h"
43#include "lp_bld_pack.h"
44
45
46LLVMValueRef
47lp_build_broadcast(struct gallivm_state *gallivm,
48                   LLVMTypeRef vec_type,
49                   LLVMValueRef scalar)
50{
51   LLVMValueRef res;
52
53   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
54      /* scalar */
55      assert(vec_type == LLVMTypeOf(scalar));
56      res = scalar;
57   } else {
58      LLVMBuilderRef builder = gallivm->builder;
59      const unsigned length = LLVMGetVectorSize(vec_type);
60      LLVMValueRef undef = LLVMGetUndef(vec_type);
61      LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
62
63      assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
64
65      if (HAVE_LLVM >= 0x207) {
66         /* The shuffle vector is always made of int32 elements */
67         LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
68         res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
69         res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
70      } else {
71         /* XXX: The above path provokes a bug in LLVM 2.6 */
72         unsigned i;
73         res = undef;
74         for(i = 0; i < length; ++i) {
75            LLVMValueRef index = lp_build_const_int32(gallivm, i);
76            res = LLVMBuildInsertElement(builder, res, scalar, index, "");
77         }
78      }
79   }
80
81   return res;
82}
83
84
85/**
86 * Broadcast
87 */
88LLVMValueRef
89lp_build_broadcast_scalar(struct lp_build_context *bld,
90                          LLVMValueRef scalar)
91{
92   assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
93
94   return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
95}
96
97
98/**
99 * Combined extract and broadcast (mere shuffle in most cases)
100 */
101LLVMValueRef
102lp_build_extract_broadcast(struct gallivm_state *gallivm,
103                           struct lp_type src_type,
104                           struct lp_type dst_type,
105                           LLVMValueRef vector,
106                           LLVMValueRef index)
107{
108   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
109   LLVMValueRef res;
110
111   assert(src_type.floating == dst_type.floating);
112   assert(src_type.width    == dst_type.width);
113
114   assert(lp_check_value(src_type, vector));
115   assert(LLVMTypeOf(index) == i32t);
116
117   if (src_type.length == 1) {
118      if (dst_type.length == 1) {
119         /*
120          * Trivial scalar -> scalar.
121          */
122
123         res = vector;
124      }
125      else {
126         /*
127          * Broadcast scalar -> vector.
128          */
129
130         res = lp_build_broadcast(gallivm,
131                                  lp_build_vec_type(gallivm, dst_type),
132                                  vector);
133      }
134   }
135   else {
136      if (dst_type.length > 1) {
137         /*
138          * shuffle - result can be of different length.
139          */
140
141         LLVMValueRef shuffle;
142         shuffle = lp_build_broadcast(gallivm,
143                                      LLVMVectorType(i32t, dst_type.length),
144                                      index);
145         res = LLVMBuildShuffleVector(gallivm->builder, vector,
146                                      LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
147                                      shuffle, "");
148      }
149      else {
150         /*
151          * Trivial extract scalar from vector.
152          */
153          res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
154      }
155   }
156
157   return res;
158}
159
160
161/**
162 * Swizzle one channel into all other three channels.
163 */
164LLVMValueRef
165lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
166                            LLVMValueRef a,
167                            unsigned channel)
168{
169   LLVMBuilderRef builder = bld->gallivm->builder;
170   const struct lp_type type = bld->type;
171   const unsigned n = type.length;
172   unsigned i, j;
173
174   if(a == bld->undef || a == bld->zero || a == bld->one)
175      return a;
176
177   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
178    * using shuffles here actually causes worst results. More investigation is
179    * needed. */
180   if (type.width >= 16) {
181      /*
182       * Shuffle.
183       */
184      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
185      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
186
187      for(j = 0; j < n; j += 4)
188         for(i = 0; i < 4; ++i)
189            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
190
191      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
192   }
193   else {
194      /*
195       * Bit mask and recursive shifts
196       *
197       *   XYZW XYZW .... XYZW  <= input
198       *   0Y00 0Y00 .... 0Y00
199       *   YY00 YY00 .... YY00
200       *   YYYY YYYY .... YYYY  <= output
201       */
202      struct lp_type type4;
203      const char shifts[4][2] = {
204         { 1,  2},
205         {-1,  2},
206         { 1, -2},
207         {-1, -2}
208      };
209      unsigned i;
210
211      a = LLVMBuildAnd(builder, a,
212                       lp_build_const_mask_aos(bld->gallivm,
213                                               type, 1 << channel), "");
214
215      /*
216       * Build a type where each element is an integer that cover the four
217       * channels.
218       */
219
220      type4 = type;
221      type4.floating = FALSE;
222      type4.width *= 4;
223      type4.length /= 4;
224
225      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
226
227      for(i = 0; i < 2; ++i) {
228         LLVMValueRef tmp = NULL;
229         int shift = shifts[channel][i];
230
231#ifdef PIPE_ARCH_LITTLE_ENDIAN
232         shift = -shift;
233#endif
234
235         if(shift > 0)
236            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
237         if(shift < 0)
238            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
239
240         assert(tmp);
241         if(tmp)
242            a = LLVMBuildOr(builder, a, tmp, "");
243      }
244
245      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
246   }
247}
248
249
250LLVMValueRef
251lp_build_swizzle_aos(struct lp_build_context *bld,
252                     LLVMValueRef a,
253                     const unsigned char swizzles[4])
254{
255   LLVMBuilderRef builder = bld->gallivm->builder;
256   const struct lp_type type = bld->type;
257   const unsigned n = type.length;
258   unsigned i, j;
259
260   if (swizzles[0] == PIPE_SWIZZLE_RED &&
261       swizzles[1] == PIPE_SWIZZLE_GREEN &&
262       swizzles[2] == PIPE_SWIZZLE_BLUE &&
263       swizzles[3] == PIPE_SWIZZLE_ALPHA) {
264      return a;
265   }
266
267   if (swizzles[0] == swizzles[1] &&
268       swizzles[1] == swizzles[2] &&
269       swizzles[2] == swizzles[3]) {
270      switch (swizzles[0]) {
271      case PIPE_SWIZZLE_RED:
272      case PIPE_SWIZZLE_GREEN:
273      case PIPE_SWIZZLE_BLUE:
274      case PIPE_SWIZZLE_ALPHA:
275         return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
276      case PIPE_SWIZZLE_ZERO:
277         return bld->zero;
278      case PIPE_SWIZZLE_ONE:
279         return bld->one;
280      case LP_BLD_SWIZZLE_DONTCARE:
281         return bld->undef;
282      default:
283         assert(0);
284         return bld->undef;
285      }
286   }
287
288   if (type.width >= 16) {
289      /*
290       * Shuffle.
291       */
292      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
293      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
294      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
295      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
296
297      memset(aux, 0, sizeof aux);
298
299      for(j = 0; j < n; j += 4) {
300         for(i = 0; i < 4; ++i) {
301            unsigned shuffle;
302            switch (swizzles[i]) {
303            default:
304               assert(0);
305               /* fall through */
306            case PIPE_SWIZZLE_RED:
307            case PIPE_SWIZZLE_GREEN:
308            case PIPE_SWIZZLE_BLUE:
309            case PIPE_SWIZZLE_ALPHA:
310               shuffle = j + swizzles[i];
311               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
312               break;
313            case PIPE_SWIZZLE_ZERO:
314               shuffle = type.length + 0;
315               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
316               if (!aux[0]) {
317                  aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
318               }
319               break;
320            case PIPE_SWIZZLE_ONE:
321               shuffle = type.length + 1;
322               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
323               if (!aux[1]) {
324                  aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
325               }
326               break;
327            case LP_BLD_SWIZZLE_DONTCARE:
328               shuffles[j + i] = LLVMGetUndef(i32t);
329               break;
330            }
331         }
332      }
333
334      for (i = 0; i < n; ++i) {
335         if (!aux[i]) {
336            aux[i] = undef;
337         }
338      }
339
340      return LLVMBuildShuffleVector(builder, a,
341                                    LLVMConstVector(aux, n),
342                                    LLVMConstVector(shuffles, n), "");
343   } else {
344      /*
345       * Bit mask and shifts.
346       *
347       * For example, this will convert BGRA to RGBA by doing
348       *
349       *   rgba = (bgra & 0x00ff0000) >> 16
350       *        | (bgra & 0xff00ff00)
351       *        | (bgra & 0x000000ff) << 16
352       *
353       * This is necessary not only for faster cause, but because X86 backend
354       * will refuse shuffles of <4 x i8> vectors
355       */
356      LLVMValueRef res;
357      struct lp_type type4;
358      unsigned cond = 0;
359      unsigned chan;
360      int shift;
361
362      /*
363       * Start with a mixture of 1 and 0.
364       */
365      for (chan = 0; chan < 4; ++chan) {
366         if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
367            cond |= 1 << chan;
368         }
369      }
370      res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
371
372      /*
373       * Build a type where each element is an integer that cover the four
374       * channels.
375       */
376      type4 = type;
377      type4.floating = FALSE;
378      type4.width *= 4;
379      type4.length /= 4;
380
381      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
382      res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
383
384      /*
385       * Mask and shift the channels, trying to group as many channels in the
386       * same shift as possible
387       */
388      for (shift = -3; shift <= 3; ++shift) {
389         unsigned long long mask = 0;
390
391         assert(type4.width <= sizeof(mask)*8);
392
393         for (chan = 0; chan < 4; ++chan) {
394            /* FIXME: big endian */
395            if (swizzles[chan] < 4 &&
396                chan - swizzles[chan] == shift) {
397               mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
398            }
399         }
400
401         if (mask) {
402            LLVMValueRef masked;
403            LLVMValueRef shifted;
404
405            if (0)
406               debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
407
408            masked = LLVMBuildAnd(builder, a,
409                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
410            if (shift > 0) {
411               shifted = LLVMBuildShl(builder, masked,
412                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
413            } else if (shift < 0) {
414               shifted = LLVMBuildLShr(builder, masked,
415                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
416            } else {
417               shifted = masked;
418            }
419
420            res = LLVMBuildOr(builder, res, shifted, "");
421         }
422      }
423
424      return LLVMBuildBitCast(builder, res,
425                              lp_build_vec_type(bld->gallivm, type), "");
426   }
427}
428
429
430/**
431 * Extended swizzle of a single channel of a SoA vector.
432 *
433 * @param bld         building context
434 * @param unswizzled  array with the 4 unswizzled values
435 * @param swizzle     one of the PIPE_SWIZZLE_*
436 *
437 * @return  the swizzled value.
438 */
439LLVMValueRef
440lp_build_swizzle_soa_channel(struct lp_build_context *bld,
441                             const LLVMValueRef *unswizzled,
442                             unsigned swizzle)
443{
444   switch (swizzle) {
445   case PIPE_SWIZZLE_RED:
446   case PIPE_SWIZZLE_GREEN:
447   case PIPE_SWIZZLE_BLUE:
448   case PIPE_SWIZZLE_ALPHA:
449      return unswizzled[swizzle];
450   case PIPE_SWIZZLE_ZERO:
451      return bld->zero;
452   case PIPE_SWIZZLE_ONE:
453      return bld->one;
454   default:
455      assert(0);
456      return bld->undef;
457   }
458}
459
460
461/**
462 * Extended swizzle of a SoA vector.
463 *
464 * @param bld         building context
465 * @param unswizzled  array with the 4 unswizzled values
466 * @param swizzles    array of PIPE_SWIZZLE_*
467 * @param swizzled    output swizzled values
468 */
469void
470lp_build_swizzle_soa(struct lp_build_context *bld,
471                     const LLVMValueRef *unswizzled,
472                     const unsigned char swizzles[4],
473                     LLVMValueRef *swizzled)
474{
475   unsigned chan;
476
477   for (chan = 0; chan < 4; ++chan) {
478      swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
479                                                    swizzles[chan]);
480   }
481}
482
483
484/**
485 * Do an extended swizzle of a SoA vector inplace.
486 *
487 * @param bld         building context
488 * @param values      intput/output array with the 4 values
489 * @param swizzles    array of PIPE_SWIZZLE_*
490 */
491void
492lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
493                             LLVMValueRef *values,
494                             const unsigned char swizzles[4])
495{
496   LLVMValueRef unswizzled[4];
497   unsigned chan;
498
499   for (chan = 0; chan < 4; ++chan) {
500      unswizzled[chan] = values[chan];
501   }
502
503   lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
504}
505
506
507/**
508 * Transpose from AOS <-> SOA
509 *
510 * @param single_type_lp   type of pixels
511 * @param src              the 4 * n pixel input
512 * @param dst              the 4 * n pixel output
513 */
514void
515lp_build_transpose_aos(struct gallivm_state *gallivm,
516                       struct lp_type single_type_lp,
517                       const LLVMValueRef src[4],
518                       LLVMValueRef dst[4])
519{
520   struct lp_type double_type_lp = single_type_lp;
521   LLVMTypeRef single_type;
522   LLVMTypeRef double_type;
523   LLVMValueRef t0, t1, t2, t3;
524
525   double_type_lp.length >>= 1;
526   double_type_lp.width  <<= 1;
527
528   double_type = lp_build_vec_type(gallivm, double_type_lp);
529   single_type = lp_build_vec_type(gallivm, single_type_lp);
530
531   /* Interleave x, y, z, w -> xy and zw */
532   t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
533   t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
534   t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
535   t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
536
537   /* Cast to double width type for second interleave */
538   t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
539   t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
540   t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
541   t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
542
543   /* Interleave xy, zw -> xyzw */
544   dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
545   dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
546   dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
547   dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
548
549   /* Cast back to original single width type */
550   dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
551   dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
552   dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
553   dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
554}
555
556
557/**
558 * Pack first element of aos values,
559 * pad out to destination size.
560 * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _
561 */
562LLVMValueRef
563lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
564                          struct lp_type src_type,
565                          struct lp_type dst_type,
566                          const LLVMValueRef src)
567{
568   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
569   LLVMValueRef undef = LLVMGetUndef(i32t);
570   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
571   unsigned num_src = src_type.length / 4;
572   unsigned num_dst = dst_type.length;
573   unsigned i;
574
575   assert(num_src <= num_dst);
576
577   for (i = 0; i < num_src; i++) {
578      shuffles[i] = LLVMConstInt(i32t, i * 4, 0);
579   }
580   for (i = num_src; i < num_dst; i++) {
581      shuffles[i] = undef;
582   }
583
584   if (num_dst == 1) {
585      return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
586   }
587   else {
588      return LLVMBuildShuffleVector(gallivm->builder, src, src,
589                                    LLVMConstVector(shuffles, num_dst), "");
590   }
591}
592
593
594/**
595 * Unpack and broadcast packed aos values consisting of only the
596 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
597 */
598LLVMValueRef
599lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
600                                      struct lp_type src_type,
601                                      struct lp_type dst_type,
602                                      const LLVMValueRef src)
603{
604   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
605   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
606   unsigned num_dst = dst_type.length;
607   unsigned num_src = dst_type.length / 4;
608   unsigned i;
609
610   assert(num_dst / 4 <= src_type.length);
611
612   for (i = 0; i < num_src; i++) {
613      shuffles[i*4] = LLVMConstInt(i32t, i, 0);
614      shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
615      shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
616      shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
617   }
618
619   if (num_src == 1) {
620      return lp_build_extract_broadcast(gallivm, src_type, dst_type,
621                                        src, shuffles[0]);
622   }
623   else {
624      return LLVMBuildShuffleVector(gallivm->builder, src, src,
625                                    LLVMConstVector(shuffles, num_dst), "");
626   }
627}
628
629