/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
271dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
/**
 * @file
 * Helper functions for swizzling/shuffling.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca
35369b5a311ca5e03bc4cccc3052800b94e316087dBrian Paul#include <inttypes.h>  /* for PRIx64 macro */
361dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#include "util/u_debug.h"
371dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
381dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#include "lp_bld_type.h"
391dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#include "lp_bld_const.h"
40efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul#include "lp_bld_init.h"
4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_logic.h"
421dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#include "lp_bld_swizzle.h"
433469715a8a171512cf9b528702e70393f01c6041José Fonseca#include "lp_bld_pack.h"
441dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
451dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
461dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé FonsecaLLVMValueRef
47efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_broadcast(struct gallivm_state *gallivm,
486f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca                   LLVMTypeRef vec_type,
496f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca                   LLVMValueRef scalar)
506f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca{
516f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca   LLVMValueRef res;
526f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca
53638779e44555b3c3789638a95693357cf59f5595José Fonseca   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
54638779e44555b3c3789638a95693357cf59f5595José Fonseca      /* scalar */
55638779e44555b3c3789638a95693357cf59f5595José Fonseca      assert(vec_type == LLVMTypeOf(scalar));
56638779e44555b3c3789638a95693357cf59f5595José Fonseca      res = scalar;
57638779e44555b3c3789638a95693357cf59f5595José Fonseca   } else {
58638779e44555b3c3789638a95693357cf59f5595José Fonseca      LLVMBuilderRef builder = gallivm->builder;
59638779e44555b3c3789638a95693357cf59f5595José Fonseca      const unsigned length = LLVMGetVectorSize(vec_type);
60638779e44555b3c3789638a95693357cf59f5595José Fonseca      LLVMValueRef undef = LLVMGetUndef(vec_type);
619cf67e51b06b4b136d03e642b18b4a4e36a1dabbJosé Fonseca      /* The shuffle vector is always made of int32 elements */
62638779e44555b3c3789638a95693357cf59f5595José Fonseca      LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
639cf67e51b06b4b136d03e642b18b4a4e36a1dabbJosé Fonseca      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
64638779e44555b3c3789638a95693357cf59f5595José Fonseca
65638779e44555b3c3789638a95693357cf59f5595José Fonseca      assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
66638779e44555b3c3789638a95693357cf59f5595José Fonseca
679cf67e51b06b4b136d03e642b18b4a4e36a1dabbJosé Fonseca      res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
689cf67e51b06b4b136d03e642b18b4a4e36a1dabbJosé Fonseca      res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
696f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca   }
706f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca
716f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca   return res;
726f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca}
736f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca
746f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca
752c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca/**
762c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Broadcast
772c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */
786f5cd15f80d612e165078dfe2126cf44cf509d91José FonsecaLLVMValueRef
794393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonsecalp_build_broadcast_scalar(struct lp_build_context *bld,
804393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca                          LLVMValueRef scalar)
814393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca{
82638779e44555b3c3789638a95693357cf59f5595José Fonseca   assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
83ff7542ab90154769930c80d58597cfec40844bd9José Fonseca
84638779e44555b3c3789638a95693357cf59f5595José Fonseca   return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
854393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca}
864393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca
874393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca
8834ea50f6720d6aca970613da560a1f25f5b5772cJosé Fonseca/**
893469715a8a171512cf9b528702e70393f01c6041José Fonseca * Combined extract and broadcast (mere shuffle in most cases)
903fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca */
913fde8167a5d9c1e845053ae4e6a9cd49628adc71José FonsecaLLVMValueRef
92efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_extract_broadcast(struct gallivm_state *gallivm,
933fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca                           struct lp_type src_type,
943fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca                           struct lp_type dst_type,
953fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca                           LLVMValueRef vector,
963fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca                           LLVMValueRef index)
973fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca{
98efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
993fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   LLVMValueRef res;
1003fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
1013fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   assert(src_type.floating == dst_type.floating);
1023fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   assert(src_type.width    == dst_type.width);
1033fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
1043fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   assert(lp_check_value(src_type, vector));
1053fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   assert(LLVMTypeOf(index) == i32t);
1063fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
1073fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   if (src_type.length == 1) {
1083fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca      if (dst_type.length == 1) {
1093fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca         /*
1103fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca          * Trivial scalar -> scalar.
1113fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca          */
1123fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
1133fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca         res = vector;
1143fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca      }
1153fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca      else {
1163fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca         /*
1173fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca          * Broadcast scalar -> vector.
1183fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca          */
1193fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
120efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         res = lp_build_broadcast(gallivm,
121efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul                                  lp_build_vec_type(gallivm, dst_type),
1223fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca                                  vector);
1233fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca      }
1243fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   }
1253fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   else {
1263469715a8a171512cf9b528702e70393f01c6041José Fonseca      if (dst_type.length > 1) {
1273fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca         /*
1283469715a8a171512cf9b528702e70393f01c6041José Fonseca          * shuffle - result can be of different length.
1293fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca          */
1303fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
1313fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca         LLVMValueRef shuffle;
132efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         shuffle = lp_build_broadcast(gallivm,
1333fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca                                      LLVMVectorType(i32t, dst_type.length),
1343fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca                                      index);
135efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         res = LLVMBuildShuffleVector(gallivm->builder, vector,
1363469715a8a171512cf9b528702e70393f01c6041José Fonseca                                      LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
1373fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca                                      shuffle, "");
1383fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca      }
1393fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca      else {
1403469715a8a171512cf9b528702e70393f01c6041José Fonseca         /*
1413469715a8a171512cf9b528702e70393f01c6041José Fonseca          * Trivial extract scalar from vector.
1423469715a8a171512cf9b528702e70393f01c6041José Fonseca          */
1433469715a8a171512cf9b528702e70393f01c6041José Fonseca          res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
1443fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca      }
1453fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   }
1463fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
1473fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca   return res;
1483fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca}
1493fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
1503fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca
1513fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca/**
152fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * Swizzle one channel into other channels.
15334ea50f6720d6aca970613da560a1f25f5b5772cJosé Fonseca */
1544393ca7956bd03e07b4d7a019705fc62d4ac7155José FonsecaLLVMValueRef
15534ea50f6720d6aca970613da560a1f25f5b5772cJosé Fonsecalp_build_swizzle_scalar_aos(struct lp_build_context *bld,
15634ea50f6720d6aca970613da560a1f25f5b5772cJosé Fonseca                            LLVMValueRef a,
157fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                            unsigned channel,
158fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                            unsigned num_channels)
1591dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca{
1606299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul   LLVMBuilderRef builder = bld->gallivm->builder;
161b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   const struct lp_type type = bld->type;
1621dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca   const unsigned n = type.length;
1631dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca   unsigned i, j;
1641dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
165fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
1661dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      return a;
1671dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
168fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   assert(num_channels == 2 || num_channels == 4);
169fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
17028e46458bca3065baf0424b20e5b72cb672069e6José Fonseca   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
17128e46458bca3065baf0424b20e5b72cb672069e6José Fonseca    * using shuffles here actually causes worst results. More investigation is
17228e46458bca3065baf0424b20e5b72cb672069e6José Fonseca    * needed. */
1732eddf91faf4abf0b3d7b81a0a486fd46d5acd1eaJosé Fonseca   if (LLVMIsConstant(a) ||
1742eddf91faf4abf0b3d7b81a0a486fd46d5acd1eaJosé Fonseca       type.width >= 16) {
1751dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      /*
1761dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca       * Shuffle.
1771dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca       */
178efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
1791dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
1801dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
181fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      for(j = 0; j < n; j += num_channels)
182fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         for(i = 0; i < num_channels; ++i)
1831dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
1841dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
1856299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
1861dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca   }
187fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   else if (num_channels == 2) {
188fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      /*
189fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton       * Bit mask and shifts
190fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton       *
191fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton       *   XY XY .... XY  <= input
192fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton       *   0Y 0Y .... 0Y
193fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton       *   YY YY .... YY
194fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton       *   YY YY .... YY  <= output
195fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton       */
196fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      struct lp_type type2;
197fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      LLVMValueRef tmp = NULL;
198fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      int shift;
199fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
200fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      a = LLVMBuildAnd(builder, a,
201fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                       lp_build_const_mask_aos(bld->gallivm,
202fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                                               type, 1 << channel, num_channels), "");
203fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
204fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      type2 = type;
205fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      type2.floating = FALSE;
206fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      type2.width *= 2;
207fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      type2.length /= 2;
208fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
209fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
210fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
2112151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson      /*
2122151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * Vector element 0 is always channel X.
2132151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *
2142151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *                        76 54 32 10 (array numbering)
2152151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * Little endian reg in:  YX YX YX YX
2162151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
2172151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *                        XX XX XX XX if shift left (shift == 1)
2182151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *
2192151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *                        01 23 45 67 (array numbering)
2202151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * Big endian reg in:     XY XY XY XY
2212151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
2222151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *                        XX XX XX XX if shift right (shift == -1)
2232151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *
2242151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       */
225fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton#ifdef PIPE_ARCH_LITTLE_ENDIAN
226fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      shift = channel == 0 ? 1 : -1;
227fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton#else
228fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      shift = channel == 0 ? -1 : 1;
229fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton#endif
230fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
231fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      if (shift > 0) {
232fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
233fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      } else if (shift < 0) {
234fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
235fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      }
236fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
237fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      assert(tmp);
238fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      if (tmp) {
239fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         a = LLVMBuildOr(builder, a, tmp, "");
240fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      }
241fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
242fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
243fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   }
2441dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca   else {
2451dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      /*
2461dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca       * Bit mask and recursive shifts
2471dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca       *
2482151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * Little-endian registers:
2492151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *
2502151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   7654 3210
2512151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   WZYX WZYX .... WZYX  <= input
2522151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   00Y0 00Y0 .... 00Y0  <= mask
2532151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
2542151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
2552151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *
2562151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * Big-endian registers:
2572151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *
2582151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   0123 4567
25928e46458bca3065baf0424b20e5b72cb672069e6José Fonseca       *   XYZW XYZW .... XYZW  <= input
2602151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   0Y00 0Y00 .... 0Y00  <= mask
2612151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
2622151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
2632151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       *
2642151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
2651dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca       */
266a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      struct lp_type type4;
267dd5c58081672bd495e0ffef1c3cc1229620f0f88Adhemerval Zanella      const int shifts[4][2] = {
2681dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         { 1,  2},
2691dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         {-1,  2},
2701dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         { 1, -2},
2711dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         {-1, -2}
2721dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      };
2731dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      unsigned i;
2741dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
2756299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      a = LLVMBuildAnd(builder, a,
276efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul                       lp_build_const_mask_aos(bld->gallivm,
2771d3789bccbbcc814fd7b339e9f5b5631e30d9f0eJames Benton                                               type, 1 << channel, 4), "");
2781dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
279a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      /*
280a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * Build a type where each element is an integer that cover the four
281a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * channels.
282a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       */
283a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
284a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      type4 = type;
285a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      type4.floating = FALSE;
2861dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      type4.width *= 4;
2871dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      type4.length /= 4;
2881dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
2896299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
2901dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
2911dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      for(i = 0; i < 2; ++i) {
2921dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         LLVMValueRef tmp = NULL;
2931dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         int shift = shifts[channel][i];
2941dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
2952151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson         /* See endianness diagram above */
2962151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson#ifdef PIPE_ARCH_BIG_ENDIAN
2971dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         shift = -shift;
2981dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#endif
2991dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
3001dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         if(shift > 0)
3012151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
3021dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         if(shift < 0)
3032151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
3041dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
3051dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         assert(tmp);
3061dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca         if(tmp)
3076299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul            a = LLVMBuildOr(builder, a, tmp, "");
3081dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      }
3091dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
3106299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
3111dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca   }
3121dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca}
3131dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
3141dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
315fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton/**
316fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * Swizzle a vector consisting of an array of XYZW structs.
317fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton *
318fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * This fills a vector of dst_len length with the swizzled channels from src.
319fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton *
320fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
321fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton *      RGBA RGBA = BGR BGR BG
322fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton *
323fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * @param swizzles        the swizzle array
324fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * @param num_swizzles    the number of elements in swizzles
325fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * @param dst_len         the length of the result
326fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton */
327fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames BentonLLVMValueRef
328fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Bentonlp_build_swizzle_aos_n(struct gallivm_state* gallivm,
329fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                       LLVMValueRef src,
330fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                       const unsigned char* swizzles,
331fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                       unsigned num_swizzles,
332fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                       unsigned dst_len)
333fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton{
334fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   LLVMBuilderRef builder = gallivm->builder;
335fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
336fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   unsigned i;
337fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
338fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   assert(dst_len < LP_MAX_VECTOR_WIDTH);
339fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
340fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   for (i = 0; i < dst_len; ++i) {
341fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      int swizzle = swizzles[i % num_swizzles];
342fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
343fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
344fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
345fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      } else {
346fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         shuffles[i] = lp_build_const_int32(gallivm, swizzle);
347fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      }
348fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   }
349fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
350fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
351fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton}
352fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
353fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
3541dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé FonsecaLLVMValueRef
355a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonsecalp_build_swizzle_aos(struct lp_build_context *bld,
356a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca                     LLVMValueRef a,
357a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca                     const unsigned char swizzles[4])
3581dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca{
3596299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul   LLVMBuilderRef builder = bld->gallivm->builder;
360a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca   const struct lp_type type = bld->type;
361a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca   const unsigned n = type.length;
3621dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca   unsigned i, j;
3631dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
364fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák   if (swizzles[0] == PIPE_SWIZZLE_X &&
365fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák       swizzles[1] == PIPE_SWIZZLE_Y &&
366fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák       swizzles[2] == PIPE_SWIZZLE_Z &&
367fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák       swizzles[3] == PIPE_SWIZZLE_W) {
3681dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      return a;
369a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca   }
3701dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
371a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca   if (swizzles[0] == swizzles[1] &&
372a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       swizzles[1] == swizzles[2] &&
373a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       swizzles[2] == swizzles[3]) {
374a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      switch (swizzles[0]) {
375fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák      case PIPE_SWIZZLE_X:
376fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák      case PIPE_SWIZZLE_Y:
377fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák      case PIPE_SWIZZLE_Z:
378fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák      case PIPE_SWIZZLE_W:
379fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
380fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák      case PIPE_SWIZZLE_0:
381a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         return bld->zero;
382fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák      case PIPE_SWIZZLE_1:
383a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         return bld->one;
3843469715a8a171512cf9b528702e70393f01c6041José Fonseca      case LP_BLD_SWIZZLE_DONTCARE:
3853469715a8a171512cf9b528702e70393f01c6041José Fonseca         return bld->undef;
386a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      default:
387a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         assert(0);
388a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         return bld->undef;
389a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      }
390a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca   }
3911dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
3922eddf91faf4abf0b3d7b81a0a486fd46d5acd1eaJosé Fonseca   if (LLVMIsConstant(a) ||
3932eddf91faf4abf0b3d7b81a0a486fd46d5acd1eaJosé Fonseca       type.width >= 16) {
3941dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      /*
3951dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca       * Shuffle.
3961dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca       */
397efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
398efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
3991dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
400a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
401a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
402a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      memset(aux, 0, sizeof aux);
403a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
404a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      for(j = 0; j < n; j += 4) {
405a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         for(i = 0; i < 4; ++i) {
406a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            unsigned shuffle;
407a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            switch (swizzles[i]) {
408a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            default:
409a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               assert(0);
410a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               /* fall through */
411fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák            case PIPE_SWIZZLE_X:
412fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák            case PIPE_SWIZZLE_Y:
413fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák            case PIPE_SWIZZLE_Z:
414fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák            case PIPE_SWIZZLE_W:
415a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               shuffle = j + swizzles[i];
4163469715a8a171512cf9b528702e70393f01c6041José Fonseca               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
417a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               break;
418fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák            case PIPE_SWIZZLE_0:
419a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               shuffle = type.length + 0;
4203469715a8a171512cf9b528702e70393f01c6041José Fonseca               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
421a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               if (!aux[0]) {
422efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul                  aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
423a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               }
424a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               break;
425fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák            case PIPE_SWIZZLE_1:
426a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               shuffle = type.length + 1;
4273469715a8a171512cf9b528702e70393f01c6041José Fonseca               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
428a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               if (!aux[1]) {
429efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul                  aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
430a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               }
431a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               break;
4323469715a8a171512cf9b528702e70393f01c6041José Fonseca            case LP_BLD_SWIZZLE_DONTCARE:
4333469715a8a171512cf9b528702e70393f01c6041José Fonseca               shuffles[j + i] = LLVMGetUndef(i32t);
4343469715a8a171512cf9b528702e70393f01c6041José Fonseca               break;
435a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            }
436a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         }
437a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      }
4381dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
439a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      for (i = 0; i < n; ++i) {
440a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         if (!aux[i]) {
441a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            aux[i] = undef;
442a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         }
443a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      }
4441dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
4456299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      return LLVMBuildShuffleVector(builder, a,
446a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca                                    LLVMConstVector(aux, n),
447a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca                                    LLVMConstVector(shuffles, n), "");
448a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca   } else {
449a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      /*
450a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * Bit mask and shifts.
451a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       *
452a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * For example, this will convert BGRA to RGBA by doing
453a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       *
454e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella       * Little endian:
455a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       *   rgba = (bgra & 0x00ff0000) >> 16
456a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       *        | (bgra & 0xff00ff00)
457a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       *        | (bgra & 0x000000ff) << 16
458a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       *
459e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella       * Big endian:A
460e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella       *   rgba = (bgra & 0x0000ff00) << 16
461e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella       *        | (bgra & 0x00ff00ff)
462e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella       *        | (bgra & 0xff000000) >> 16
463e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella       *
464a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * This is necessary not only for faster cause, but because X86 backend
465a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * will refuse shuffles of <4 x i8> vectors
466a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       */
467a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      LLVMValueRef res;
468a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      struct lp_type type4;
4696ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      unsigned cond = 0;
470ace70aedcf8b29380a17f68a994b18f60976bca6Jan Vesely      int chan;
471a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      int shift;
472a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
473a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      /*
474a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * Start with a mixture of 1 and 0.
475a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       */
476a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      for (chan = 0; chan < 4; ++chan) {
477fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák         if (swizzles[chan] == PIPE_SWIZZLE_1) {
4786ed726b8fc6210a41fe325591e1428d19f419108José Fonseca            cond |= 1 << chan;
4796ed726b8fc6210a41fe325591e1428d19f419108José Fonseca         }
480a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      }
481fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
482a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
483a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      /*
484a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * Build a type where each element is an integer that cover the four
485a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * channels.
486a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       */
487a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      type4 = type;
488a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      type4.floating = FALSE;
489a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      type4.width *= 4;
490a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      type4.length /= 4;
491a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
4926299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
4936299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
494a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
495a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      /*
496a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       * Mask and shift the channels, trying to group as many channels in the
4972151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * same shift as possible.  The shift amount is positive for shifts left
4982151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson       * and negative for shifts right.
499a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca       */
500a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      for (shift = -3; shift <= 3; ++shift) {
501369b5a311ca5e03bc4cccc3052800b94e316087dBrian Paul         uint64_t mask = 0;
502a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
503a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         assert(type4.width <= sizeof(mask)*8);
504a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
5052151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson         /*
5062151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          * Vector element numbers follow the XYZW order, so 0 is always X, etc.
5072151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          * After widening 4 times we have:
5082151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          *
5092151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          *                                3210
5102151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          * Little-endian register layout: WZYX
5112151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          *
5122151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          *                                0123
5132151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          * Big-endian register layout:    XYZW
5142151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          *
5152151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          * For little-endian, higher-numbered channels are obtained by a shift right
5162151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          * (negative shift amount) and lower-numbered channels by a shift left
5172151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          * (positive shift amount).  The opposite is true for big-endian.
5182151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson          */
519a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         for (chan = 0; chan < 4; ++chan) {
5202151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson            if (swizzles[chan] < 4) {
5212151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson               /* We need to move channel swizzles[chan] into channel chan */
522e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella#ifdef PIPE_ARCH_LITTLE_ENDIAN
5232151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson               if (swizzles[chan] - chan == -shift) {
5242151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson                  mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
5252151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson               }
526e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella#else
5272151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson               if (swizzles[chan] - chan == shift) {
5282151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson                  mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
5292151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson               }
530e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella#endif
531a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            }
532a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         }
533a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
534a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         if (mask) {
535a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            LLVMValueRef masked;
536a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            LLVMValueRef shifted;
537a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            if (0)
538369b5a311ca5e03bc4cccc3052800b94e316087dBrian Paul               debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
539a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
5406299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul            masked = LLVMBuildAnd(builder, a,
541efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
542a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            if (shift > 0) {
5436299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul               shifted = LLVMBuildShl(builder, masked,
544efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
545a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            } else if (shift < 0) {
5466299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul               shifted = LLVMBuildLShr(builder, masked,
547efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
548a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            } else {
549a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca               shifted = masked;
550a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca            }
551a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
5526299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul            res = LLVMBuildOr(builder, res, shifted, "");
553a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca         }
554a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca      }
555a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca
5566299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      return LLVMBuildBitCast(builder, res,
557efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul                              lp_build_vec_type(bld->gallivm, type), "");
5581dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca   }
5591dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca}
5601dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
5611dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca
5622c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca/**
5632c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Extended swizzle of a single channel of a SoA vector.
5642c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca *
5652c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param bld         building context
5662c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param unswizzled  array with the 4 unswizzled values
5672c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param swizzle     one of the PIPE_SWIZZLE_*
5682c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca *
5692c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @return  the swizzled value.
5702c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */
5712c2debaea71eb99322c2371f1c581e9748cda91fJosé FonsecaLLVMValueRef
5722c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecalp_build_swizzle_soa_channel(struct lp_build_context *bld,
5732c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca                             const LLVMValueRef *unswizzled,
5742c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca                             unsigned swizzle)
5752c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca{
5762c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   switch (swizzle) {
577fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák   case PIPE_SWIZZLE_X:
578fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák   case PIPE_SWIZZLE_Y:
579fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák   case PIPE_SWIZZLE_Z:
580fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák   case PIPE_SWIZZLE_W:
5812c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca      return unswizzled[swizzle];
582fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák   case PIPE_SWIZZLE_0:
5832c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca      return bld->zero;
584fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák   case PIPE_SWIZZLE_1:
5852c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca      return bld->one;
5862c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   default:
5872c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca      assert(0);
5882c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca      return bld->undef;
5892c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   }
5902c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca}
5912c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca
5922c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca
5932c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca/**
5942c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Extended swizzle of a SoA vector.
5952c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca *
5962c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param bld         building context
5972c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param unswizzled  array with the 4 unswizzled values
5982c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param swizzles    array of PIPE_SWIZZLE_*
5992c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param swizzled    output swizzled values
6002c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */
6012c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecavoid
6022c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecalp_build_swizzle_soa(struct lp_build_context *bld,
6032c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca                     const LLVMValueRef *unswizzled,
6042c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca                     const unsigned char swizzles[4],
6052c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca                     LLVMValueRef *swizzled)
6062c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca{
6072c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   unsigned chan;
6082c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca
6092c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   for (chan = 0; chan < 4; ++chan) {
6102c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca      swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
6112c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca                                                    swizzles[chan]);
6122c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   }
6132c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca}
6142c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca
6152c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca
6162c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca/**
6172c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Do an extended swizzle of a SoA vector inplace.
6182c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca *
6192c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param bld         building context
6202c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param values      intput/output array with the 4 values
6212c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param swizzles    array of PIPE_SWIZZLE_*
6222c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */
6232c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecavoid
6242c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecalp_build_swizzle_soa_inplace(struct lp_build_context *bld,
6252c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca                             LLVMValueRef *values,
6262c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca                             const unsigned char swizzles[4])
6272c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca{
6282c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   LLVMValueRef unswizzled[4];
6292c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   unsigned chan;
6302c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca
6312c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   for (chan = 0; chan < 4; ++chan) {
6322c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca      unswizzled[chan] = values[chan];
6332c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   }
6342c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca
6352c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca   lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
6362c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca}
6373469715a8a171512cf9b528702e70393f01c6041José Fonseca
6383469715a8a171512cf9b528702e70393f01c6041José Fonseca
6393469715a8a171512cf9b528702e70393f01c6041José Fonseca/**
6403469715a8a171512cf9b528702e70393f01c6041José Fonseca * Transpose from AOS <-> SOA
6413469715a8a171512cf9b528702e70393f01c6041José Fonseca *
6423469715a8a171512cf9b528702e70393f01c6041José Fonseca * @param single_type_lp   type of pixels
6433469715a8a171512cf9b528702e70393f01c6041José Fonseca * @param src              the 4 * n pixel input
6443469715a8a171512cf9b528702e70393f01c6041José Fonseca * @param dst              the 4 * n pixel output
6453469715a8a171512cf9b528702e70393f01c6041José Fonseca */
6463469715a8a171512cf9b528702e70393f01c6041José Fonsecavoid
6473469715a8a171512cf9b528702e70393f01c6041José Fonsecalp_build_transpose_aos(struct gallivm_state *gallivm,
6483469715a8a171512cf9b528702e70393f01c6041José Fonseca                       struct lp_type single_type_lp,
6493469715a8a171512cf9b528702e70393f01c6041José Fonseca                       const LLVMValueRef src[4],
6503469715a8a171512cf9b528702e70393f01c6041José Fonseca                       LLVMValueRef dst[4])
6513469715a8a171512cf9b528702e70393f01c6041José Fonseca{
6523469715a8a171512cf9b528702e70393f01c6041José Fonseca   struct lp_type double_type_lp = single_type_lp;
6533469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMTypeRef single_type;
6543469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMTypeRef double_type;
6553469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMValueRef t0, t1, t2, t3;
6563469715a8a171512cf9b528702e70393f01c6041José Fonseca
6573469715a8a171512cf9b528702e70393f01c6041José Fonseca   double_type_lp.length >>= 1;
6583469715a8a171512cf9b528702e70393f01c6041José Fonseca   double_type_lp.width  <<= 1;
6593469715a8a171512cf9b528702e70393f01c6041José Fonseca
6603469715a8a171512cf9b528702e70393f01c6041José Fonseca   double_type = lp_build_vec_type(gallivm, double_type_lp);
6613469715a8a171512cf9b528702e70393f01c6041José Fonseca   single_type = lp_build_vec_type(gallivm, single_type_lp);
6623469715a8a171512cf9b528702e70393f01c6041José Fonseca
6633469715a8a171512cf9b528702e70393f01c6041José Fonseca   /* Interleave x, y, z, w -> xy and zw */
6643469715a8a171512cf9b528702e70393f01c6041José Fonseca   t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
6653469715a8a171512cf9b528702e70393f01c6041José Fonseca   t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
6663469715a8a171512cf9b528702e70393f01c6041José Fonseca   t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
6673469715a8a171512cf9b528702e70393f01c6041José Fonseca   t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
6683469715a8a171512cf9b528702e70393f01c6041José Fonseca
6693469715a8a171512cf9b528702e70393f01c6041José Fonseca   /* Cast to double width type for second interleave */
6703469715a8a171512cf9b528702e70393f01c6041José Fonseca   t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
6713469715a8a171512cf9b528702e70393f01c6041José Fonseca   t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
6723469715a8a171512cf9b528702e70393f01c6041José Fonseca   t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
6733469715a8a171512cf9b528702e70393f01c6041José Fonseca   t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
6743469715a8a171512cf9b528702e70393f01c6041José Fonseca
6753469715a8a171512cf9b528702e70393f01c6041José Fonseca   /* Interleave xy, zw -> xyzw */
6763469715a8a171512cf9b528702e70393f01c6041José Fonseca   dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
6773469715a8a171512cf9b528702e70393f01c6041José Fonseca   dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
6783469715a8a171512cf9b528702e70393f01c6041José Fonseca   dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
6793469715a8a171512cf9b528702e70393f01c6041José Fonseca   dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
6803469715a8a171512cf9b528702e70393f01c6041José Fonseca
6813469715a8a171512cf9b528702e70393f01c6041José Fonseca   /* Cast back to original single width type */
6823469715a8a171512cf9b528702e70393f01c6041José Fonseca   dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
6833469715a8a171512cf9b528702e70393f01c6041José Fonseca   dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
6843469715a8a171512cf9b528702e70393f01c6041José Fonseca   dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
6853469715a8a171512cf9b528702e70393f01c6041José Fonseca   dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
6863469715a8a171512cf9b528702e70393f01c6041José Fonseca}
6873469715a8a171512cf9b528702e70393f01c6041José Fonseca
6883469715a8a171512cf9b528702e70393f01c6041José Fonseca
6893469715a8a171512cf9b528702e70393f01c6041José Fonseca/**
690fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * Transpose from AOS <-> SOA for num_srcs
691fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton */
692fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Bentonvoid
693fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Bentonlp_build_transpose_aos_n(struct gallivm_state *gallivm,
694fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                         struct lp_type type,
695fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                         const LLVMValueRef* src,
696fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                         unsigned num_srcs,
697fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton                         LLVMValueRef* dst)
698fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton{
699fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton   switch (num_srcs) {
700fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      case 1:
701fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         dst[0] = src[0];
702fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         break;
703fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
704fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      case 2:
705fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      {
706fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         /* Note: we must use a temporary incase src == dst */
707fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         LLVMValueRef lo, hi;
708fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
709fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
710fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
711fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
712fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         dst[0] = lo;
713fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         dst[1] = hi;
714fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         break;
715fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      }
716fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
717fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      case 4:
718fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         lp_build_transpose_aos(gallivm, type, src, dst);
719fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         break;
720fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
721fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton      default:
722fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton         assert(0);
72367d4b4b28c358845f0c0b9f6cacd5e611c746313Edward O'Callaghan   }
724fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton}
725fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
726fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton
727fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton/**
7280b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger * Pack n-th element of aos values,
7293469715a8a171512cf9b528702e70393f01c6041José Fonseca * pad out to destination size.
7300b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
7313469715a8a171512cf9b528702e70393f01c6041José Fonseca */
7323469715a8a171512cf9b528702e70393f01c6041José FonsecaLLVMValueRef
7333469715a8a171512cf9b528702e70393f01c6041José Fonsecalp_build_pack_aos_scalars(struct gallivm_state *gallivm,
7343469715a8a171512cf9b528702e70393f01c6041José Fonseca                          struct lp_type src_type,
7353469715a8a171512cf9b528702e70393f01c6041José Fonseca                          struct lp_type dst_type,
7360b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger                          const LLVMValueRef src,
7370b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger                          unsigned channel)
7383469715a8a171512cf9b528702e70393f01c6041José Fonseca{
7393469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
7403469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMValueRef undef = LLVMGetUndef(i32t);
7413469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
7423469715a8a171512cf9b528702e70393f01c6041José Fonseca   unsigned num_src = src_type.length / 4;
7433469715a8a171512cf9b528702e70393f01c6041José Fonseca   unsigned num_dst = dst_type.length;
7443469715a8a171512cf9b528702e70393f01c6041José Fonseca   unsigned i;
7453469715a8a171512cf9b528702e70393f01c6041José Fonseca
7463469715a8a171512cf9b528702e70393f01c6041José Fonseca   assert(num_src <= num_dst);
7473469715a8a171512cf9b528702e70393f01c6041José Fonseca
7483469715a8a171512cf9b528702e70393f01c6041José Fonseca   for (i = 0; i < num_src; i++) {
7490b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger      shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
7503469715a8a171512cf9b528702e70393f01c6041José Fonseca   }
7513469715a8a171512cf9b528702e70393f01c6041José Fonseca   for (i = num_src; i < num_dst; i++) {
7523469715a8a171512cf9b528702e70393f01c6041José Fonseca      shuffles[i] = undef;
7533469715a8a171512cf9b528702e70393f01c6041José Fonseca   }
7543469715a8a171512cf9b528702e70393f01c6041José Fonseca
7553469715a8a171512cf9b528702e70393f01c6041José Fonseca   if (num_dst == 1) {
7563469715a8a171512cf9b528702e70393f01c6041José Fonseca      return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
7573469715a8a171512cf9b528702e70393f01c6041José Fonseca   }
7583469715a8a171512cf9b528702e70393f01c6041José Fonseca   else {
7593469715a8a171512cf9b528702e70393f01c6041José Fonseca      return LLVMBuildShuffleVector(gallivm->builder, src, src,
7603469715a8a171512cf9b528702e70393f01c6041José Fonseca                                    LLVMConstVector(shuffles, num_dst), "");
7613469715a8a171512cf9b528702e70393f01c6041José Fonseca   }
7623469715a8a171512cf9b528702e70393f01c6041José Fonseca}
7633469715a8a171512cf9b528702e70393f01c6041José Fonseca
7643469715a8a171512cf9b528702e70393f01c6041José Fonseca
7653469715a8a171512cf9b528702e70393f01c6041José Fonseca/**
7663469715a8a171512cf9b528702e70393f01c6041José Fonseca * Unpack and broadcast packed aos values consisting of only the
7673469715a8a171512cf9b528702e70393f01c6041José Fonseca * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
7683469715a8a171512cf9b528702e70393f01c6041José Fonseca */
7693469715a8a171512cf9b528702e70393f01c6041José FonsecaLLVMValueRef
7703469715a8a171512cf9b528702e70393f01c6041José Fonsecalp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
7713469715a8a171512cf9b528702e70393f01c6041José Fonseca                                      struct lp_type src_type,
7723469715a8a171512cf9b528702e70393f01c6041José Fonseca                                      struct lp_type dst_type,
7733469715a8a171512cf9b528702e70393f01c6041José Fonseca                                      const LLVMValueRef src)
7743469715a8a171512cf9b528702e70393f01c6041José Fonseca{
7753469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
7763469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
7773469715a8a171512cf9b528702e70393f01c6041José Fonseca   unsigned num_dst = dst_type.length;
7783469715a8a171512cf9b528702e70393f01c6041José Fonseca   unsigned num_src = dst_type.length / 4;
7793469715a8a171512cf9b528702e70393f01c6041José Fonseca   unsigned i;
7803469715a8a171512cf9b528702e70393f01c6041José Fonseca
7813469715a8a171512cf9b528702e70393f01c6041José Fonseca   assert(num_dst / 4 <= src_type.length);
7823469715a8a171512cf9b528702e70393f01c6041José Fonseca
7833469715a8a171512cf9b528702e70393f01c6041José Fonseca   for (i = 0; i < num_src; i++) {
7843469715a8a171512cf9b528702e70393f01c6041José Fonseca      shuffles[i*4] = LLVMConstInt(i32t, i, 0);
7853469715a8a171512cf9b528702e70393f01c6041José Fonseca      shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
7863469715a8a171512cf9b528702e70393f01c6041José Fonseca      shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
7873469715a8a171512cf9b528702e70393f01c6041José Fonseca      shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
7883469715a8a171512cf9b528702e70393f01c6041José Fonseca   }
7893469715a8a171512cf9b528702e70393f01c6041José Fonseca
7903469715a8a171512cf9b528702e70393f01c6041José Fonseca   if (num_src == 1) {
7913469715a8a171512cf9b528702e70393f01c6041José Fonseca      return lp_build_extract_broadcast(gallivm, src_type, dst_type,
7923469715a8a171512cf9b528702e70393f01c6041José Fonseca                                        src, shuffles[0]);
7933469715a8a171512cf9b528702e70393f01c6041José Fonseca   }
7943469715a8a171512cf9b528702e70393f01c6041José Fonseca   else {
7953469715a8a171512cf9b528702e70393f01c6041José Fonseca      return LLVMBuildShuffleVector(gallivm->builder, src, src,
7963469715a8a171512cf9b528702e70393f01c6041José Fonseca                                    LLVMConstVector(shuffles, num_dst), "");
7973469715a8a171512cf9b528702e70393f01c6041José Fonseca   }
7983469715a8a171512cf9b528702e70393f01c6041José Fonseca}
7993469715a8a171512cf9b528702e70393f01c6041José Fonseca
800