/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helper functions for swizzling/shuffling.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * 325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com> 335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */ 345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca 35369b5a311ca5e03bc4cccc3052800b94e316087dBrian Paul#include <inttypes.h> /* for PRIx64 macro */ 361dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#include "util/u_debug.h" 371dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 381dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#include "lp_bld_type.h" 391dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#include "lp_bld_const.h" 40efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul#include "lp_bld_init.h" 4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_logic.h" 421dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#include "lp_bld_swizzle.h" 433469715a8a171512cf9b528702e70393f01c6041José Fonseca#include "lp_bld_pack.h" 441dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 451dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 461dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé FonsecaLLVMValueRef 47efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_broadcast(struct gallivm_state *gallivm, 486f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca LLVMTypeRef vec_type, 496f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca LLVMValueRef scalar) 506f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca{ 516f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca LLVMValueRef res; 526f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca 53638779e44555b3c3789638a95693357cf59f5595José Fonseca if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) { 54638779e44555b3c3789638a95693357cf59f5595José Fonseca /* scalar */ 55638779e44555b3c3789638a95693357cf59f5595José Fonseca assert(vec_type == LLVMTypeOf(scalar)); 56638779e44555b3c3789638a95693357cf59f5595José Fonseca res = scalar; 57638779e44555b3c3789638a95693357cf59f5595José Fonseca } else 
{ 58638779e44555b3c3789638a95693357cf59f5595José Fonseca LLVMBuilderRef builder = gallivm->builder; 59638779e44555b3c3789638a95693357cf59f5595José Fonseca const unsigned length = LLVMGetVectorSize(vec_type); 60638779e44555b3c3789638a95693357cf59f5595José Fonseca LLVMValueRef undef = LLVMGetUndef(vec_type); 619cf67e51b06b4b136d03e642b18b4a4e36a1dabbJosé Fonseca /* The shuffle vector is always made of int32 elements */ 62638779e44555b3c3789638a95693357cf59f5595José Fonseca LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context); 639cf67e51b06b4b136d03e642b18b4a4e36a1dabbJosé Fonseca LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length); 64638779e44555b3c3789638a95693357cf59f5595José Fonseca 65638779e44555b3c3789638a95693357cf59f5595José Fonseca assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar)); 66638779e44555b3c3789638a95693357cf59f5595José Fonseca 679cf67e51b06b4b136d03e642b18b4a4e36a1dabbJosé Fonseca res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), ""); 689cf67e51b06b4b136d03e642b18b4a4e36a1dabbJosé Fonseca res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), ""); 696f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca } 706f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca 716f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca return res; 726f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca} 736f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca 746f5cd15f80d612e165078dfe2126cf44cf509d91José Fonseca 752c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca/** 762c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Broadcast 772c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */ 786f5cd15f80d612e165078dfe2126cf44cf509d91José FonsecaLLVMValueRef 794393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonsecalp_build_broadcast_scalar(struct lp_build_context *bld, 804393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca LLVMValueRef scalar) 814393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca{ 
82638779e44555b3c3789638a95693357cf59f5595José Fonseca assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar))); 83ff7542ab90154769930c80d58597cfec40844bd9José Fonseca 84638779e44555b3c3789638a95693357cf59f5595José Fonseca return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar); 854393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca} 864393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca 874393ca7956bd03e07b4d7a019705fc62d4ac7155José Fonseca 8834ea50f6720d6aca970613da560a1f25f5b5772cJosé Fonseca/** 893469715a8a171512cf9b528702e70393f01c6041José Fonseca * Combined extract and broadcast (mere shuffle in most cases) 903fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca */ 913fde8167a5d9c1e845053ae4e6a9cd49628adc71José FonsecaLLVMValueRef 92efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_extract_broadcast(struct gallivm_state *gallivm, 933fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca struct lp_type src_type, 943fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca struct lp_type dst_type, 953fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca LLVMValueRef vector, 963fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca LLVMValueRef index) 973fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca{ 98efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 993fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca LLVMValueRef res; 1003fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 1013fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca assert(src_type.floating == dst_type.floating); 1023fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca assert(src_type.width == dst_type.width); 1033fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 1043fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca assert(lp_check_value(src_type, vector)); 1053fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca assert(LLVMTypeOf(index) == i32t); 1063fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 
1073fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca if (src_type.length == 1) { 1083fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca if (dst_type.length == 1) { 1093fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca /* 1103fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca * Trivial scalar -> scalar. 1113fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca */ 1123fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 1133fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca res = vector; 1143fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca } 1153fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca else { 1163fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca /* 1173fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca * Broadcast scalar -> vector. 1183fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca */ 1193fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 120efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul res = lp_build_broadcast(gallivm, 121efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_vec_type(gallivm, dst_type), 1223fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca vector); 1233fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca } 1243fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca } 1253fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca else { 1263469715a8a171512cf9b528702e70393f01c6041José Fonseca if (dst_type.length > 1) { 1273fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca /* 1283469715a8a171512cf9b528702e70393f01c6041José Fonseca * shuffle - result can be of different length. 
1293fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca */ 1303fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 1313fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca LLVMValueRef shuffle; 132efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul shuffle = lp_build_broadcast(gallivm, 1333fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca LLVMVectorType(i32t, dst_type.length), 1343fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca index); 135efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul res = LLVMBuildShuffleVector(gallivm->builder, vector, 1363469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMGetUndef(lp_build_vec_type(gallivm, src_type)), 1373fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca shuffle, ""); 1383fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca } 1393fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca else { 1403469715a8a171512cf9b528702e70393f01c6041José Fonseca /* 1413469715a8a171512cf9b528702e70393f01c6041José Fonseca * Trivial extract scalar from vector. 1423469715a8a171512cf9b528702e70393f01c6041José Fonseca */ 1433469715a8a171512cf9b528702e70393f01c6041José Fonseca res = LLVMBuildExtractElement(gallivm->builder, vector, index, ""); 1443fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca } 1453fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca } 1463fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 1473fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca return res; 1483fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca} 1493fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 1503fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca 1513fde8167a5d9c1e845053ae4e6a9cd49628adc71José Fonseca/** 152fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * Swizzle one channel into other channels. 
15334ea50f6720d6aca970613da560a1f25f5b5772cJosé Fonseca */ 1544393ca7956bd03e07b4d7a019705fc62d4ac7155José FonsecaLLVMValueRef 15534ea50f6720d6aca970613da560a1f25f5b5772cJosé Fonsecalp_build_swizzle_scalar_aos(struct lp_build_context *bld, 15634ea50f6720d6aca970613da560a1f25f5b5772cJosé Fonseca LLVMValueRef a, 157fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton unsigned channel, 158fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton unsigned num_channels) 1591dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca{ 1606299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul LLVMBuilderRef builder = bld->gallivm->builder; 161b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca const struct lp_type type = bld->type; 1621dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca const unsigned n = type.length; 1631dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca unsigned i, j; 1641dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 165fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1) 1661dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca return a; 1671dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 168fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton assert(num_channels == 2 || num_channels == 4); 169fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 17028e46458bca3065baf0424b20e5b72cb672069e6José Fonseca /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing 17128e46458bca3065baf0424b20e5b72cb672069e6José Fonseca * using shuffles here actually causes worst results. More investigation is 17228e46458bca3065baf0424b20e5b72cb672069e6José Fonseca * needed. */ 1732eddf91faf4abf0b3d7b81a0a486fd46d5acd1eaJosé Fonseca if (LLVMIsConstant(a) || 1742eddf91faf4abf0b3d7b81a0a486fd46d5acd1eaJosé Fonseca type.width >= 16) { 1751dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca /* 1761dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca * Shuffle. 
1771dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca */ 178efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 1791dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 1801dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 181fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton for(j = 0; j < n; j += num_channels) 182fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton for(i = 0; i < num_channels; ++i) 1831dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); 1841dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 1856299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); 1861dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca } 187fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton else if (num_channels == 2) { 188fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton /* 189fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * Bit mask and shifts 190fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * 191fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * XY XY .... XY <= input 192fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * 0Y 0Y .... 0Y 193fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * YY YY .... YY 194fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * YY YY .... 
YY <= output 195fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton */ 196fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton struct lp_type type2; 197fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton LLVMValueRef tmp = NULL; 198fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton int shift; 199fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 200fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton a = LLVMBuildAnd(builder, a, 201fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton lp_build_const_mask_aos(bld->gallivm, 202fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton type, 1 << channel, num_channels), ""); 203fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 204fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton type2 = type; 205fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton type2.floating = FALSE; 206fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton type2.width *= 2; 207fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton type2.length /= 2; 208fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 209fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), ""); 210fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 2112151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson /* 2122151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Vector element 0 is always channel X. 
2132151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 2142151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 76 54 32 10 (array numbering) 2152151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Little endian reg in: YX YX YX YX 2162151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Little endian reg out: YY YY YY YY if shift right (shift == -1) 2172151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * XX XX XX XX if shift left (shift == 1) 2182151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 2192151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 01 23 45 67 (array numbering) 2202151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Big endian reg in: XY XY XY XY 2212151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Big endian reg out: YY YY YY YY if shift left (shift == 1) 2222151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * XX XX XX XX if shift right (shift == -1) 2232151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 2242151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson */ 225fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton#ifdef PIPE_ARCH_LITTLE_ENDIAN 226fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton shift = channel == 0 ? 1 : -1; 227fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton#else 228fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton shift = channel == 0 ? 
-1 : 1; 229fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton#endif 230fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 231fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton if (shift > 0) { 232fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), ""); 233fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton } else if (shift < 0) { 234fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), ""); 235fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton } 236fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 237fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton assert(tmp); 238fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton if (tmp) { 239fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton a = LLVMBuildOr(builder, a, tmp, ""); 240fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton } 241fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 242fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), ""); 243fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton } 2441dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca else { 2451dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca /* 2461dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca * Bit mask and recursive shifts 2471dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca * 2482151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Little-endian registers: 2492151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 2502151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 7654 3210 2512151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * WZYX WZYX .... WZYX <= input 2522151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 00Y0 00Y0 .... 00Y0 <= mask 2532151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 00YY 00YY .... 
00YY <= shift right 1 (shift amount -1) 2542151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * YYYY YYYY .... YYYY <= shift left 2 (shift amount 2) 2552151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 2562151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Big-endian registers: 2572151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 2582151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 0123 4567 25928e46458bca3065baf0424b20e5b72cb672069e6José Fonseca * XYZW XYZW .... XYZW <= input 2602151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 0Y00 0Y00 .... 0Y00 <= mask 2612151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * YY00 YY00 .... YY00 <= shift left 1 (shift amount 1) 2622151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * YYYY YYYY .... YYYY <= shift right 2 (shift amount -2) 2632151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 2642151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * shifts[] gives little-endian shift amounts; we need to negate for big-endian. 
2651dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca */ 266a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca struct lp_type type4; 267dd5c58081672bd495e0ffef1c3cc1229620f0f88Adhemerval Zanella const int shifts[4][2] = { 2681dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca { 1, 2}, 2691dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca {-1, 2}, 2701dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca { 1, -2}, 2711dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca {-1, -2} 2721dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca }; 2731dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca unsigned i; 2741dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 2756299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul a = LLVMBuildAnd(builder, a, 276efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_const_mask_aos(bld->gallivm, 2771d3789bccbbcc814fd7b339e9f5b5631e30d9f0eJames Benton type, 1 << channel, 4), ""); 2781dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 279a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca /* 280a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * Build a type where each element is an integer that cover the four 281a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * channels. 
282a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca */ 283a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 284a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca type4 = type; 285a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca type4.floating = FALSE; 2861dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca type4.width *= 4; 2871dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca type4.length /= 4; 2881dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 2896299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); 2901dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 2911dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca for(i = 0; i < 2; ++i) { 2921dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca LLVMValueRef tmp = NULL; 2931dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca int shift = shifts[channel][i]; 2941dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 2952151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson /* See endianness diagram above */ 2962151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson#ifdef PIPE_ARCH_BIG_ENDIAN 2971dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca shift = -shift; 2981dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca#endif 2991dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 3001dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca if(shift > 0) 3012151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); 3021dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca if(shift < 0) 3032151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); 3041dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 3051dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca assert(tmp); 3061dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca if(tmp) 
3076299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul a = LLVMBuildOr(builder, a, tmp, ""); 3081dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca } 3091dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 3106299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), ""); 3111dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca } 3121dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca} 3131dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 3141dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 315fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton/** 316fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * Swizzle a vector consisting of an array of XYZW structs. 317fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * 318fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * This fills a vector of dst_len length with the swizzled channels from src. 319fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * 320fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * e.g. 
with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in 321fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * RGBA RGBA = BGR BGR BG 322fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * 323fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * @param swizzles the swizzle array 324fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * @param num_swizzles the number of elements in swizzles 325fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * @param dst_len the length of the result 326fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton */ 327fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames BentonLLVMValueRef 328fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Bentonlp_build_swizzle_aos_n(struct gallivm_state* gallivm, 329fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton LLVMValueRef src, 330fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton const unsigned char* swizzles, 331fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton unsigned num_swizzles, 332fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton unsigned dst_len) 333fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton{ 334fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton LLVMBuilderRef builder = gallivm->builder; 335fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH]; 336fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton unsigned i; 337fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 338fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton assert(dst_len < LP_MAX_VECTOR_WIDTH); 339fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 340fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton for (i = 0; i < dst_len; ++i) { 341fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton int swizzle = swizzles[i % num_swizzles]; 342fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 343fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton if (swizzle == LP_BLD_SWIZZLE_DONTCARE) { 344fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton shuffles[i] = 
LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 345fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton } else { 346fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton shuffles[i] = lp_build_const_int32(gallivm, swizzle); 347fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton } 348fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton } 349fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 350fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), ""); 351fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton} 352fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 353fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 3541dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé FonsecaLLVMValueRef 355a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonsecalp_build_swizzle_aos(struct lp_build_context *bld, 356a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca LLVMValueRef a, 357a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca const unsigned char swizzles[4]) 3581dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca{ 3596299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul LLVMBuilderRef builder = bld->gallivm->builder; 360a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca const struct lp_type type = bld->type; 361a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca const unsigned n = type.length; 3621dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca unsigned i, j; 3631dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 364fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák if (swizzles[0] == PIPE_SWIZZLE_X && 365fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák swizzles[1] == PIPE_SWIZZLE_Y && 366fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák swizzles[2] == PIPE_SWIZZLE_Z && 367fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák swizzles[3] == PIPE_SWIZZLE_W) { 3681dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca return a; 
369a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 3701dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 371a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca if (swizzles[0] == swizzles[1] && 372a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca swizzles[1] == swizzles[2] && 373a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca swizzles[2] == swizzles[3]) { 374a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca switch (swizzles[0]) { 375fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_X: 376fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_Y: 377fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_Z: 378fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_W: 379fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4); 380fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_0: 381a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca return bld->zero; 382fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_1: 383a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca return bld->one; 3843469715a8a171512cf9b528702e70393f01c6041José Fonseca case LP_BLD_SWIZZLE_DONTCARE: 3853469715a8a171512cf9b528702e70393f01c6041José Fonseca return bld->undef; 386a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca default: 387a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca assert(0); 388a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca return bld->undef; 389a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 390a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 3911dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 3922eddf91faf4abf0b3d7b81a0a486fd46d5acd1eaJosé Fonseca if (LLVMIsConstant(a) || 3932eddf91faf4abf0b3d7b81a0a486fd46d5acd1eaJosé Fonseca type.width >= 16) { 3941dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca /* 3951dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca * Shuffle. 
3961dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca */ 397efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type)); 398efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); 3991dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 400a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; 401a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 402a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca memset(aux, 0, sizeof aux); 403a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 404a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca for(j = 0; j < n; j += 4) { 405a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca for(i = 0; i < 4; ++i) { 406a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca unsigned shuffle; 407a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca switch (swizzles[i]) { 408a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca default: 409a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca assert(0); 410a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca /* fall through */ 411fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_X: 412fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_Y: 413fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_Z: 414fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_W: 415a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca shuffle = j + swizzles[i]; 4163469715a8a171512cf9b528702e70393f01c6041José Fonseca shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 417a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca break; 418fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_0: 419a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca shuffle = type.length + 0; 4203469715a8a171512cf9b528702e70393f01c6041José Fonseca 
shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 421a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca if (!aux[0]) { 422efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0); 423a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 424a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca break; 425fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_1: 426a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca shuffle = type.length + 1; 4273469715a8a171512cf9b528702e70393f01c6041José Fonseca shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 428a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca if (!aux[1]) { 429efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0); 430a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 431a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca break; 4323469715a8a171512cf9b528702e70393f01c6041José Fonseca case LP_BLD_SWIZZLE_DONTCARE: 4333469715a8a171512cf9b528702e70393f01c6041José Fonseca shuffles[j + i] = LLVMGetUndef(i32t); 4343469715a8a171512cf9b528702e70393f01c6041José Fonseca break; 435a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 436a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 437a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 4381dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 439a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca for (i = 0; i < n; ++i) { 440a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca if (!aux[i]) { 441a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca aux[i] = undef; 442a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 443a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 4441dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 4456299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul return LLVMBuildShuffleVector(builder, a, 446a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca LLVMConstVector(aux, n), 
447a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca LLVMConstVector(shuffles, n), ""); 448a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } else { 449a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca /* 450a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * Bit mask and shifts. 451a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * 452a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * For example, this will convert BGRA to RGBA by doing 453a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * 454e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella * Little endian: 455a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * rgba = (bgra & 0x00ff0000) >> 16 456a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * | (bgra & 0xff00ff00) 457a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * | (bgra & 0x000000ff) << 16 458a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * 459e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella * Big endian:A 460e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella * rgba = (bgra & 0x0000ff00) << 16 461e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella * | (bgra & 0x00ff00ff) 462e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella * | (bgra & 0xff000000) >> 16 463e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella * 464a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * This is necessary not only for faster cause, but because X86 backend 465a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * will refuse shuffles of <4 x i8> vectors 466a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca */ 467a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca LLVMValueRef res; 468a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca struct lp_type type4; 4696ed726b8fc6210a41fe325591e1428d19f419108José Fonseca unsigned cond = 0; 470ace70aedcf8b29380a17f68a994b18f60976bca6Jan Vesely int chan; 471a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca int shift; 
472a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 473a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca /* 474a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * Start with a mixture of 1 and 0. 475a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca */ 476a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca for (chan = 0; chan < 4; ++chan) { 477fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák if (swizzles[chan] == PIPE_SWIZZLE_1) { 4786ed726b8fc6210a41fe325591e1428d19f419108José Fonseca cond |= 1 << chan; 4796ed726b8fc6210a41fe325591e1428d19f419108José Fonseca } 480a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 481fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4); 482a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 483a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca /* 484a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * Build a type where each element is an integer that cover the four 485a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * channels. 
486a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca */ 487a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca type4 = type; 488a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca type4.floating = FALSE; 489a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca type4.width *= 4; 490a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca type4.length /= 4; 491a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 4926299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); 4936299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), ""); 494a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 495a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca /* 496a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca * Mask and shift the channels, trying to group as many channels in the 4972151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * same shift as possible. The shift amount is positive for shifts left 4982151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * and negative for shifts right. 499a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca */ 500a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca for (shift = -3; shift <= 3; ++shift) { 501369b5a311ca5e03bc4cccc3052800b94e316087dBrian Paul uint64_t mask = 0; 502a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 503a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca assert(type4.width <= sizeof(mask)*8); 504a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 5052151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson /* 5062151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Vector element numbers follow the XYZW order, so 0 is always X, etc. 
5072151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * After widening 4 times we have: 5082151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 5092151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 3210 5102151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Little-endian register layout: WZYX 5112151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 5122151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 0123 5132151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * Big-endian register layout: XYZW 5142151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * 5152151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * For little-endian, higher-numbered channels are obtained by a shift right 5162151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * (negative shift amount) and lower-numbered channels by a shift left 5172151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson * (positive shift amount). The opposite is true for big-endian. 5182151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson */ 519a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca for (chan = 0; chan < 4; ++chan) { 5202151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson if (swizzles[chan] < 4) { 5212151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson /* We need to move channel swizzles[chan] into channel chan */ 522e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella#ifdef PIPE_ARCH_LITTLE_ENDIAN 5232151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson if (swizzles[chan] - chan == -shift) { 5242151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); 5252151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson } 526e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella#else 5272151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson if (swizzles[chan] - chan == shift) { 5282151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width); 
5292151d893fbd4a4be092098170e2fbca8c35797a5Adam Jackson } 530e25abacc1883f1b2e09c32230e35ffae7df5e61bAdhemerval Zanella#endif 531a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 532a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 533a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 534a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca if (mask) { 535a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca LLVMValueRef masked; 536a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca LLVMValueRef shifted; 537a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca if (0) 538369b5a311ca5e03bc4cccc3052800b94e316087dBrian Paul debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask); 539a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 5406299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul masked = LLVMBuildAnd(builder, a, 541efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_const_int_vec(bld->gallivm, type4, mask), ""); 542a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca if (shift > 0) { 5436299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul shifted = LLVMBuildShl(builder, masked, 544efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); 545a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } else if (shift < 0) { 5466299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul shifted = LLVMBuildLShr(builder, masked, 547efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); 548a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } else { 549a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca shifted = masked; 550a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 551a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 5526299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul res = LLVMBuildOr(builder, res, shifted, ""); 553a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 
554a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca } 555a70ec096aaece3aaadc1a8307e32554f7ad4d082José Fonseca 5566299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul return LLVMBuildBitCast(builder, res, 557efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul lp_build_vec_type(bld->gallivm, type), ""); 5581dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca } 5591dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca} 5601dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 5611dd7bb17c7331f9ecd0bc830b61ada235a56fe6dJosé Fonseca 5622c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca/** 5632c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Extended swizzle of a single channel of a SoA vector. 5642c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * 5652c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param bld building context 5662c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param unswizzled array with the 4 unswizzled values 5672c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param swizzle one of the PIPE_SWIZZLE_* 5682c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * 5692c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @return the swizzled value. 
5702c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */ 5712c2debaea71eb99322c2371f1c581e9748cda91fJosé FonsecaLLVMValueRef 5722c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecalp_build_swizzle_soa_channel(struct lp_build_context *bld, 5732c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca const LLVMValueRef *unswizzled, 5742c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca unsigned swizzle) 5752c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca{ 5762c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca switch (swizzle) { 577fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_X: 578fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_Y: 579fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_Z: 580fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_W: 5812c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca return unswizzled[swizzle]; 582fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_0: 5832c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca return bld->zero; 584fb523cb6ad3ffef22ab4b9cce9e53859c17c5739Marek Olšák case PIPE_SWIZZLE_1: 5852c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca return bld->one; 5862c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca default: 5872c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca assert(0); 5882c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca return bld->undef; 5892c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca } 5902c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca} 5912c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 5922c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 5932c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca/** 5942c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Extended swizzle of a SoA vector. 
5952c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * 5962c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param bld building context 5972c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param unswizzled array with the 4 unswizzled values 5982c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param swizzles array of PIPE_SWIZZLE_* 5992c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param swizzled output swizzled values 6002c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */ 6012c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecavoid 6022c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecalp_build_swizzle_soa(struct lp_build_context *bld, 6032c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca const LLVMValueRef *unswizzled, 6042c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca const unsigned char swizzles[4], 6052c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca LLVMValueRef *swizzled) 6062c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca{ 6072c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca unsigned chan; 6082c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 6092c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca for (chan = 0; chan < 4; ++chan) { 6102c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, 6112c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca swizzles[chan]); 6122c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca } 6132c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca} 6142c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 6152c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 6162c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca/** 6172c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * Do an extended swizzle of a SoA vector inplace. 
6182c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * 6192c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param bld building context 6202c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param values intput/output array with the 4 values 6212c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca * @param swizzles array of PIPE_SWIZZLE_* 6222c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca */ 6232c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecavoid 6242c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonsecalp_build_swizzle_soa_inplace(struct lp_build_context *bld, 6252c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca LLVMValueRef *values, 6262c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca const unsigned char swizzles[4]) 6272c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca{ 6282c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca LLVMValueRef unswizzled[4]; 6292c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca unsigned chan; 6302c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 6312c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca for (chan = 0; chan < 4; ++chan) { 6322c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca unswizzled[chan] = values[chan]; 6332c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca } 6342c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca 6352c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca lp_build_swizzle_soa(bld, unswizzled, swizzles, values); 6362c2debaea71eb99322c2371f1c581e9748cda91fJosé Fonseca} 6373469715a8a171512cf9b528702e70393f01c6041José Fonseca 6383469715a8a171512cf9b528702e70393f01c6041José Fonseca 6393469715a8a171512cf9b528702e70393f01c6041José Fonseca/** 6403469715a8a171512cf9b528702e70393f01c6041José Fonseca * Transpose from AOS <-> SOA 6413469715a8a171512cf9b528702e70393f01c6041José Fonseca * 6423469715a8a171512cf9b528702e70393f01c6041José Fonseca * @param single_type_lp type of pixels 6433469715a8a171512cf9b528702e70393f01c6041José Fonseca * @param src the 4 * n pixel input 
6443469715a8a171512cf9b528702e70393f01c6041José Fonseca * @param dst the 4 * n pixel output 6453469715a8a171512cf9b528702e70393f01c6041José Fonseca */ 6463469715a8a171512cf9b528702e70393f01c6041José Fonsecavoid 6473469715a8a171512cf9b528702e70393f01c6041José Fonsecalp_build_transpose_aos(struct gallivm_state *gallivm, 6483469715a8a171512cf9b528702e70393f01c6041José Fonseca struct lp_type single_type_lp, 6493469715a8a171512cf9b528702e70393f01c6041José Fonseca const LLVMValueRef src[4], 6503469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMValueRef dst[4]) 6513469715a8a171512cf9b528702e70393f01c6041José Fonseca{ 6523469715a8a171512cf9b528702e70393f01c6041José Fonseca struct lp_type double_type_lp = single_type_lp; 6533469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMTypeRef single_type; 6543469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMTypeRef double_type; 6553469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMValueRef t0, t1, t2, t3; 6563469715a8a171512cf9b528702e70393f01c6041José Fonseca 6573469715a8a171512cf9b528702e70393f01c6041José Fonseca double_type_lp.length >>= 1; 6583469715a8a171512cf9b528702e70393f01c6041José Fonseca double_type_lp.width <<= 1; 6593469715a8a171512cf9b528702e70393f01c6041José Fonseca 6603469715a8a171512cf9b528702e70393f01c6041José Fonseca double_type = lp_build_vec_type(gallivm, double_type_lp); 6613469715a8a171512cf9b528702e70393f01c6041José Fonseca single_type = lp_build_vec_type(gallivm, single_type_lp); 6623469715a8a171512cf9b528702e70393f01c6041José Fonseca 6633469715a8a171512cf9b528702e70393f01c6041José Fonseca /* Interleave x, y, z, w -> xy and zw */ 6643469715a8a171512cf9b528702e70393f01c6041José Fonseca t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0); 6653469715a8a171512cf9b528702e70393f01c6041José Fonseca t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0); 6663469715a8a171512cf9b528702e70393f01c6041José Fonseca t2 = lp_build_interleave2_half(gallivm, 
single_type_lp, src[0], src[1], 1); 6673469715a8a171512cf9b528702e70393f01c6041José Fonseca t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1); 6683469715a8a171512cf9b528702e70393f01c6041José Fonseca 6693469715a8a171512cf9b528702e70393f01c6041José Fonseca /* Cast to double width type for second interleave */ 6703469715a8a171512cf9b528702e70393f01c6041José Fonseca t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0"); 6713469715a8a171512cf9b528702e70393f01c6041José Fonseca t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1"); 6723469715a8a171512cf9b528702e70393f01c6041José Fonseca t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2"); 6733469715a8a171512cf9b528702e70393f01c6041José Fonseca t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3"); 6743469715a8a171512cf9b528702e70393f01c6041José Fonseca 6753469715a8a171512cf9b528702e70393f01c6041José Fonseca /* Interleave xy, zw -> xyzw */ 6763469715a8a171512cf9b528702e70393f01c6041José Fonseca dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0); 6773469715a8a171512cf9b528702e70393f01c6041José Fonseca dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1); 6783469715a8a171512cf9b528702e70393f01c6041José Fonseca dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0); 6793469715a8a171512cf9b528702e70393f01c6041José Fonseca dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1); 6803469715a8a171512cf9b528702e70393f01c6041José Fonseca 6813469715a8a171512cf9b528702e70393f01c6041José Fonseca /* Cast back to original single width type */ 6823469715a8a171512cf9b528702e70393f01c6041José Fonseca dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0"); 6833469715a8a171512cf9b528702e70393f01c6041José Fonseca dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1"); 6843469715a8a171512cf9b528702e70393f01c6041José Fonseca dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], 
single_type, "dst2"); 6853469715a8a171512cf9b528702e70393f01c6041José Fonseca dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3"); 6863469715a8a171512cf9b528702e70393f01c6041José Fonseca} 6873469715a8a171512cf9b528702e70393f01c6041José Fonseca 6883469715a8a171512cf9b528702e70393f01c6041José Fonseca 6893469715a8a171512cf9b528702e70393f01c6041José Fonseca/** 690fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton * Transpose from AOS <-> SOA for num_srcs 691fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton */ 692fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Bentonvoid 693fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Bentonlp_build_transpose_aos_n(struct gallivm_state *gallivm, 694fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton struct lp_type type, 695fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton const LLVMValueRef* src, 696fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton unsigned num_srcs, 697fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton LLVMValueRef* dst) 698fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton{ 699fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton switch (num_srcs) { 700fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton case 1: 701fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton dst[0] = src[0]; 702fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton break; 703fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 704fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton case 2: 705fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton { 706fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton /* Note: we must use a temporary incase src == dst */ 707fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton LLVMValueRef lo, hi; 708fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 709fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0); 710fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton hi = lp_build_interleave2_half(gallivm, type, 
src[0], src[1], 1); 711fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 712fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton dst[0] = lo; 713fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton dst[1] = hi; 714fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton break; 715fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton } 716fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 717fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton case 4: 718fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton lp_build_transpose_aos(gallivm, type, src, dst); 719fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton break; 720fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 721fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton default: 722fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton assert(0); 72367d4b4b28c358845f0c0b9f6cacd5e611c746313Edward O'Callaghan } 724fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton} 725fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 726fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton 727fa1b481c09b14e01eca1b3db8e0854033f6dee3dJames Benton/** 7280b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger * Pack n-th element of aos values, 7293469715a8a171512cf9b528702e70393f01c6041José Fonseca * pad out to destination size. 7300b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger * i.e. 
x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _ 7313469715a8a171512cf9b528702e70393f01c6041José Fonseca */ 7323469715a8a171512cf9b528702e70393f01c6041José FonsecaLLVMValueRef 7333469715a8a171512cf9b528702e70393f01c6041José Fonsecalp_build_pack_aos_scalars(struct gallivm_state *gallivm, 7343469715a8a171512cf9b528702e70393f01c6041José Fonseca struct lp_type src_type, 7353469715a8a171512cf9b528702e70393f01c6041José Fonseca struct lp_type dst_type, 7360b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger const LLVMValueRef src, 7370b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger unsigned channel) 7383469715a8a171512cf9b528702e70393f01c6041José Fonseca{ 7393469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 7403469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMValueRef undef = LLVMGetUndef(i32t); 7413469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 7423469715a8a171512cf9b528702e70393f01c6041José Fonseca unsigned num_src = src_type.length / 4; 7433469715a8a171512cf9b528702e70393f01c6041José Fonseca unsigned num_dst = dst_type.length; 7443469715a8a171512cf9b528702e70393f01c6041José Fonseca unsigned i; 7453469715a8a171512cf9b528702e70393f01c6041José Fonseca 7463469715a8a171512cf9b528702e70393f01c6041José Fonseca assert(num_src <= num_dst); 7473469715a8a171512cf9b528702e70393f01c6041José Fonseca 7483469715a8a171512cf9b528702e70393f01c6041José Fonseca for (i = 0; i < num_src; i++) { 7490b6554ba6f2aa8a771852566340c24205e406d02Roland Scheidegger shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0); 7503469715a8a171512cf9b528702e70393f01c6041José Fonseca } 7513469715a8a171512cf9b528702e70393f01c6041José Fonseca for (i = num_src; i < num_dst; i++) { 7523469715a8a171512cf9b528702e70393f01c6041José Fonseca shuffles[i] = undef; 7533469715a8a171512cf9b528702e70393f01c6041José Fonseca } 7543469715a8a171512cf9b528702e70393f01c6041José Fonseca 
7553469715a8a171512cf9b528702e70393f01c6041José Fonseca if (num_dst == 1) { 7563469715a8a171512cf9b528702e70393f01c6041José Fonseca return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], ""); 7573469715a8a171512cf9b528702e70393f01c6041José Fonseca } 7583469715a8a171512cf9b528702e70393f01c6041José Fonseca else { 7593469715a8a171512cf9b528702e70393f01c6041José Fonseca return LLVMBuildShuffleVector(gallivm->builder, src, src, 7603469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMConstVector(shuffles, num_dst), ""); 7613469715a8a171512cf9b528702e70393f01c6041José Fonseca } 7623469715a8a171512cf9b528702e70393f01c6041José Fonseca} 7633469715a8a171512cf9b528702e70393f01c6041José Fonseca 7643469715a8a171512cf9b528702e70393f01c6041José Fonseca 7653469715a8a171512cf9b528702e70393f01c6041José Fonseca/** 7663469715a8a171512cf9b528702e70393f01c6041José Fonseca * Unpack and broadcast packed aos values consisting of only the 7673469715a8a171512cf9b528702e70393f01c6041José Fonseca * first value, i.e. 
x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2 7683469715a8a171512cf9b528702e70393f01c6041José Fonseca */ 7693469715a8a171512cf9b528702e70393f01c6041José FonsecaLLVMValueRef 7703469715a8a171512cf9b528702e70393f01c6041José Fonsecalp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm, 7713469715a8a171512cf9b528702e70393f01c6041José Fonseca struct lp_type src_type, 7723469715a8a171512cf9b528702e70393f01c6041José Fonseca struct lp_type dst_type, 7733469715a8a171512cf9b528702e70393f01c6041José Fonseca const LLVMValueRef src) 7743469715a8a171512cf9b528702e70393f01c6041José Fonseca{ 7753469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 7763469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 7773469715a8a171512cf9b528702e70393f01c6041José Fonseca unsigned num_dst = dst_type.length; 7783469715a8a171512cf9b528702e70393f01c6041José Fonseca unsigned num_src = dst_type.length / 4; 7793469715a8a171512cf9b528702e70393f01c6041José Fonseca unsigned i; 7803469715a8a171512cf9b528702e70393f01c6041José Fonseca 7813469715a8a171512cf9b528702e70393f01c6041José Fonseca assert(num_dst / 4 <= src_type.length); 7823469715a8a171512cf9b528702e70393f01c6041José Fonseca 7833469715a8a171512cf9b528702e70393f01c6041José Fonseca for (i = 0; i < num_src; i++) { 7843469715a8a171512cf9b528702e70393f01c6041José Fonseca shuffles[i*4] = LLVMConstInt(i32t, i, 0); 7853469715a8a171512cf9b528702e70393f01c6041José Fonseca shuffles[i*4+1] = LLVMConstInt(i32t, i, 0); 7863469715a8a171512cf9b528702e70393f01c6041José Fonseca shuffles[i*4+2] = LLVMConstInt(i32t, i, 0); 7873469715a8a171512cf9b528702e70393f01c6041José Fonseca shuffles[i*4+3] = LLVMConstInt(i32t, i, 0); 7883469715a8a171512cf9b528702e70393f01c6041José Fonseca } 7893469715a8a171512cf9b528702e70393f01c6041José Fonseca 7903469715a8a171512cf9b528702e70393f01c6041José Fonseca if (num_src == 1) { 
7913469715a8a171512cf9b528702e70393f01c6041José Fonseca return lp_build_extract_broadcast(gallivm, src_type, dst_type, 7923469715a8a171512cf9b528702e70393f01c6041José Fonseca src, shuffles[0]); 7933469715a8a171512cf9b528702e70393f01c6041José Fonseca } 7943469715a8a171512cf9b528702e70393f01c6041José Fonseca else { 7953469715a8a171512cf9b528702e70393f01c6041José Fonseca return LLVMBuildShuffleVector(gallivm->builder, src, src, 7963469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMConstVector(shuffles, num_dst), ""); 7973469715a8a171512cf9b528702e70393f01c6041José Fonseca } 7983469715a8a171512cf9b528702e70393f01c6041José Fonseca} 7993469715a8a171512cf9b528702e70393f01c6041José Fonseca 800