lp_bld_logic.c revision 2a45972fb2ba12a6561e5cba84d167f4c30566d4
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/************************************************************************** 21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc. 41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved. 51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a 71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the 81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including 91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish, 101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to 111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to 121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions: 131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the 151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions 161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software. 171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/ 271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/** 295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file 305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations. 315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * 325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com> 335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */ 345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca 351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h" 3788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca#include "util/u_memory.h" 38854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h" 397cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca 401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h" 4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h" 421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h" 431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h" 441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 46a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/* 47a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX 48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like 50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * select <4 x i1> %C, %A, %B 52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not 54a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * supported on any backend. 55a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 56a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Expanding the boolean vector to full SIMD register width, as in 57a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 58a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * sext <4 x i1> %C to <4 x i32> 59a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 60a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 61a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * it causes assertion failures in LLVM 2.6. It appears to work correctly on 62a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * LLVM 2.7. 63a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca */ 64a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca 65a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca 66e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/** 672297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func. 682297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 69877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 70e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */ 711aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef 722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder, 732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul const struct lp_type type, 742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{ 781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef zeros = LLVMConstNull(int_vec_type); 801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef cond; 82241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMValueRef res; 831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 84e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func >= PIPE_FUNC_NEVER); 85e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func <= PIPE_FUNC_ALWAYS); 86e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul 871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_NEVER) 881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return zeros; 891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_ALWAYS) 901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return ones; 911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* TODO: optimize the constant case */ 931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* XXX: It is not clear if we should use the ordered or unordered operators */ 951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 96489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207 971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.width * type.length == 128) { 997cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca if(type.floating && util_cpu_caps.has_sse) { 100e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* float[4] comparison */ 1017b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef args[3]; 1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca unsigned cc; 1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca boolean swap; 1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = FALSE; 1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 0; 1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 4; 1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 1302297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(swap) { 1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = b; 1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = a; 1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = a; 1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = b; 1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 1432297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, 1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca "llvm.x86.sse.cmp.ps", 1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca vec_type, 1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args, 3); 1472297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return res; 1491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1507cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca else if(util_cpu_caps.has_sse2) { 151e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* int[4] comparison */ 152d07b0383660cd318ba43abad11bb80de4a124951José Fonseca static const struct { 153d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned swap:1; 154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned eq:1; 155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned gt:1; 156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned not:1; 157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } table[] = { 158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 162d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 166d07b0383660cd318ba43abad11bb80de4a124951José Fonseca }; 167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpeq; 168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpgt; 169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef args[2]; 170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef res; 1717b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca switch (type.width) { 174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 8: 175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 16: 179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 32: 183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca default: 187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca assert(0); 1882297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 190d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 191b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca /* There are no unsigned comparison instructions. So flip the sign bit 192b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca * so that the results match. 193d07b0383660cd318ba43abad11bb80de4a124951José Fonseca */ 194b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca if (table[func].gt && !type.sign) { 195185be3a87a5b38e8821a560c073975c11dcbd3e9Brian Paul LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); 1962297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul a = LLVMBuildXor(builder, a, msb, ""); 1972297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul b = LLVMBuildXor(builder, b, msb, ""); 198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].swap) { 201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = b; 202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = a; 203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else { 205d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = a; 206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = b; 207d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 208d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 209d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].eq) 2102297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 211d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else if (table[func].gt) 2122297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 213d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else 214d07b0383660cd318ba43abad11bb80de4a124951José Fonseca res = LLVMConstNull(vec_type); 215d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 216d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].not) 2172297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildNot(builder, res, ""); 218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 219d07b0383660cd318ba43abad11bb80de4a124951José Fonseca return res; 220d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 2212ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } /* if (type.width * type.length == 128) */ 2221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif 223a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */ 2241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 2251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.floating) { 2261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMRealPredicate op; 2271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NEVER: 2291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateFalse; 2301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_ALWAYS: 2321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateTrue; 2331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUEQ; 2361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUNE; 2391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULT; 2421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULE; 2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGT; 2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGE; 2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 2542297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 256241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 257a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 2582297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildFCmp(builder, op, a, b, ""); 259a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 260241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 2612ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 262e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildFCmp(builder, op, a, b, ""); 263e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 2642ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 2652ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 2667b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 2677b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 268e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 269e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 2702ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul debug_printf("%s: warning: using slow element-wise float" 2712ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul " vector comparison\n", __FUNCTION__); 2722ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for (i = 0; i < type.length; ++i) { 2732ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 2742ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildFCmp(builder, op, 2752ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 2762ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 2772ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 2782ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 2792ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 2802ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 2812ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 2822ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 2832ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 284241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 285241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 2861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 2871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 2881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMIntPredicate op; 2891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntEQ; 2921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntNE; 2951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 2971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLT : LLVMIntULT; 2981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 3001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLE : LLVMIntULE; 3011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 3031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGT : LLVMIntUGT; 3041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGE : LLVMIntUGE; 3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 3091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 3102297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 3111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 312241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 313a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 3142297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildICmp(builder, op, a, b, ""); 315a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 316241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 3172ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 318e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildICmp(builder, op, a, b, ""); 319e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 3202ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 3212ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 3227b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 3237b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 324e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 325e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 3262ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul debug_printf("%s: warning: using slow element-wise int" 3272ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul " vector comparison\n", __FUNCTION__); 3282ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul 3292ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for(i = 0; i < type.length; ++i) { 3302ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 3312ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildICmp(builder, op, 3322ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 3332ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 3342ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3352ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 3362ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 3372ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 3382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3392ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 3402ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 341241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 342241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 3431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 3441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 345241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca return res; 3461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca} 34709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 34809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 3492297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3502297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/** 3512297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func. 3522297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 353877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 3542297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */ 3552297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef 3562297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld, 3572297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 3582297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 3592297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 3602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{ 3612297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_compare(bld->builder, bld->type, func, a, b); 3622297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul} 3632297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3642297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 365877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/** 3662a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * Return (mask & a) | (~mask & b); 3672a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */ 3682a45972fb2ba12a6561e5cba84d167f4c30566d4José FonsecaLLVMValueRef 3692a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonsecalp_build_select_bitwise(struct lp_build_context *bld, 3702a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef mask, 3712a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef a, 3722a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef b) 3732a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca{ 3742a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca struct lp_type type = bld->type; 3752a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef res; 3762a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3772a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if (a == b) { 3782a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca return a; 3792a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 3802a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3812a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if(type.floating) { 3822a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 3832a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 3842a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 3852a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 3862a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3872a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca a = LLVMBuildAnd(bld->builder, a, mask, ""); 3882a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3892a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca /* This often gets translated to PANDN, but sometimes the NOT is 3902a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * pre-computed and stored in another constant. The best strategy depends 3912a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * on available registers, so it is not a big deal -- hopefully LLVM does 3922a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * the right decision attending the rest of the program. 3932a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */ 3942a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 3952a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3962a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = LLVMBuildOr(bld->builder, a, b, ""); 3972a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3982a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if(type.floating) { 3992a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMTypeRef vec_type = lp_build_vec_type(type); 4002a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 4012a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 4022a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4032a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca return res; 4042a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca} 4052a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4062a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4072a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca/** 408877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b; 409e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca * 4102a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 4112a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * will yield unpredictable results. 412877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */ 41309a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 41409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld, 41509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask, 41609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 41709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b) 41809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 419b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca struct lp_type type = bld->type; 42009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef res; 42109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 42209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 42309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 42409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 4252ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 426e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); 4272ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildSelect(bld->builder, mask, a, b, ""); 42809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 42988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca else if (util_cpu_caps.has_sse4_1 && 43088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca type.width * type.length == 128 && 43188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(a) && 43288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(b) && 43388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(mask)) { 43488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca const char *intrinsic; 43588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca LLVMTypeRef arg_type; 43688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca LLVMValueRef args[3]; 43788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 43888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (type.width == 64) { 43988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.blendvpd"; 44088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMDoubleType(), 2); 44188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } else if (type.width == 32) { 44288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.blendvps"; 44388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMFloatType(), 4); 44488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } else { 44588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.pblendvb"; 44688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMInt8Type(), 16); 44788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 44888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 44988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->int_vec_type) { 45088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca mask = LLVMBuildBitCast(bld->builder, mask, arg_type, ""); 45188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 45288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 45388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->vec_type) { 45488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca a = LLVMBuildBitCast(bld->builder, a, arg_type, ""); 45588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca b = LLVMBuildBitCast(bld->builder, b, arg_type, ""); 45688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 45788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 45888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[0] = b; 45988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[1] = a; 46088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[2] = mask; 46188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 46288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca res = lp_build_intrinsic(bld->builder, intrinsic, 46388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type, args, Elements(args)); 46488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 46588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->vec_type) { 46688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); 46788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 46888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 4692ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 4702a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = lp_build_select_bitwise(bld, mask, a, b); 47109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 47209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 47309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return res; 47409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 47509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 47609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 47709a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 47809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld, 47909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 48009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b, 4815e13e987da6ce656b08f6c25f8d373c80949e3b0José Fonseca const boolean cond[4]) 48209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 483b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca const struct lp_type type = bld->type; 48409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca const unsigned n = type.length; 48509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca unsigned i, j; 48609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 48709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 48809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 48909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(cond[0] && cond[1] && cond[2] && cond[3]) 49009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 49109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) 49209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return b; 49309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == bld->undef || b == bld->undef) 49409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return bld->undef; 49509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 49609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 49709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * There are three major ways of accomplishing this: 49809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a shuffle, 49909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a select, 50009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - or with a bit mask. 50109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * 50209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Select isn't supported for vector types yet. 50309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * The flip between these is empirical and might need to be. 50409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 50509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if (n <= 4) { 50609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 50709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Shuffle. 50809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 50909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt32Type(); 51009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 51109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 51209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 51309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 51409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); 51509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 51609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 51709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 5181fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca else { 51909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0 5201fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca /* XXX: Unfortunately select of vectors do not work */ 52109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* Use a select */ 52209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt1Type(); 52309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; 52409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 52509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 52609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 52709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); 52809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 52909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); 5301fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else 53109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask = lp_build_const_mask_aos(type, cond); 53209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return lp_build_select(bld, mask, a, b); 5331fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif 53409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 53509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 53685c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin 5372b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul 5382b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul/** Return (a & ~b) */ 5392b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian PaulLLVMValueRef 5402b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paullp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) 5412b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul{ 5422b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul b = LLVMBuildNot(bld->builder, b, ""); 5432b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul b = LLVMBuildAnd(bld->builder, a, b, ""); 5442b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul return b; 5452b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul} 546