lp_bld_logic.c revision b3d4e5bd26a44870af7d2413cca7a6f576a0984a
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/************************************************************************** 21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc. 41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved. 51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a 71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the 81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including 91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish, 101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to 111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to 121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions: 131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the 151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions 161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software. 
171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/ 271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/** 295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file 305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations. 
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * 325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com> 335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */ 345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca 351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h" 37854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h" 387cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca 391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h" 4009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h" 411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h" 421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h" 431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 45a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/* 46a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX 47a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like 49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * select <4 x i1> %C, %A, %B 51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not 53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * supported on any backend. 
 *
 * Expanding the boolean vector to full SIMD register width, as in
 *
 *    sext <4 x i1> %C to <4 x i32>
 *
 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
 * it causes assertion failures in LLVM 2.6. It appears to work correctly on
 * LLVM 2.7.
 */


/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func  one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true.
69e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */ 701aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef 712297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder, 722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul const struct lp_type type, 732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{ 771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef zeros = LLVMConstNull(int_vec_type); 791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef cond; 81241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMValueRef res; 821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 83e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func >= PIPE_FUNC_NEVER); 84e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func <= PIPE_FUNC_ALWAYS); 85e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul 861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_NEVER) 871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return zeros; 881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_ALWAYS) 891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return ones; 901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* TODO: optimize the constant case */ 921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* XXX: It is not clear if we should use the ordered or unordered operators */ 941aede69d3a8d288af11c2ef620b51e71c2ce89b2José 
Fonseca 95489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207 961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.width * type.length == 128) { 987cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca if(type.floating && util_cpu_caps.has_sse) { 99e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* float[4] comparison */ 1007b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef args[3]; 1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca unsigned cc; 1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca boolean swap; 1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = FALSE; 1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 0; 1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 4; 1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José 
Fonseca swap = TRUE; 1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 1292297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(swap) { 1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = b; 1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = a; 1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = a; 1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = b; 1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 1422297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, 1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca "llvm.x86.sse.cmp.ps", 1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca vec_type, 1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args, 3); 1462297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 1471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return res; 1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1497cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca else if(util_cpu_caps.has_sse2) { 
150e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* int[4] comparison */ 151d07b0383660cd318ba43abad11bb80de4a124951José Fonseca static const struct { 152d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned swap:1; 153d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned eq:1; 154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned gt:1; 155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned not:1; 156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } table[] = { 157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 162d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca }; 166d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpeq; 167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpgt; 168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef args[2]; 169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef res; 1707b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca switch (type.width) { 173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 8: 174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = 
"llvm.x86.sse2.pcmpeq.b"; 175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 16: 178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 32: 182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca default: 186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca assert(0); 1872297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 188d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 190b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca /* There are no unsigned comparison instructions. So flip the sign bit 191b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca * so that the results match. 
192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca */ 193b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca if (table[func].gt && !type.sign) { 194185be3a87a5b38e8821a560c073975c11dcbd3e9Brian Paul LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); 1952297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul a = LLVMBuildXor(builder, a, msb, ""); 1962297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul b = LLVMBuildXor(builder, b, msb, ""); 197d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].swap) { 200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = b; 201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = a; 202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else { 204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = a; 205d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = b; 206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 207d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 208d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].eq) 2092297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 210d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else if (table[func].gt) 2112297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 212d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else 213d07b0383660cd318ba43abad11bb80de4a124951José Fonseca res = LLVMConstNull(vec_type); 214d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 215d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].not) 2162297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildNot(builder, res, ""); 217d07b0383660cd318ba43abad11bb80de4a124951José 
Fonseca 218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca return res; 219d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 2202ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } /* if (type.width * type.length == 128) */ 2211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif 222a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */ 2231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 2241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.floating) { 2251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMRealPredicate op; 2261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NEVER: 2281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateFalse; 2291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_ALWAYS: 2311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateTrue; 2321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUEQ; 2351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUNE; 2381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 2401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULT; 2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 2431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULE; 2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 
2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGT; 2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGE; 2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 2532297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 255241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 256a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 2572297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildFCmp(builder, op, a, b, ""); 258a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 259241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 2602ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 261e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildFCmp(builder, op, a, b, ""); 262e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 2632ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 2642ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 2657b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 2667b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 267e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 268e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 2692ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul debug_printf("%s: warning: using slow element-wise float" 2702ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul " vector comparison\n", __FUNCTION__); 
2712ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for (i = 0; i < type.length; ++i) { 2722ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 2732ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildFCmp(builder, op, 2742ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 2752ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 2762ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 2772ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 2782ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 2792ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 2802ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 2812ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 2822ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 283241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 284241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 2851aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 2861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 2871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMIntPredicate op; 2881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntEQ; 2911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntNE; 2941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 
2961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLT : LLVMIntULT; 2971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 2991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLE : LLVMIntULE; 3001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 3021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGT : LLVMIntUGT; 3031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGE : LLVMIntUGE; 3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 3092297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 3101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 311241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 312a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 3132297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildICmp(builder, op, a, b, ""); 314a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 315241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 3162ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 317e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildICmp(builder, op, a, b, ""); 318e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 3192ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 3202ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 
3217b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 3227b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 323e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 324e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 3252ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul debug_printf("%s: warning: using slow element-wise int" 3262ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul " vector comparison\n", __FUNCTION__); 3272ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul 3282ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for(i = 0; i < type.length; ++i) { 3292ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 3302ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildICmp(builder, op, 3312ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 3322ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 3332ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3342ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 3352ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 3362ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 3372ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 3392ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 340241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 341241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 3421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 3431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 344241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca return res; 3451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca} 34609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 
34709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 3482297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3492297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/** 3502297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func. 3512297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 352877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 3532297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */ 3542297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef 3552297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld, 3562297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 3572297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 3582297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 3592297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{ 3602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_compare(bld->builder, bld->type, func, a, b); 3612297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul} 3622297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3632297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 364877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/** 365877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b; 366e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca * 367e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. 
368877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */ 36909a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 37009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld, 37109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask, 37209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 37309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b) 37409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 375b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca struct lp_type type = bld->type; 37609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef res; 37709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 37809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 37909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 38009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 3812ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 382e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); 3832ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildSelect(bld->builder, mask, a, b, ""); 38409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 3852ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 3862ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if(type.floating) { 3872ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 3882ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 3892ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 3902ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 39109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 3922ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul a = LLVMBuildAnd(bld->builder, a, mask, ""); 39309a7b011acb3957725bb7e1bc4125f0939a295e7José 
Fonseca 3942ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul /* This often gets translated to PANDN, but sometimes the NOT is 3952ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul * pre-computed and stored in another constant. The best strategy depends 3962ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul * on available registers, so it is not a big deal -- hopefully LLVM does 3972ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul * the right decision attending the rest of the program. 3982ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul */ 3992ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 40009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 4012ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildOr(bld->builder, a, b, ""); 40209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 4032ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if(type.floating) { 4042ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 4052ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 4062ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 40709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 40809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 40909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return res; 41009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 41109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 41209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 41309a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 41409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld, 41509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 41609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b, 4175e13e987da6ce656b08f6c25f8d373c80949e3b0José Fonseca const boolean cond[4]) 
41809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 419b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca const struct lp_type type = bld->type; 42009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca const unsigned n = type.length; 42109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca unsigned i, j; 42209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 42309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 42409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 42509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(cond[0] && cond[1] && cond[2] && cond[3]) 42609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 42709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) 42809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return b; 42909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == bld->undef || b == bld->undef) 43009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return bld->undef; 43109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 43209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 43309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * There are three major ways of accomplishing this: 43409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a shuffle, 43509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a select, 43609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - or with a bit mask. 43709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * 43809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Select isn't supported for vector types yet. 43909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * The flip between these is empirical and might need to be. 44009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 44109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if (n <= 4) { 44209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 44309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Shuffle. 
44409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 44509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt32Type(); 44609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 44709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 44809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 44909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 45009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); 45109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 45209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 45309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 4541fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca else { 45509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0 4561fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca /* XXX: Unfortunately select of vectors do not work */ 45709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* Use a select */ 45809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt1Type(); 45909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; 46009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 46109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 46209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 46309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 
1 : 0, 0); 46409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 46509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); 4661fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else 46709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask = lp_build_const_mask_aos(type, cond); 46809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return lp_build_select(bld, mask, a, b); 4691fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif 47009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 47109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 47285c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin 4732b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul 4742b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul/** Return (a & ~b) */ 4752b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian PaulLLVMValueRef 4762b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paullp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) 4772b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul{ 4782b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul b = LLVMBuildNot(bld->builder, b, ""); 4792b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul b = LLVMBuildAnd(bld->builder, a, b, ""); 4802b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul return b; 4812b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul} 482