lp_bld_logic.c revision 6ed726b8fc6210a41fe325591e1428d19f419108
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helper functions for logical operations.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * 325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com> 335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */ 345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca 351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h" 3788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca#include "util/u_memory.h" 38854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h" 397cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca 401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h" 4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h" 421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h" 431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h" 441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 46a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/* 47a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX 48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like 50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * select <4 x i1> %C, %A, %B 52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not 54a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * supported on any backend. 
55a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 56a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Expanding the boolean vector to full SIMD register width, as in 57a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 58a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * sext <4 x i1> %C to <4 x i32> 59a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 60a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 61a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * it causes assertion failures in LLVM 2.6. It appears to work correctly on 62a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * LLVM 2.7. 63a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca */ 64a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca 65a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca 66e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/** 672297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func. 682297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 69877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 
70e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */ 711aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef 722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder, 732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul const struct lp_type type, 742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{ 781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef zeros = LLVMConstNull(int_vec_type); 801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef cond; 82241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMValueRef res; 831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 84e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func >= PIPE_FUNC_NEVER); 85e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func <= PIPE_FUNC_ALWAYS); 86a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 87a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 88e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul 891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_NEVER) 901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return zeros; 911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_ALWAYS) 921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return ones; 931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* TODO: optimize the constant case */ 951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 
961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* XXX: It is not clear if we should use the ordered or unordered operators */ 971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 98489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207 991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 1001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.width * type.length == 128) { 1017cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca if(type.floating && util_cpu_caps.has_sse) { 102e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* float[4] comparison */ 1037b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef args[3]; 1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca unsigned cc; 1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca boolean swap; 1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = FALSE; 1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 0; 1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 4; 1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 
1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 1322297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(swap) { 1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = b; 1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = a; 1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = a; 1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = b; 1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 1452297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, 1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca "llvm.x86.sse.cmp.ps", 1471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca vec_type, 1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args, 3); 1492297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 
1501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return res; 1511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1527cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca else if(util_cpu_caps.has_sse2) { 153e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* int[4] comparison */ 154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca static const struct { 155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned swap:1; 156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned eq:1; 157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned gt:1; 158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned not:1; 159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } table[] = { 160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 162d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 166d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca }; 169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpeq; 170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpgt; 171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef args[2]; 172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef res; 1737b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 
175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca switch (type.width) { 176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 8: 177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 16: 181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 32: 185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 188d07b0383660cd318ba43abad11bb80de4a124951José Fonseca default: 189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca assert(0); 1902297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 191d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 193b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca /* There are no unsigned comparison instructions. So flip the sign bit 194b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca * so that the results match. 
195d07b0383660cd318ba43abad11bb80de4a124951José Fonseca */ 196b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca if (table[func].gt && !type.sign) { 197185be3a87a5b38e8821a560c073975c11dcbd3e9Brian Paul LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); 1982297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul a = LLVMBuildXor(builder, a, msb, ""); 1992297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul b = LLVMBuildXor(builder, b, msb, ""); 200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].swap) { 203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = b; 204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = a; 205d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else { 207d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = a; 208d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = b; 209d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 210d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 211d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].eq) 2122297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 213d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else if (table[func].gt) 2142297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 215d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else 216d07b0383660cd318ba43abad11bb80de4a124951José Fonseca res = LLVMConstNull(vec_type); 217d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].not) 2192297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildNot(builder, res, ""); 220d07b0383660cd318ba43abad11bb80de4a124951José 
Fonseca 221d07b0383660cd318ba43abad11bb80de4a124951José Fonseca return res; 222d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 2232ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } /* if (type.width * type.length == 128) */ 2241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif 225a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */ 2261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 2271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.floating) { 2281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMRealPredicate op; 2291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NEVER: 2311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateFalse; 2321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_ALWAYS: 2341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateTrue; 2351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUEQ; 2381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUNE; 2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 2431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULT; 2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULE; 2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 
2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGT; 2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGE; 2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 2562297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 258241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 259a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 2602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildFCmp(builder, op, a, b, ""); 261a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 262241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 2632ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 264e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildFCmp(builder, op, a, b, ""); 265e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 2662ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 2672ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 2687b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 2697b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 270e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 271e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 2722ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul debug_printf("%s: warning: using slow element-wise float" 2732ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul " vector comparison\n", __FUNCTION__); 
2742ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for (i = 0; i < type.length; ++i) { 2752ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 2762ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildFCmp(builder, op, 2772ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 2782ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 2792ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 2802ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 2812ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 2822ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 2832ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 2842ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 2852ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 286241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 287241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 2881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 2891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 2901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMIntPredicate op; 2911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntEQ; 2941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntNE; 2971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 
2991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLT : LLVMIntULT; 3001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 3021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLE : LLVMIntULE; 3031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGT : LLVMIntUGT; 3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGE : LLVMIntUGE; 3091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 3111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 3122297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 3131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 314241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 315a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 3162297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildICmp(builder, op, a, b, ""); 317a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 318241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 3192ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 320e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildICmp(builder, op, a, b, ""); 321e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 3222ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 3232ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 
3247b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 3257b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 326e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 327e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 3282ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul debug_printf("%s: warning: using slow element-wise int" 3292ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul " vector comparison\n", __FUNCTION__); 3302ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul 3312ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for(i = 0; i < type.length; ++i) { 3322ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 3332ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildICmp(builder, op, 3342ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 3352ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 3362ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3372ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 3382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 3392ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 3402ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3412ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 3422ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 343241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 344241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 3451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 3461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 347241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca return res; 3481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca} 34909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 
35009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 3512297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3522297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/** 3532297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func. 3542297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 355877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 3562297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */ 3572297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef 3582297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld, 3592297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 3602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 3612297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 3622297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{ 3632297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_compare(bld->builder, bld->type, func, a, b); 3642297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul} 3652297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3662297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 367877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/** 3682a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * Return (mask & a) | (~mask & b); 3692a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */ 3702a45972fb2ba12a6561e5cba84d167f4c30566d4José FonsecaLLVMValueRef 3712a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonsecalp_build_select_bitwise(struct lp_build_context *bld, 3722a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef mask, 3732a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef a, 3742a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef b) 3752a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca{ 3762a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca struct lp_type type = bld->type; 
3772a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef res; 3782a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 379a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 380a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 381a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 3822a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if (a == b) { 3832a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca return a; 3842a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 3852a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3862a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if(type.floating) { 3872a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 3882a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 3892a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 3902a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 3912a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3922a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca a = LLVMBuildAnd(bld->builder, a, mask, ""); 3932a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 3942a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca /* This often gets translated to PANDN, but sometimes the NOT is 3952a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * pre-computed and stored in another constant. The best strategy depends 3962a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * on available registers, so it is not a big deal -- hopefully LLVM does 3972a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * the right decision attending the rest of the program. 
3982a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */ 3992a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 4002a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4012a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = LLVMBuildOr(bld->builder, a, b, ""); 4022a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4032a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if(type.floating) { 4042a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMTypeRef vec_type = lp_build_vec_type(type); 4052a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 4062a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 4072a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4082a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca return res; 4092a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca} 4102a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4112a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4122a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca/** 413877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b; 414e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca * 4152a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 4162a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * will yield unpredictable results. 
417877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */ 41809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 41909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld, 42009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask, 42109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 42209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b) 42309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 424b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca struct lp_type type = bld->type; 42509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef res; 42609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 427a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 428a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 429a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 43009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 43109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 43209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 4332ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 434e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); 4352ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildSelect(bld->builder, mask, a, b, ""); 43609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 43788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca else if (util_cpu_caps.has_sse4_1 && 43888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca type.width * type.length == 128 && 43988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(a) && 44088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(b) && 44188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(mask)) { 44288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca const char *intrinsic; 
44388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca LLVMTypeRef arg_type; 44488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca LLVMValueRef args[3]; 44588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 44688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (type.width == 64) { 44788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.blendvpd"; 44888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMDoubleType(), 2); 44988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } else if (type.width == 32) { 45088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.blendvps"; 45188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMFloatType(), 4); 45288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } else { 45388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.pblendvb"; 45488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMInt8Type(), 16); 45588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 45688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 45788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->int_vec_type) { 45888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca mask = LLVMBuildBitCast(bld->builder, mask, arg_type, ""); 45988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 46088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 46188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->vec_type) { 46288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca a = LLVMBuildBitCast(bld->builder, a, arg_type, ""); 46388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca b = LLVMBuildBitCast(bld->builder, b, arg_type, ""); 46488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 46588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 46688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[0] = b; 
46788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[1] = a; 46888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[2] = mask; 46988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 47088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca res = lp_build_intrinsic(bld->builder, intrinsic, 47188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type, args, Elements(args)); 47288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 47388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->vec_type) { 47488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); 47588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 47688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 4772ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 4782a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = lp_build_select_bitwise(bld, mask, a, b); 47909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 48009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 48109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return res; 48209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 48309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 48409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 4856ed726b8fc6210a41fe325591e1428d19f419108José Fonseca/** 4866ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * Return mask ? a : b; 4876ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * 4886ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * mask is a TGSI_WRITEMASK_xxx. 
4896ed726b8fc6210a41fe325591e1428d19f419108José Fonseca */ 49009a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 49109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld, 4926ed726b8fc6210a41fe325591e1428d19f419108José Fonseca unsigned mask, 49309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 4946ed726b8fc6210a41fe325591e1428d19f419108José Fonseca LLVMValueRef b) 49509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 496b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca const struct lp_type type = bld->type; 49709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca const unsigned n = type.length; 49809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca unsigned i, j; 49909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 5006ed726b8fc6210a41fe325591e1428d19f419108José Fonseca assert((mask & ~0xf) == 0); 501a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 502a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 503a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 50409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 50509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 5066ed726b8fc6210a41fe325591e1428d19f419108José Fonseca if((mask & 0xf) == 0xf) 50709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 5086ed726b8fc6210a41fe325591e1428d19f419108José Fonseca if((mask & 0xf) == 0x0) 50909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return b; 51009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == bld->undef || b == bld->undef) 51109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return bld->undef; 51209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 51309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 51409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * There are three major ways of accomplishing this: 51509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a 
shuffle, 51609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a select, 51709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - or with a bit mask. 51809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * 51909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Select isn't supported for vector types yet. 52009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * The flip between these is empirical and might need to be. 52109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 52209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if (n <= 4) { 52309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 52409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Shuffle. 52509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 52609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt32Type(); 52709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 52809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 52909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 53009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 5316ed726b8fc6210a41fe325591e1428d19f419108José Fonseca shuffles[j + i] = LLVMConstInt(elem_type, 5326ed726b8fc6210a41fe325591e1428d19f419108José Fonseca (mask & (1 << i) ? 
0 : n) + j + i, 5336ed726b8fc6210a41fe325591e1428d19f419108José Fonseca 0); 53409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 53509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 53609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 5371fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca else { 53809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0 5391fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca /* XXX: Unfortunately select of vectors do not work */ 54009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* Use a select */ 54109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt1Type(); 5426ed726b8fc6210a41fe325591e1428d19f419108José Fonseca LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH]; 54309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 54409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 54509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 5466ed726b8fc6210a41fe325591e1428d19f419108José Fonseca cond_vec[j + i] = LLVMConstInt(elem_type, 5476ed726b8fc6210a41fe325591e1428d19f419108José Fonseca mask & (1 << i) ? 
1 : 0, 0); 54809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 5496ed726b8fc6210a41fe325591e1428d19f419108José Fonseca return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, ""); 5501fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else 5516ed726b8fc6210a41fe325591e1428d19f419108José Fonseca LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask); 5526ed726b8fc6210a41fe325591e1428d19f419108José Fonseca return lp_build_select(bld, mask_vec, a, b); 5531fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif 55409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 55509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 55685c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin 5572b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul 5582b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul/** Return (a & ~b) */ 5592b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian PaulLLVMValueRef 5602b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paullp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) 5612b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul{ 56220b3e40f166c77bd7fa5b7171e5b4169ed035280nobled const struct lp_type type = bld->type; 56320b3e40f166c77bd7fa5b7171e5b4169ed035280nobled 56420b3e40f166c77bd7fa5b7171e5b4169ed035280nobled assert(lp_check_value(type, a)); 56520b3e40f166c77bd7fa5b7171e5b4169ed035280nobled assert(lp_check_value(type, b)); 56620b3e40f166c77bd7fa5b7171e5b4169ed035280nobled 56720b3e40f166c77bd7fa5b7171e5b4169ed035280nobled /* can't do bitwise ops on floating-point values */ 56820b3e40f166c77bd7fa5b7171e5b4169ed035280nobled if(type.floating) { 56920b3e40f166c77bd7fa5b7171e5b4169ed035280nobled a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, ""); 57020b3e40f166c77bd7fa5b7171e5b4169ed035280nobled b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, ""); 57120b3e40f166c77bd7fa5b7171e5b4169ed035280nobled } 572a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 5732b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul b 
= LLVMBuildNot(bld->builder, b, ""); 5742b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul b = LLVMBuildAnd(bld->builder, a, b, ""); 575a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 57620b3e40f166c77bd7fa5b7171e5b4169ed035280nobled if(type.floating) { 57720b3e40f166c77bd7fa5b7171e5b4169ed035280nobled b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, ""); 57820b3e40f166c77bd7fa5b7171e5b4169ed035280nobled } 5792b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul return b; 5802b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul} 581