lp_bld_logic.c revision c79f162367b99d9438bd1589ecfdeba69baa9d3d
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/************************************************************************** 21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc. 41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved. 51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a 71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the 81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including 91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish, 101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to 111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to 121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions: 131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the 151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions 161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software. 171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/ 271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/** 295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file 305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations. 315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * 325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com> 335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */ 345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca 351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h" 3788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca#include "util/u_memory.h" 38854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h" 397cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca 401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h" 4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h" 421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h" 43dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca#include "lp_bld_debug.h" 441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h" 451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 47a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/* 48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX 49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like 51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * select <4 x i1> %C, %A, %B 53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 54a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not 55a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * supported on any backend. 56a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 57a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Expanding the boolean vector to full SIMD register width, as in 58a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 59a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * sext <4 x i1> %C to <4 x i32> 60a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 61a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 62a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * it causes assertion failures in LLVM 2.6. It appears to work correctly on 63a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * LLVM 2.7. 64a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca */ 65a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca 66a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca 67e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/** 682297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func. 692297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 70877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 71e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */ 721aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef 732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder, 742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul const struct lp_type type, 752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{ 791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef zeros = LLVMConstNull(int_vec_type); 811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef cond; 83241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMValueRef res; 841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 85e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func >= PIPE_FUNC_NEVER); 86e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func <= PIPE_FUNC_ALWAYS); 87a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 88a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 89e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul 901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_NEVER) 911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return zeros; 921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_ALWAYS) 931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return ones; 941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 956b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 966b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca /* 976b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca * There are no unsigned integer comparison instructions in SSE. 986b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca */ 991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1006b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca if (!type.floating && !type.sign && 1016b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca type.width * type.length == 128 && 1026b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca util_cpu_caps.has_sse2 && 1036b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca (func == PIPE_FUNC_LESS || 1046b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca func == PIPE_FUNC_LEQUAL || 1056b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca func == PIPE_FUNC_GREATER || 1066b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca func == PIPE_FUNC_GEQUAL) && 1076b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca (gallivm_debug & GALLIVM_DEBUG_PERF)) { 1086b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", 1096b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca __FUNCTION__, type.length, type.width); 1106b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca } 1116b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca#endif 1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 113489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207 1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.width * type.length == 128) { 1167cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca if(type.floating && util_cpu_caps.has_sse) { 117e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* float[4] comparison */ 1187b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef args[3]; 1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca unsigned cc; 1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca boolean swap; 1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = FALSE; 1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 0; 1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 4; 1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 1472297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(swap) { 1511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = b; 1521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = a; 1531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 1551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = a; 1561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = b; 1571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 1602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, 1611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca "llvm.x86.sse.cmp.ps", 1621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca vec_type, 1631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args, 3); 1642297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 1651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return res; 1661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1677cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca else if(util_cpu_caps.has_sse2) { 168e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* int[4] comparison */ 169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca static const struct { 170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned swap:1; 171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned eq:1; 172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned gt:1; 173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned not:1; 174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } table[] = { 175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca }; 184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpeq; 185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpgt; 186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef args[2]; 187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef res; 1887b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(type); 189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 190d07b0383660cd318ba43abad11bb80de4a124951José Fonseca switch (type.width) { 191d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 8: 192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 193d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 194d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 195d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 16: 196d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 197d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 32: 200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca default: 204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca assert(0); 2052297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 207d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 208b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca /* There are no unsigned comparison instructions. So flip the sign bit 209b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca * so that the results match. 210d07b0383660cd318ba43abad11bb80de4a124951José Fonseca */ 211b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca if (table[func].gt && !type.sign) { 212185be3a87a5b38e8821a560c073975c11dcbd3e9Brian Paul LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); 2132297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul a = LLVMBuildXor(builder, a, msb, ""); 2142297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul b = LLVMBuildXor(builder, b, msb, ""); 215d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 216d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 217d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].swap) { 218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = b; 219d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = a; 220d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 221d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else { 222d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = a; 223d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = b; 224d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 225d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 226d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].eq) 2272297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 228d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else if (table[func].gt) 2292297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 230d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else 231d07b0383660cd318ba43abad11bb80de4a124951José Fonseca res = LLVMConstNull(vec_type); 232d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 233d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].not) 2342297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildNot(builder, res, ""); 235d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 236d07b0383660cd318ba43abad11bb80de4a124951José Fonseca return res; 237d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 2382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } /* if (type.width * type.length == 128) */ 2391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif 240a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */ 2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 2426b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca /* XXX: It is not clear if we should use the ordered or unordered operators */ 2436b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca 2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.floating) { 2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMRealPredicate op; 2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NEVER: 2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateFalse; 2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_ALWAYS: 2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateTrue; 2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUEQ; 2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUNE; 2581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 2601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULT; 2611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 2631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULE; 2641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 2661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGT; 2671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 2691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGE; 2701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 2721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 2732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 2741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 275241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 276a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 2772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildFCmp(builder, op, a, b, ""); 278a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 279241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 2802ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 281e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildFCmp(builder, op, a, b, ""); 282e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 2832ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 2842ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 2857b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 2867b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 287e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 288e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 2892ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul debug_printf("%s: warning: using slow element-wise float" 2902ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul " vector comparison\n", __FUNCTION__); 2912ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for (i = 0; i < type.length; ++i) { 2922ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 2932ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildFCmp(builder, op, 2942ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 2952ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 2962ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 2972ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 2982ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 2992ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 3002ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3012ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 3022ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 303241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 304241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMIntPredicate op; 3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 3091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 3101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntEQ; 3111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 3131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntNE; 3141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 3161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLT : LLVMIntULT; 3171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 3191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLE : LLVMIntULE; 3201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 3221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGT : LLVMIntUGT; 3231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 3251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGE : LLVMIntUGE; 3261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 3281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 3292297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 3301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 331241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 332a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 3332297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildICmp(builder, op, a, b, ""); 334a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 335241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 3362ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 337e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildICmp(builder, op, a, b, ""); 338e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 3392ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 3402ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 3417b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 3427b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 343e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 344e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 345dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca if (gallivm_debug & GALLIVM_DEBUG_PERF) { 346dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca debug_printf("%s: using slow element-wise int" 347dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca " vector comparison\n", __FUNCTION__); 348dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca } 3492ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul 3502ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for(i = 0; i < type.length; ++i) { 3512ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 3522ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildICmp(builder, op, 3532ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 3542ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 3552ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3562ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 3572ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 3582ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 3592ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3602ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 3612ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 362241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 363241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 3641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 3651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 366241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca return res; 3671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca} 36809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 36909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 3702297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3712297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/** 3722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func. 3732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 374877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 3752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */ 3762297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef 3772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld, 3782297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 3792297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 3802297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 3812297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{ 3822297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_compare(bld->builder, bld->type, func, a, b); 3832297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul} 3842297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3852297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 386877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/** 3872a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * Return (mask & a) | (~mask & b); 3882a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */ 3892a45972fb2ba12a6561e5cba84d167f4c30566d4José FonsecaLLVMValueRef 3902a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonsecalp_build_select_bitwise(struct lp_build_context *bld, 3912a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef mask, 3922a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef a, 3932a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef b) 3942a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca{ 3952a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca struct lp_type type = bld->type; 3962a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef res; 3972a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 398a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 399a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 400a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 4012a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if (a == b) { 4022a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca return a; 4032a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 4042a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4052a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if(type.floating) { 4062a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 4072a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 4082a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 4092a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 4102a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4112a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca a = LLVMBuildAnd(bld->builder, a, mask, ""); 4122a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4132a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca /* This often gets translated to PANDN, but sometimes the NOT is 4142a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * pre-computed and stored in another constant. The best strategy depends 4152a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * on available registers, so it is not a big deal -- hopefully LLVM does 4162a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * the right decision attending the rest of the program. 4172a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */ 4182a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 4192a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4202a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = LLVMBuildOr(bld->builder, a, b, ""); 4212a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4222a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if(type.floating) { 4232a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMTypeRef vec_type = lp_build_vec_type(type); 4242a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 4252a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 4262a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4272a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca return res; 4282a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca} 4292a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4302a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4312a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca/** 432877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b; 433e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca * 4342a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 4352a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * will yield unpredictable results. 436877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */ 43709a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 43809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld, 43909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask, 44009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 44109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b) 44209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 443b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca struct lp_type type = bld->type; 44409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef res; 44509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 446a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 447a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 448a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 44909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 45009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 45109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 4522ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 453e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); 4542ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildSelect(bld->builder, mask, a, b, ""); 45509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 45688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca else if (util_cpu_caps.has_sse4_1 && 45788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca type.width * type.length == 128 && 45888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(a) && 45988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(b) && 46088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(mask)) { 46188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca const char *intrinsic; 46288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca LLVMTypeRef arg_type; 46388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca LLVMValueRef args[3]; 46488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 465c79f162367b99d9438bd1589ecfdeba69baa9d3dKeith Whitwell if (type.floating && 466c79f162367b99d9438bd1589ecfdeba69baa9d3dKeith Whitwell type.width == 64) { 46788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.blendvpd"; 46888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMDoubleType(), 2); 469c79f162367b99d9438bd1589ecfdeba69baa9d3dKeith Whitwell } else if (type.floating && 470c79f162367b99d9438bd1589ecfdeba69baa9d3dKeith Whitwell type.width == 32) { 47188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.blendvps"; 47288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMFloatType(), 4); 47388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } else { 47488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.pblendvb"; 47588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type = LLVMVectorType(LLVMInt8Type(), 16); 47688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 47788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 47888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->int_vec_type) { 47988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca mask = LLVMBuildBitCast(bld->builder, mask, arg_type, ""); 48088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 48188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 48288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->vec_type) { 48388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca a = LLVMBuildBitCast(bld->builder, a, arg_type, ""); 48488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca b = LLVMBuildBitCast(bld->builder, b, arg_type, ""); 48588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 48688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 48788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[0] = b; 48888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[1] = a; 48988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[2] = mask; 49088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 49188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca res = lp_build_intrinsic(bld->builder, intrinsic, 49288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type, args, Elements(args)); 49388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 49488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->vec_type) { 49588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); 49688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 49788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 4982ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 4992a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = lp_build_select_bitwise(bld, mask, a, b); 50009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 50109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 50209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return res; 50309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 50409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 50509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 5066ed726b8fc6210a41fe325591e1428d19f419108José Fonseca/** 5076ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * Return mask ? a : b; 5086ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * 5096ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * mask is a TGSI_WRITEMASK_xxx. 5106ed726b8fc6210a41fe325591e1428d19f419108José Fonseca */ 51109a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 51209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld, 5136ed726b8fc6210a41fe325591e1428d19f419108José Fonseca unsigned mask, 51409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 5156ed726b8fc6210a41fe325591e1428d19f419108José Fonseca LLVMValueRef b) 51609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 517b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca const struct lp_type type = bld->type; 51809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca const unsigned n = type.length; 51909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca unsigned i, j; 52009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 5216ed726b8fc6210a41fe325591e1428d19f419108José Fonseca assert((mask & ~0xf) == 0); 522a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 523a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 524a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 52509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 52609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 5276ed726b8fc6210a41fe325591e1428d19f419108José Fonseca if((mask & 0xf) == 0xf) 52809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 5296ed726b8fc6210a41fe325591e1428d19f419108José Fonseca if((mask & 0xf) == 0x0) 53009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return b; 53109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == bld->undef || b == bld->undef) 53209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return bld->undef; 53309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 53409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 53509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * There are three major ways of accomplishing this: 53609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a shuffle, 53709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a select, 53809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - or with a bit mask. 53909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * 54009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Select isn't supported for vector types yet. 54109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * The flip between these is empirical and might need to be. 54209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 54309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if (n <= 4) { 54409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 54509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Shuffle. 54609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 54709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt32Type(); 54809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 54909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 55009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 55109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 5526ed726b8fc6210a41fe325591e1428d19f419108José Fonseca shuffles[j + i] = LLVMConstInt(elem_type, 5536ed726b8fc6210a41fe325591e1428d19f419108José Fonseca (mask & (1 << i) ? 0 : n) + j + i, 5546ed726b8fc6210a41fe325591e1428d19f419108José Fonseca 0); 55509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 55609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 55709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 5581fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca else { 55909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0 5601fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca /* XXX: Unfortunately select of vectors do not work */ 56109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* Use a select */ 56209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt1Type(); 5636ed726b8fc6210a41fe325591e1428d19f419108José Fonseca LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH]; 56409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 56509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 56609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 5676ed726b8fc6210a41fe325591e1428d19f419108José Fonseca cond_vec[j + i] = LLVMConstInt(elem_type, 5686ed726b8fc6210a41fe325591e1428d19f419108José Fonseca mask & (1 << i) ? 1 : 0, 0); 56909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 5706ed726b8fc6210a41fe325591e1428d19f419108José Fonseca return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, ""); 5711fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else 5726ed726b8fc6210a41fe325591e1428d19f419108José Fonseca LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask); 5736ed726b8fc6210a41fe325591e1428d19f419108José Fonseca return lp_build_select(bld, mask_vec, a, b); 5741fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif 57509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 57609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 577