/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helper functions for logical operations.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * 325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com> 335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */ 345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca 351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h" 3788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca#include "util/u_memory.h" 38854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h" 397cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca 401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h" 4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h" 42efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul#include "lp_bld_init.h" 431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h" 44dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca#include "lp_bld_debug.h" 451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h" 461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/* 49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX 50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like 52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * select <4 x i1> %C, %A, %B 54a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * 553469715a8a171512cf9b528702e70393f01c6041José Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only 563469715a8a171512cf9b528702e70393f01c6041José Fonseca * supported on some backends (x86) starting with llvm 3.1. 
 *
 * Expanding the boolean vector to full SIMD register width, as in
 *
 *   sext <4 x i1> %C to <4 x i32>
 *
 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
 * it causes assertion failures in LLVM 2.6. It appears to work correctly on
 * LLVM 2.7.
 */


/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func  one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true.
72e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */ 731aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef 74efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_compare(struct gallivm_state *gallivm, 752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul const struct lp_type type, 762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 782297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{ 80efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMBuilderRef builder = gallivm->builder; 81efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); 821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef zeros = LLVMConstNull(int_vec_type); 831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef cond; 85241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMValueRef res; 861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 87e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func >= PIPE_FUNC_NEVER); 88e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func <= PIPE_FUNC_ALWAYS); 89a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 90a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 91e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul 921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_NEVER) 931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return zeros; 941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_ALWAYS) 951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return ones; 961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 976b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca#if 
defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 986b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca /* 996b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca * There are no unsigned integer comparison instructions in SSE. 1006b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca */ 1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1026b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca if (!type.floating && !type.sign && 1036b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca type.width * type.length == 128 && 1046b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca util_cpu_caps.has_sse2 && 1056b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca (func == PIPE_FUNC_LESS || 1066b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca func == PIPE_FUNC_LEQUAL || 1076b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca func == PIPE_FUNC_GREATER || 1086b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca func == PIPE_FUNC_GEQUAL) && 1096b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca (gallivm_debug & GALLIVM_DEBUG_PERF)) { 1106b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", 1116b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca __FUNCTION__, type.length, type.width); 1126b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca } 1136b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca#endif 1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 115489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207 1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.width * type.length == 128) { 1187cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca if(type.floating && util_cpu_caps.has_sse) { 119e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* float[4] comparison */ 120efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef vec_type = 
lp_build_vec_type(gallivm, type); 1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef args[3]; 1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca unsigned cc; 1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca boolean swap; 1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = FALSE; 1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 0; 1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 4; 1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 
149efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul return lp_build_undef(gallivm, type); 1501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(swap) { 1531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = b; 1541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = a; 1551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 1571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = a; 1581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = b; 1591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 161efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0); 1622297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, 1631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca "llvm.x86.sse.cmp.ps", 1641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca vec_type, 1651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args, 3); 1662297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 1671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return res; 1681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1697cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca else if(util_cpu_caps.has_sse2) { 170e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* int[4] comparison */ 171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca static const struct { 172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned swap:1; 173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned eq:1; 174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned gt:1; 175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned not:1; 
176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } table[] = { 177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca }; 186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpeq; 187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpgt; 188d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef args[2]; 189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef res; 190efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); 191d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca switch (type.width) { 193d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 8: 194d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 195d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 196d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 197d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 16: 198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 
200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 32: 202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 205d07b0383660cd318ba43abad11bb80de4a124951José Fonseca default: 206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca assert(0); 207efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul return lp_build_undef(gallivm, type); 208d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 209d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 210b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca /* There are no unsigned comparison instructions. So flip the sign bit 211b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca * so that the results match. 212d07b0383660cd318ba43abad11bb80de4a124951José Fonseca */ 213b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca if (table[func].gt && !type.sign) { 214efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1)); 2152297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul a = LLVMBuildXor(builder, a, msb, ""); 2162297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul b = LLVMBuildXor(builder, b, msb, ""); 217d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 219d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].swap) { 220d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = b; 221d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = a; 222d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 223d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else { 224d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = a; 225d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] 
= b; 226d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 227d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 228d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].eq) 2292297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 230d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else if (table[func].gt) 2312297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 232d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else 233d07b0383660cd318ba43abad11bb80de4a124951José Fonseca res = LLVMConstNull(vec_type); 234d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 235d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].not) 2362297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildNot(builder, res, ""); 237d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 238d07b0383660cd318ba43abad11bb80de4a124951José Fonseca return res; 239d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 2402ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } /* if (type.width * type.length == 128) */ 2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif 242a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */ 2431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 2446b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca /* XXX: It is not clear if we should use the ordered or unordered operators */ 2456b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca 2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.floating) { 2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMRealPredicate op; 2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NEVER: 2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateFalse; 2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 
break; 2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_ALWAYS: 2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateTrue; 2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUEQ; 2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUNE; 2601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 2621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULT; 2631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 2651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULE; 2661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 2681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGT; 2691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 2711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGE; 2721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 2741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 275efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul return lp_build_undef(gallivm, type); 2761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 277241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 278a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 2792297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 
cond = LLVMBuildFCmp(builder, op, a, b, ""); 280a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 281241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 2822ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 283e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildFCmp(builder, op, a, b, ""); 284e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 2852ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 2862ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 2877b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 2887b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 289e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 290e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 2912ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul debug_printf("%s: warning: using slow element-wise float" 2922ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul " vector comparison\n", __FUNCTION__); 2932ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for (i = 0; i < type.length; ++i) { 294efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef index = lp_build_const_int32(gallivm, i); 2952ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildFCmp(builder, op, 2962ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 2972ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 2982ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 2992ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 3002ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 3012ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 3022ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 
3032ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 3042ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 305241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 306241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 3091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMIntPredicate op; 3101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 3111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 3121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntEQ; 3131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 3151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntNE; 3161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 3181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLT : LLVMIntULT; 3191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 3211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLE : LLVMIntULE; 3221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 3241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGT : LLVMIntUGT; 3251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 3271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? 
LLVMIntSGE : LLVMIntUGE; 3281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 3291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 3301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 331efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul return lp_build_undef(gallivm, type); 3321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 333241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 334a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207 3352297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildICmp(builder, op, a, b, ""); 336a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 337241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 3382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 339e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca cond = LLVMBuildICmp(builder, op, a, b, ""); 340e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 3412ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 3422ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 3437b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul unsigned i; 3447b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul 345e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca res = LLVMGetUndef(int_vec_type); 346e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca 347dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca if (gallivm_debug & GALLIVM_DEBUG_PERF) { 348dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca debug_printf("%s: using slow element-wise int" 349dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca " vector comparison\n", __FUNCTION__); 350dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca } 3512ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul 3522ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul for(i = 0; i < type.length; ++i) { 353efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul 
LLVMValueRef index = lp_build_const_int32(gallivm, i); 3542ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildICmp(builder, op, 3552ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, a, index, ""), 3562ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMBuildExtractElement(builder, b, index, ""), 3572ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3582ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul cond = LLVMBuildSelect(builder, cond, 3592ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(ones, index), 3602ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul LLVMConstExtractElement(zeros, index), 3612ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul ""); 3622ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 3632ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul } 364241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 365241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 3661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 3671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 368241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca return res; 3691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca} 37009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 37109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 3722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/** 3742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func. 3752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 376877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 
3772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */ 3782297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef 3792297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld, 3802297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 3812297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 3822297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 3832297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{ 384efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul return lp_build_compare(bld->gallivm, bld->type, func, a, b); 3852297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul} 3862297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3872297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 388877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/** 3892a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * Return (mask & a) | (~mask & b); 3902a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */ 3912a45972fb2ba12a6561e5cba84d167f4c30566d4José FonsecaLLVMValueRef 3922a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonsecalp_build_select_bitwise(struct lp_build_context *bld, 3932a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef mask, 3942a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef a, 3952a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef b) 3962a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca{ 3976299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul LLVMBuilderRef builder = bld->gallivm->builder; 3982a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca struct lp_type type = bld->type; 3992a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca LLVMValueRef res; 4002a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 401a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 402a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 403a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 
4042a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if (a == b) { 4052a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca return a; 4062a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 4072a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4082a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if(type.floating) { 409efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); 4106299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul a = LLVMBuildBitCast(builder, a, int_vec_type, ""); 4116299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul b = LLVMBuildBitCast(builder, b, int_vec_type, ""); 4122a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 4132a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4146299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul a = LLVMBuildAnd(builder, a, mask, ""); 4152a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4162a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca /* This often gets translated to PANDN, but sometimes the NOT is 4172a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * pre-computed and stored in another constant. The best strategy depends 4182a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * on available registers, so it is not a big deal -- hopefully LLVM does 4192a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * the right decision attending the rest of the program. 
4202a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */ 4216299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), ""); 4222a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4236299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul res = LLVMBuildOr(builder, a, b, ""); 4242a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4252a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca if(type.floating) { 426efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 4276299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul res = LLVMBuildBitCast(builder, res, vec_type, ""); 4282a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca } 4292a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4302a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca return res; 4312a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca} 4322a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4332a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca 4342a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca/** 435877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b; 436e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca * 4372a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 4382a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * will yield unpredictable results. 
439877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */ 44009a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 44109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld, 44209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask, 44309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 44409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b) 44509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 4466299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul LLVMBuilderRef builder = bld->gallivm->builder; 447efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMContextRef lc = bld->gallivm->context; 448b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca struct lp_type type = bld->type; 44909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef res; 45009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 451a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 452a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 453a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 45409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 45509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 45609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 4572ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul if (type.length == 1) { 4586299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); 4596299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul res = LLVMBuildSelect(builder, mask, a, b, ""); 46009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 4618c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca else if (0) { 4628c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca /* Generate a vector select. 
4638c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * 4648c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * XXX: Using vector selects would avoid emitting intrinsics, but they aren't 4658c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * properly supported yet. 4668c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * 4678c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * LLVM 3.0 includes experimental support provided the -promote-elements 4688c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * options is passed to LLVM's command line (e.g., via 4698c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * llvm::cl::ParseCommandLineOptions), but resulting code quality is much 4708c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * worse, probably because some optimization passes don't know how to 4718c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * handle vector selects. 4728c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * 4738c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * See also: 4748c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html 4758c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca */ 4768c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca 4778c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca /* Convert the mask to a vector of booleans. 4788c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * XXX: There are two ways to do this. Decide what's best. 
4798c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca */ 4808c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca if (1) { 4818c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); 4828c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); 4838c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca } else { 4848c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), ""); 4858c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca } 4868c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca res = LLVMBuildSelect(builder, mask, a, b, ""); 4878c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca } 4883469715a8a171512cf9b528702e70393f01c6041José Fonseca else if (((util_cpu_caps.has_sse4_1 && 4893469715a8a171512cf9b528702e70393f01c6041José Fonseca type.width * type.length == 128) || 4903469715a8a171512cf9b528702e70393f01c6041José Fonseca (util_cpu_caps.has_avx && 4913469715a8a171512cf9b528702e70393f01c6041José Fonseca type.width * type.length == 256 && type.width >= 32)) && 49288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(a) && 49388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(b) && 49488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca !LLVMIsConstant(mask)) { 49588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca const char *intrinsic; 49688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca LLVMTypeRef arg_type; 49788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca LLVMValueRef args[3]; 49888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 4993469715a8a171512cf9b528702e70393f01c6041José Fonseca /* 5003469715a8a171512cf9b528702e70393f01c6041José Fonseca * There's only float blend in AVX but can just cast i32/i64 5013469715a8a171512cf9b528702e70393f01c6041José Fonseca * to float. 
5023469715a8a171512cf9b528702e70393f01c6041José Fonseca */ 5033469715a8a171512cf9b528702e70393f01c6041José Fonseca if (type.width * type.length == 256) { 5043469715a8a171512cf9b528702e70393f01c6041José Fonseca if (type.width == 64) { 5053469715a8a171512cf9b528702e70393f01c6041José Fonseca intrinsic = "llvm.x86.avx.blendv.pd.256"; 5063469715a8a171512cf9b528702e70393f01c6041José Fonseca arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4); 5073469715a8a171512cf9b528702e70393f01c6041José Fonseca } 5083469715a8a171512cf9b528702e70393f01c6041José Fonseca else { 5093469715a8a171512cf9b528702e70393f01c6041José Fonseca intrinsic = "llvm.x86.avx.blendv.ps.256"; 5103469715a8a171512cf9b528702e70393f01c6041José Fonseca arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); 5113469715a8a171512cf9b528702e70393f01c6041José Fonseca } 5123469715a8a171512cf9b528702e70393f01c6041José Fonseca } 5133469715a8a171512cf9b528702e70393f01c6041José Fonseca else if (type.floating && 5143469715a8a171512cf9b528702e70393f01c6041José Fonseca type.width == 64) { 51588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.blendvpd"; 516efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2); 517c79f162367b99d9438bd1589ecfdeba69baa9d3dKeith Whitwell } else if (type.floating && 518c79f162367b99d9438bd1589ecfdeba69baa9d3dKeith Whitwell type.width == 32) { 51988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.blendvps"; 520efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4); 52188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } else { 52288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca intrinsic = "llvm.x86.sse41.pblendvb"; 523efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16); 52488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 
52588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 52688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->int_vec_type) { 5276299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul mask = LLVMBuildBitCast(builder, mask, arg_type, ""); 52888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 52988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 53088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->vec_type) { 5316299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul a = LLVMBuildBitCast(builder, a, arg_type, ""); 5326299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul b = LLVMBuildBitCast(builder, b, arg_type, ""); 53388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 53488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 53588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[0] = b; 53688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[1] = a; 53788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca args[2] = mask; 53888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 5396299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul res = lp_build_intrinsic(builder, intrinsic, 54088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca arg_type, args, Elements(args)); 54188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca 54288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca if (arg_type != bld->vec_type) { 5436299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); 54488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 54588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca } 5462ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul else { 5472a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca res = lp_build_select_bitwise(bld, mask, a, b); 54809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 54909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 55009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return res; 55109a7b011acb3957725bb7e1bc4125f0939a295e7José 
Fonseca} 55209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 55309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 5546ed726b8fc6210a41fe325591e1428d19f419108José Fonseca/** 5556ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * Return mask ? a : b; 5566ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * 5576ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * mask is a TGSI_WRITEMASK_xxx. 5586ed726b8fc6210a41fe325591e1428d19f419108José Fonseca */ 55909a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 56009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld, 5616ed726b8fc6210a41fe325591e1428d19f419108José Fonseca unsigned mask, 56209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 5636ed726b8fc6210a41fe325591e1428d19f419108José Fonseca LLVMValueRef b) 56409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 5656299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul LLVMBuilderRef builder = bld->gallivm->builder; 566b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca const struct lp_type type = bld->type; 56709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca const unsigned n = type.length; 56809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca unsigned i, j; 56909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 5706ed726b8fc6210a41fe325591e1428d19f419108José Fonseca assert((mask & ~0xf) == 0); 571a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, a)); 572a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled assert(lp_check_value(type, b)); 573a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled 57409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 57509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 5766ed726b8fc6210a41fe325591e1428d19f419108José Fonseca if((mask & 0xf) == 0xf) 57709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 5786ed726b8fc6210a41fe325591e1428d19f419108José Fonseca if((mask & 0xf) == 0x0) 
57909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return b; 58009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == bld->undef || b == bld->undef) 58109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return bld->undef; 58209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 58309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 5848c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * There are two major ways of accomplishing this: 5858c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * - with a shuffle 5868c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * - with a select 58709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * 5888c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca * The flip between these is empirical and might need to be adjusted. 58909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 59009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if (n <= 4) { 59109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 59209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Shuffle. 59309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 594efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 59509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 59609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 59709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 59809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 5996ed726b8fc6210a41fe325591e1428d19f419108José Fonseca shuffles[j + i] = LLVMConstInt(elem_type, 6006ed726b8fc6210a41fe325591e1428d19f419108José Fonseca (mask & (1 << i) ? 
0 : n) + j + i, 6016ed726b8fc6210a41fe325591e1428d19f419108José Fonseca 0); 60209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 6036299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), ""); 60409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 6051fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca else { 606efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask); 6076ed726b8fc6210a41fe325591e1428d19f419108José Fonseca return lp_build_select(bld, mask_vec, a, b); 60809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 60909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 6103469715a8a171512cf9b528702e70393f01c6041José Fonseca 6113469715a8a171512cf9b528702e70393f01c6041José Fonseca 6123469715a8a171512cf9b528702e70393f01c6041José Fonseca/** 6133469715a8a171512cf9b528702e70393f01c6041José Fonseca * Return (scalar-cast)val ? true : false; 6143469715a8a171512cf9b528702e70393f01c6041José Fonseca */ 6153469715a8a171512cf9b528702e70393f01c6041José FonsecaLLVMValueRef 6163469715a8a171512cf9b528702e70393f01c6041José Fonsecalp_build_any_true_range(struct lp_build_context *bld, 6173469715a8a171512cf9b528702e70393f01c6041José Fonseca unsigned real_length, 6183469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMValueRef val) 6193469715a8a171512cf9b528702e70393f01c6041José Fonseca{ 6203469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMBuilderRef builder = bld->gallivm->builder; 6213469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMTypeRef scalar_type; 6223469715a8a171512cf9b528702e70393f01c6041José Fonseca LLVMTypeRef true_type; 6233469715a8a171512cf9b528702e70393f01c6041José Fonseca 6243469715a8a171512cf9b528702e70393f01c6041José Fonseca assert(real_length <= bld->type.length); 6253469715a8a171512cf9b528702e70393f01c6041José Fonseca 6263469715a8a171512cf9b528702e70393f01c6041José Fonseca true_type = 
LLVMIntTypeInContext(bld->gallivm->context, 6273469715a8a171512cf9b528702e70393f01c6041José Fonseca bld->type.width * real_length); 6283469715a8a171512cf9b528702e70393f01c6041José Fonseca scalar_type = LLVMIntTypeInContext(bld->gallivm->context, 6293469715a8a171512cf9b528702e70393f01c6041José Fonseca bld->type.width * bld->type.length); 6303469715a8a171512cf9b528702e70393f01c6041José Fonseca val = LLVMBuildBitCast(builder, val, scalar_type, ""); 6313469715a8a171512cf9b528702e70393f01c6041José Fonseca /* 6323469715a8a171512cf9b528702e70393f01c6041José Fonseca * We're using always native types so we can use intrinsics. 6333469715a8a171512cf9b528702e70393f01c6041José Fonseca * However, if we don't do per-element calculations, we must ensure 6343469715a8a171512cf9b528702e70393f01c6041José Fonseca * the excess elements aren't used since they may contain garbage. 6353469715a8a171512cf9b528702e70393f01c6041José Fonseca */ 6363469715a8a171512cf9b528702e70393f01c6041José Fonseca if (real_length < bld->type.length) { 6373469715a8a171512cf9b528702e70393f01c6041José Fonseca val = LLVMBuildTrunc(builder, val, true_type, ""); 6383469715a8a171512cf9b528702e70393f01c6041José Fonseca } 6393469715a8a171512cf9b528702e70393f01c6041José Fonseca return LLVMBuildICmp(builder, LLVMIntNE, 6403469715a8a171512cf9b528702e70393f01c6041José Fonseca val, LLVMConstNull(true_type), ""); 6413469715a8a171512cf9b528702e70393f01c6041José Fonseca} 642