lp_bld_logic.c revision 877f2356b2ab7caa16beed496f36eca64ee201e1
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/************************************************************************** 21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc. 41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved. 51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a 71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the 81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including 91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish, 101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to 111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to 121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions: 131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the 151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions 161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software. 171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/ 271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/** 295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file 305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations. 315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * 325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com> 335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */ 345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca 351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h" 37854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h" 387cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca 391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h" 4009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h" 411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h" 421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h" 431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 45e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/** 462297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func. 472297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 48877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 49e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */ 501aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef 512297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder, 522297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul const struct lp_type type, 532297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 542297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 552297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{ 571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMTypeRef vec_type = lp_build_vec_type(type); 581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef zeros = LLVMConstNull(int_vec_type); 601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef cond; 62241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMValueRef res; 63241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca unsigned i; 641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 65e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func >= PIPE_FUNC_NEVER); 66e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul assert(func <= PIPE_FUNC_ALWAYS); 67e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul 681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_NEVER) 691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return zeros; 701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_ALWAYS) 711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return ones; 721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* TODO: optimize the constant case */ 741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* XXX: It is not clear if we should use the ordered or unordered operators */ 761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.width * type.length == 128) { 797cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca if(type.floating && util_cpu_caps.has_sse) { 80e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* float[4] comparison */ 811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef args[3]; 821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca unsigned cc; 831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca boolean swap; 841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 851aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = FALSE; 861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 0; 891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 4; 921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 1001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 1092297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(swap) { 1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = b; 1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = a; 1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = a; 1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = b; 1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 1222297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, 1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca "llvm.x86.sse.cmp.ps", 1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca vec_type, 1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args, 3); 1262297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return res; 1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1297cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca else if(util_cpu_caps.has_sse2) { 130e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul /* int[4] comparison */ 131d07b0383660cd318ba43abad11bb80de4a124951José Fonseca static const struct { 132d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned swap:1; 133d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned eq:1; 134d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned gt:1; 135d07b0383660cd318ba43abad11bb80de4a124951José Fonseca unsigned not:1; 136d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } table[] = { 137d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 138d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 139d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 140d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 141d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 142d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 143d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 144d07b0383660cd318ba43abad11bb80de4a124951José Fonseca {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 145d07b0383660cd318ba43abad11bb80de4a124951José Fonseca }; 146d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpeq; 147d07b0383660cd318ba43abad11bb80de4a124951José Fonseca const char *pcmpgt; 148d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef args[2]; 149d07b0383660cd318ba43abad11bb80de4a124951José Fonseca LLVMValueRef res; 150d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 151d07b0383660cd318ba43abad11bb80de4a124951José Fonseca switch (type.width) { 152d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 8: 153d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 16: 157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca case 32: 161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 162d07b0383660cd318ba43abad11bb80de4a124951José Fonseca pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca break; 164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca default: 165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca assert(0); 1662297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca /* There are no signed byte and unsigned word/dword comparison 170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca * instructions. So flip the sign bit so that the results match. 171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca */ 172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].gt && 173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca ((type.width == 8 && type.sign) || 174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca (type.width != 8 && !type.sign))) { 17577b35dc179473afbbd8c709c9f32c0f537c90776José Fonseca LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); 1762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul a = LLVMBuildXor(builder, a, msb, ""); 1772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul b = LLVMBuildXor(builder, b, msb, ""); 178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].swap) { 181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = b; 182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = a; 183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else { 185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[0] = a; 186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca args[1] = b; 187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 188d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].eq) 1902297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 191d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else if (table[func].gt) 1922297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 193d07b0383660cd318ba43abad11bb80de4a124951José Fonseca else 194d07b0383660cd318ba43abad11bb80de4a124951José Fonseca res = LLVMConstNull(vec_type); 195d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 196d07b0383660cd318ba43abad11bb80de4a124951José Fonseca if(table[func].not) 1972297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildNot(builder, res, ""); 198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca 199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca return res; 200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca } 2011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 2021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif 2031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 2041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.floating) { 2051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMRealPredicate op; 2061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NEVER: 2081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateFalse; 2091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_ALWAYS: 2111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateTrue; 2121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUEQ; 2151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUNE; 2181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 2201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULT; 2211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 2231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULE; 2241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 2261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGT; 2271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 2291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGE; 2301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 2321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 2332297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 2341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 235241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 236241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#if 0 237241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca /* XXX: Although valid IR, no LLVM target currently support this */ 2382297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildFCmp(builder, op, a, b, ""); 2392297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildSelect(builder, cond, ones, zeros, ""); 240241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 241241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca debug_printf("%s: warning: using slow element-wise vector comparison\n", 242241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca __FUNCTION__); 243241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca res = LLVMGetUndef(int_vec_type); 244241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca for(i = 0; i < type.length; ++i) { 245241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 2462297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildFCmp(builder, op, 2472297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMBuildExtractElement(builder, a, index, ""), 2482297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMBuildExtractElement(builder, b, index, ""), 249241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca ""); 2502297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildSelect(builder, cond, 251241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMConstExtractElement(ones, index), 252241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMConstExtractElement(zeros, index), 253241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca ""); 2542297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 255241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 256241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 2581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 2591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMIntPredicate op; 2601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 2611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 2621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntEQ; 2631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 2651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntNE; 2661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 2681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLT : LLVMIntULT; 2691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 2711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLE : LLVMIntULE; 2721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 2741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGT : LLVMIntUGT; 2751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 2771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGE : LLVMIntUGE; 2781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 2791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 2801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 2812297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_undef(type); 2821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 283241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca 284241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#if 0 285241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca /* XXX: Although valid IR, no LLVM target currently support this */ 2862297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildICmp(builder, op, a, b, ""); 2872297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildSelect(builder, cond, ones, zeros, ""); 288241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else 2892297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul debug_printf("%s: warning: using slow element-wise int vector comparison\n", 290241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca __FUNCTION__); 291241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca res = LLVMGetUndef(int_vec_type); 292241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca for(i = 0; i < type.length; ++i) { 293241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 2942297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildICmp(builder, op, 2952297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMBuildExtractElement(builder, a, index, ""), 2962297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMBuildExtractElement(builder, b, index, ""), 297241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca ""); 2982297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul cond = LLVMBuildSelect(builder, cond, 299241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMConstExtractElement(ones, index), 300241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca LLVMConstExtractElement(zeros, index), 301241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca ""); 3022297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul res = LLVMBuildInsertElement(builder, res, cond, index, ""); 303241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca } 304241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif 3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 307241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca return res; 3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca} 30909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 31009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 3112297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3122297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/** 3132297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func. 3142297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func one of PIPE_FUNC_x 315877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true. 3162297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */ 3172297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef 3182297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld, 3192297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul unsigned func, 3202297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef a, 3212297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul LLVMValueRef b) 3222297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{ 3232297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul return lp_build_compare(bld->builder, bld->type, func, a, b); 3242297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul} 3252297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 3262297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul 327877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/** 328877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b; 329877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */ 33009a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 33109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld, 33209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask, 33309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 33409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b) 33509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 336b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca struct lp_type type = bld->type; 33709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef res; 33809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 33909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 34009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 34109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 34209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(type.floating) { 34309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 34409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 34509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 34609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 34709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 34809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca a = LLVMBuildAnd(bld->builder, a, mask, ""); 34909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 35009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* This often gets translated to PANDN, but sometimes the NOT is 35109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * pre-computed and stored in another constant. The best strategy depends 35209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * on available registers, so it is not a big deal -- hopefully LLVM does 35309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * the right decision attending the rest of the program. 35409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 35509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 35609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 35709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca res = LLVMBuildOr(bld->builder, a, b, ""); 35809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 35909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(type.floating) { 36009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef vec_type = lp_build_vec_type(type); 36109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 36209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 36309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 36409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return res; 36509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 36609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 36709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 36809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 36909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld, 37009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 37109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b, 3725e13e987da6ce656b08f6c25f8d373c80949e3b0José Fonseca const boolean cond[4]) 37309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 374b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca const struct lp_type type = bld->type; 37509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca const unsigned n = type.length; 37609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca unsigned i, j; 37709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 37809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 37909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 38009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(cond[0] && cond[1] && cond[2] && cond[3]) 38109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 38209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) 38309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return b; 38409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == bld->undef || b == bld->undef) 38509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return bld->undef; 38609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 38709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 38809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * There are three major ways of accomplishing this: 38909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a shuffle, 39009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a select, 39109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - or with a bit mask. 39209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * 39309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Select isn't supported for vector types yet. 39409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * The flip between these is empirical and might need to be. 39509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 39609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if (n <= 4) { 39709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 39809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Shuffle. 39909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 40009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt32Type(); 40109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 40209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 40309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 40409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 40509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); 40609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 40709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 40809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 4091fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca else { 41009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0 4111fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca /* XXX: Unfortunately select of vectors do not work */ 41209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* Use a select */ 41309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt1Type(); 41409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; 41509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 41609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 41709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 41809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); 41909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 42009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); 4211fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else 42209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask = lp_build_const_mask_aos(type, cond); 42309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return lp_build_select(bld, mask, a, b); 4241fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif 42509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 42609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 42785c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin 42885c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack RusinLLVMValueRef 42985c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusinlp_build_alloca(struct lp_build_context *bld) 43085c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin{ 43185c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin const struct lp_type type = bld->type; 43285c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin 43385c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin if (type.length > 1) { /*vector*/ 43485c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), ""); 43585c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin } else { /*scalar*/ 43685c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); 43785c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin } 43885c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin} 439