lp_bld_logic.c revision 09a7b011acb3957725bb7e1bc4125f0939a295e7
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/************************************************************************** 21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc. 41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved. 51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a 71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the 81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including 91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish, 101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to 111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to 121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions: 131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the 151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions 161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software. 171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * 261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/ 271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "pipe/p_defines.h" 301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h" 3109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h" 321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h" 331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h" 341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 361aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef 371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonsecalp_build_cmp(struct lp_build_context *bld, 381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca unsigned func, 391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef a, 401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef b) 411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{ 421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca const union lp_type type = bld->type; 431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMTypeRef vec_type = lp_build_vec_type(type); 441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef zeros = LLVMConstNull(int_vec_type); 461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef cond; 481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_NEVER) 501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return zeros; 511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(func == PIPE_FUNC_ALWAYS) 521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return ones; 531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* TODO: optimize the constant case */ 551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca /* XXX: It is not clear if we should use the ordered or unordered operators */ 571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.width * type.length == 128) { 601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.floating) { 611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef args[3]; 621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca unsigned cc; 631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca boolean swap; 641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMValueRef res; 651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = FALSE; 671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 0; 701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 4; 731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 1; 821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 851aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cc = 2; 861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca swap = TRUE; 871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return bld->undef; 911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(swap) { 941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = b; 951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = a; 961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[0] = a; 991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[1] = b; 1001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca res = lp_build_intrinsic(bld->builder, 1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca "llvm.x86.sse.cmp.ps", 1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca vec_type, 1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca args, 3); 1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca res = LLVMBuildBitCast(bld->builder, res, int_vec_type, ""); 1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return res; 1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif 1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca if(type.floating) { 1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMRealPredicate op; 1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NEVER: 1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateFalse; 1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_ALWAYS: 1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealPredicateTrue; 1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUEQ; 1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUNE; 1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULT; 1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealULE; 1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGT; 1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMRealUGE; 1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return bld->undef; 1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cond = LLVMBuildFCmp(bld->builder, op, a, b, ""); 1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca else { 1471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca LLVMIntPredicate op; 1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca switch(func) { 1491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_EQUAL: 1501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntEQ; 1511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_NOTEQUAL: 1531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = LLVMIntNE; 1541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LESS: 1561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLT : LLVMIntULT; 1571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_LEQUAL: 1591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSLE : LLVMIntULE; 1601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GREATER: 1621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGT : LLVMIntUGT; 1631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca case PIPE_FUNC_GEQUAL: 1651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca op = type.sign ? LLVMIntSGE : LLVMIntUGE; 1661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca break; 1671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca default: 1681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca assert(0); 1691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return bld->undef; 1701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca cond = LLVMBuildICmp(bld->builder, op, a, b, ""); 1721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca } 1731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca 1741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca return LLVMBuildSelect(bld->builder, cond, ones, zeros, ""); 1751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca} 17609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 17709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 17809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 17909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld, 18009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask, 18109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 18209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b) 18309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 18409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca union lp_type type = bld->type; 18509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef res; 18609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 18709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 18809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 18909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 19009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(type.floating) { 19109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 19209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 19309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 19409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 19509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 19609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */ 19709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 19809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca a = LLVMBuildAnd(bld->builder, a, mask, ""); 19909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 20009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* This often gets translated to PANDN, but sometimes the NOT is 20109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * pre-computed and stored in another constant. The best strategy depends 20209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * on available registers, so it is not a big deal -- hopefully LLVM does 20309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * the right decision attending the rest of the program. 20409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 20509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 20609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 20709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca res = LLVMBuildOr(bld->builder, a, b, ""); 20809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 20909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(type.floating) { 21009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef vec_type = lp_build_vec_type(type); 21109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 21209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 21309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 21409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return res; 21509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 21609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 21709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 21809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef 21909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld, 22009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef a, 22109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef b, 22209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca boolean cond[4]) 22309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{ 22409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca const union lp_type type = bld->type; 22509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca const unsigned n = type.length; 22609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca unsigned i, j; 22709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 22809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == b) 22909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 23009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(cond[0] && cond[1] && cond[2] && cond[3]) 23109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return a; 23209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) 23309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return b; 23409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if(a == bld->undef || b == bld->undef) 23509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return bld->undef; 23609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 23709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 23809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * There are three major ways of accomplishing this: 23909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a shuffle, 24009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - with a select, 24109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * - or with a bit mask. 24209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * 24309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Select isn't supported for vector types yet. 24409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * The flip between these is empirical and might need to be. 24509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 24609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca if (n <= 4) { 24709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* 24809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca * Shuffle. 24909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca */ 25009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt32Type(); 25109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 25209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 25309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 25409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 25509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); 25609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 25709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 25809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 25909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0 26009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca else if(0) { 26109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* FIXME: Unfortunately select of vectors do not work */ 26209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca /* Use a select */ 26309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMTypeRef elem_type = LLVMInt1Type(); 26409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; 26509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 26609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(j = 0; j < n; j += 4) 26709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca for(i = 0; i < 4; ++i) 26809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); 26909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca 27009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); 27109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 27209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#endif 27309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca else { 27409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca LLVMValueRef mask = lp_build_const_mask_aos(type, cond); 27509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca return lp_build_select(bld, mask, a, b); 27609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca } 27709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca} 278