1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/************************************************************************** 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright 2009 VMware, Inc. 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * All Rights Reserved. 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * "Software"), to deal in the Software without restriction, including 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * without limitation the rights to use, copy, modify, merge, publish, 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * distribute, sub license, and/or sell copies of the Software, and to 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * permit persons to whom the Software is furnished to do so, subject to 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the following conditions: 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * next paragraph) shall be included in all copies or substantial portions 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * of the Software. 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org **************************************************************************/ 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @file 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Helper functions for logical operations. 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * @author Jose Fonseca <jfonseca@vmware.com> 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_cpu_detect.h" 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_memory.h" 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "util/u_debug.h" 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_type.h" 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_const.h" 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_init.h" 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_intr.h" 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_debug.h" 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "lp_bld_logic.h" 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/* 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * XXX 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Selection with vector conditional like 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * select <4 x i1> %C, %A, %B 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * supported on some backends (x86) starting with llvm 3.1. 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Expanding the boolean vector to full SIMD register width, as in 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * sext <4 x i1> %C to <4 x i32> 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * it causes assertion failures in LLVM 2.6. It appears to work correctly on 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LLVM 2.7. 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Build code to compare two values 'a' and 'b' of 'type' using the given func. 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param func one of PIPE_FUNC_x 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The result values will be 0 for false or ~0 for true. 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_compare(struct gallivm_state *gallivm, 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct lp_type type, 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned func, 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef a, 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef b) 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = gallivm->builder; 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef zeros = LLVMConstNull(int_vec_type); 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef cond; 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef res; 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(func >= PIPE_FUNC_NEVER); 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(func <= PIPE_FUNC_ALWAYS); 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(lp_check_value(type, a)); 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(lp_check_value(type, b)); 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(func == PIPE_FUNC_NEVER) 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return zeros; 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(func == PIPE_FUNC_ALWAYS) 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return ones; 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * There are no unsigned integer comparison instructions in SSE. 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!type.floating && !type.sign && 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type.width * type.length == 128 && 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org util_cpu_caps.has_sse2 && 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (func == PIPE_FUNC_LESS || 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org func == PIPE_FUNC_LEQUAL || 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org func == PIPE_FUNC_GREATER || 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org func == PIPE_FUNC_GEQUAL) && 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (gallivm_debug & GALLIVM_DEBUG_PERF)) { 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org __FUNCTION__, type.length, type.width); 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if HAVE_LLVM < 0x0207 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(type.width * type.length == 128) { 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(type.floating && util_cpu_caps.has_sse) { 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* float[4] comparison */ 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef args[3]; 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned cc; 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org boolean swap; 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swap = FALSE; 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(func) { 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_EQUAL: 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cc = 0; 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_NOTEQUAL: 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cc = 4; 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_LESS: 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cc = 1; 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_LEQUAL: 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cc = 2; 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_GREATER: 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cc = 1; 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swap = TRUE; 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_GEQUAL: 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cc = 2; 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org swap = TRUE; 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_undef(gallivm, type); 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(swap) { 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = b; 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = a; 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = a; 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = b; 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0); 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = lp_build_intrinsic(builder, 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org "llvm.x86.sse.cmp.ps", 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org vec_type, 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args, 3); 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return res; 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if(util_cpu_caps.has_sse2) { 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* int[4] comparison */ 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org static const struct { 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned swap:1; 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned eq:1; 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned gt:1; 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned not:1; 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } table[] = { 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const char *pcmpeq; 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const char *pcmpgt; 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef args[2]; 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef res; 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (type.width) { 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 8: 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 16: 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 32: 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_undef(gallivm, type); 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* There are no unsigned comparison instructions. So flip the sign bit 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * so that the results match. 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (table[func].gt && !type.sign) { 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1)); 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org a = LLVMBuildXor(builder, a, msb, ""); 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org b = LLVMBuildXor(builder, b, msb, ""); 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(table[func].swap) { 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = b; 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = a; 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = a; 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = b; 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(table[func].eq) 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if (table[func].gt) 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMConstNull(vec_type); 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(table[func].not) 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildNot(builder, res, ""); 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return res; 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } /* if (type.width * type.length == 128) */ 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif /* HAVE_LLVM < 0x0207 */ 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* XXX: It is not clear if we should use the ordered or unordered operators */ 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(type.floating) { 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMRealPredicate op; 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(func) { 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_NEVER: 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMRealPredicateFalse; 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_ALWAYS: 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMRealPredicateTrue; 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_EQUAL: 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMRealUEQ; 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_NOTEQUAL: 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMRealUNE; 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_LESS: 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMRealULT; 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_LEQUAL: 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMRealULE; 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_GREATER: 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMRealUGT; 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_GEQUAL: 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMRealUGE; 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_undef(gallivm, type); 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if HAVE_LLVM >= 0x0207 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cond = LLVMBuildFCmp(builder, op, a, b, ""); 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#else 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type.length == 1) { 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cond = LLVMBuildFCmp(builder, op, a, b, ""); 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned i; 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMGetUndef(int_vec_type); 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org debug_printf("%s: warning: using slow element-wise float" 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org " vector comparison\n", __FUNCTION__); 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for (i = 0; i < type.length; ++i) { 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef index = lp_build_const_int32(gallivm, i); 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cond = LLVMBuildFCmp(builder, op, 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuildExtractElement(builder, a, index, ""), 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuildExtractElement(builder, b, index, ""), 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ""); 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cond = LLVMBuildSelect(builder, cond, 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMConstExtractElement(ones, index), 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMConstExtractElement(zeros, index), 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ""); 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildInsertElement(builder, res, cond, index, ""); 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMIntPredicate op; 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch(func) { 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_EQUAL: 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMIntEQ; 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_NOTEQUAL: 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = LLVMIntNE; 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_LESS: 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = type.sign ? LLVMIntSLT : LLVMIntULT; 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_LEQUAL: 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = type.sign ? LLVMIntSLE : LLVMIntULE; 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_GREATER: 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = type.sign ? LLVMIntSGT : LLVMIntUGT; 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case PIPE_FUNC_GEQUAL: 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org op = type.sign ? LLVMIntSGE : LLVMIntUGE; 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(0); 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_undef(gallivm, type); 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#if HAVE_LLVM >= 0x0207 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cond = LLVMBuildICmp(builder, op, a, b, ""); 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#else 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type.length == 1) { 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cond = LLVMBuildICmp(builder, op, a, b, ""); 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned i; 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMGetUndef(int_vec_type); 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (gallivm_debug & GALLIVM_DEBUG_PERF) { 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org debug_printf("%s: using slow element-wise int" 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org " vector comparison\n", __FUNCTION__); 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = 0; i < type.length; ++i) { 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef index = lp_build_const_int32(gallivm, i); 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cond = LLVMBuildICmp(builder, op, 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuildExtractElement(builder, a, index, ""), 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuildExtractElement(builder, b, index, ""), 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ""); 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cond = LLVMBuildSelect(builder, cond, 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMConstExtractElement(ones, index), 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMConstExtractElement(zeros, index), 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ""); 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildInsertElement(builder, res, cond, index, ""); 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#endif 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return res; 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Build code to compare two values 'a' and 'b' using the given func. 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * \param func one of PIPE_FUNC_x 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The result values will be 0 for false or ~0 for true. 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_cmp(struct lp_build_context *bld, 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned func, 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef a, 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef b) 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_compare(bld->gallivm, bld->type, func, a, b); 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Return (mask & a) | (~mask & b); 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_select_bitwise(struct lp_build_context *bld, 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef mask, 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef a, 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef b) 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = bld->gallivm->builder; 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_type type = bld->type; 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef res; 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(lp_check_value(type, a)); 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(lp_check_value(type, b)); 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (a == b) { 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return a; 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(type.floating) { 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org a = LLVMBuildBitCast(builder, a, int_vec_type, ""); 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org b = LLVMBuildBitCast(builder, b, int_vec_type, ""); 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org a = LLVMBuildAnd(builder, a, mask, ""); 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* This often gets translated to PANDN, but sometimes the NOT is 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * pre-computed and stored in another constant. The best strategy depends 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * on available registers, so it is not a big deal -- hopefully LLVM does 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the right decision attending the rest of the program. 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), ""); 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildOr(builder, a, b, ""); 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(type.floating) { 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildBitCast(builder, res, vec_type, ""); 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return res; 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Return mask ? a : b; 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * will yield unpredictable results. 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_select(struct lp_build_context *bld, 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef mask, 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef a, 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef b) 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = bld->gallivm->builder; 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMContextRef lc = bld->gallivm->context; 448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org struct lp_type type = bld->type; 449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef res; 450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(lp_check_value(type, a)); 452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(lp_check_value(type, b)); 453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(a == b) 455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return a; 456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type.length == 1) { 458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); 459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildSelect(builder, mask, a, b, ""); 460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if (0) { 462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Generate a vector select. 463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * XXX: Using vector selects would avoid emitting intrinsics, but they aren't 465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * properly supported yet. 466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LLVM 3.0 includes experimental support provided the -promote-elements 468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * options is passed to LLVM's command line (e.g., via 469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * llvm::cl::ParseCommandLineOptions), but resulting code quality is much 470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * worse, probably because some optimization passes don't know how to 471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * handle vector selects. 472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * See also: 474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html 475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* Convert the mask to a vector of booleans. 478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * XXX: There are two ways to do this. Decide what's best. 479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (1) { 481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); 482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); 483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), ""); 485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildSelect(builder, mask, a, b, ""); 487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if (((util_cpu_caps.has_sse4_1 && 489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type.width * type.length == 128) || 490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (util_cpu_caps.has_avx && 491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type.width * type.length == 256 && type.width >= 32)) && 492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !LLVMIsConstant(a) && 493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !LLVMIsConstant(b) && 494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org !LLVMIsConstant(mask)) { 495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const char *intrinsic; 496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef arg_type; 497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef args[3]; 498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * There's only float blend in AVX but can just cast i32/i64 501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to float. 502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type.width * type.length == 256) { 504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (type.width == 64) { 505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intrinsic = "llvm.x86.avx.blendv.pd.256"; 506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4); 507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intrinsic = "llvm.x86.avx.blendv.ps.256"; 510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); 511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else if (type.floating && 514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type.width == 64) { 515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intrinsic = "llvm.x86.sse41.blendvpd"; 516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2); 517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else if (type.floating && 518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org type.width == 32) { 519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intrinsic = "llvm.x86.sse41.blendvps"; 520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4); 521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } else { 522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org intrinsic = "llvm.x86.sse41.pblendvb"; 523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16); 524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (arg_type != bld->int_vec_type) { 527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org mask = LLVMBuildBitCast(builder, mask, arg_type, ""); 528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (arg_type != bld->vec_type) { 531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org a = LLVMBuildBitCast(builder, a, arg_type, ""); 532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org b = LLVMBuildBitCast(builder, b, arg_type, ""); 533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[0] = b; 536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[1] = a; 537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org args[2] = mask; 538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = lp_build_intrinsic(builder, intrinsic, 540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org arg_type, args, Elements(args)); 541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (arg_type != bld->vec_type) { 543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); 544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org res = lp_build_select_bitwise(bld, mask, a, b); 548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return res; 551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Return mask ? a : b; 556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * mask is a TGSI_WRITEMASK_xxx. 558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_select_aos(struct lp_build_context *bld, 561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned mask, 562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef a, 563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef b) 564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 565f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = bld->gallivm->builder; 566f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const struct lp_type type = bld->type; 567f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const unsigned n = type.length; 568f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned i, j; 569f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 570f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert((mask & ~0xf) == 0); 571f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(lp_check_value(type, a)); 572f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(lp_check_value(type, b)); 573f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 574f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(a == b) 575f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return a; 576f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if((mask & 0xf) == 0xf) 577f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return a; 578f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if((mask & 0xf) == 0x0) 579f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return b; 580f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if(a == bld->undef || b == bld->undef) 581f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return bld->undef; 582f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 583f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 584f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * There are two major ways of accomplishing this: 585f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * - with a shuffle 586f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * - with a select 587f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * 588f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The flip between these is empirical and might need to be adjusted. 589f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 590f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (n <= 4) { 591f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 592f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Shuffle. 593f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 594f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 595f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 596f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 597f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(j = 0; j < n; j += 4) 598f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org for(i = 0; i < 4; ++i) 599f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org shuffles[j + i] = LLVMConstInt(elem_type, 600f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (mask & (1 << i) ? 0 : n) + j + i, 601f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 0); 602f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 603f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), ""); 604f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 605f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else { 606f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask); 607f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return lp_build_select(bld, mask_vec, a, b); 608f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 609f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 610f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 611f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 612f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** 613f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Return (scalar-cast)val ? true : false; 614f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 615f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgLLVMValueRef 616f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orglp_build_any_true_range(struct lp_build_context *bld, 617f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned real_length, 618f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMValueRef val) 619f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 620f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMBuilderRef builder = bld->gallivm->builder; 621f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef scalar_type; 622f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LLVMTypeRef true_type; 623f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 624f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(real_length <= bld->type.length); 625f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 626f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org true_type = LLVMIntTypeInContext(bld->gallivm->context, 627f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->type.width * real_length); 628f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org scalar_type = LLVMIntTypeInContext(bld->gallivm->context, 629f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org bld->type.width * bld->type.length); 630f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org val = LLVMBuildBitCast(builder, val, scalar_type, ""); 631f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /* 632f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * We're using always native types so we can use intrinsics. 633f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * However, if we don't do per-element calculations, we must ensure 634f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the excess elements aren't used since they may contain garbage. 635f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */ 636f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (real_length < bld->type.length) { 637f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org val = LLVMBuildTrunc(builder, val, true_type, ""); 638f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 639f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return LLVMBuildICmp(builder, LLVMIntNE, 640f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org val, LLVMConstNull(true_type), ""); 641f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 642