11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/**************************************************************************
21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc.
41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved.
51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a
71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the
81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including
91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish,
101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to
111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to
121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions:
131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the
151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions
161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software.
171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/
271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/**
295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file
305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca *
325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com>
335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */
345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca
351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h"
3788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca#include "util/u_memory.h"
38854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h"
397cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca
401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h"
4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h"
42efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul#include "lp_bld_init.h"
431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h"
44dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca#include "lp_bld_debug.h"
451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h"
461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/*
49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX
50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like
52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *    select <4 x i1> %C, %A, %B
54a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
553469715a8a171512cf9b528702e70393f01c6041José Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
563469715a8a171512cf9b528702e70393f01c6041José Fonseca * supported on some backends (x86) starting with llvm 3.1.
57a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
58a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Expanding the boolean vector to full SIMD register width, as in
59a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
60a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *    sext <4 x i1> %C to <4 x i32>
61a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
62a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
63a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * it causes assertion failures in LLVM 2.6. It appears to work correctly on
64a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * LLVM 2.7.
65a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca */
66a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
67a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
68e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/**
692297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func.
702297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
71877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
72e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */
731aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef
74efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paullp_build_compare(struct gallivm_state *gallivm,
752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 const struct lp_type type,
762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 unsigned func,
772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef a,
782297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef b)
791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{
80efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   LLVMBuilderRef builder = gallivm->builder;
81efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef cond;
85241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   LLVMValueRef res;
861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
87e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func >= PIPE_FUNC_NEVER);
88e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func <= PIPE_FUNC_ALWAYS);
89a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
90a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
91e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul
921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_NEVER)
931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return zeros;
941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_ALWAYS)
951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return ones;
961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
976b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
986b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca   /*
996b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca    * There are no unsigned integer comparison instructions in SSE.
1006b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca    */
1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1026b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca   if (!type.floating && !type.sign &&
1036b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca       type.width * type.length == 128 &&
1046b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca       util_cpu_caps.has_sse2 &&
1056b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca       (func == PIPE_FUNC_LESS ||
1066b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca        func == PIPE_FUNC_LEQUAL ||
1076b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca        func == PIPE_FUNC_GREATER ||
1086b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca        func == PIPE_FUNC_GEQUAL) &&
1096b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
1106b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
1116b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca                      __FUNCTION__, type.length, type.width);
1126b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca   }
1136b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca#endif
1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
115489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207
1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.width * type.length == 128) {
1187cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      if(type.floating && util_cpu_caps.has_sse) {
119e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* float[4] comparison */
120efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef args[3];
1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         unsigned cc;
1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         boolean swap;
1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         swap = FALSE;
1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         switch(func) {
1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_EQUAL:
1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 0;
1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_NOTEQUAL:
1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 4;
1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LESS:
1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LEQUAL:
1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GREATER:
1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GEQUAL:
1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         default:
1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            assert(0);
149efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul            return lp_build_undef(gallivm, type);
1501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         if(swap) {
1531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = b;
1541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = a;
1551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         else {
1571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = a;
1581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = b;
1591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
161efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0);
1622297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = lp_build_intrinsic(builder,
1631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  "llvm.x86.sse.cmp.ps",
1641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  vec_type,
1651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  args, 3);
1662297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
1671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return res;
1681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1697cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      else if(util_cpu_caps.has_sse2) {
170e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* int[4] comparison */
171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         static const struct {
172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned swap:1;
173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned eq:1;
174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned gt:1;
175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned not:1;
176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         } table[] = {
177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         };
186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpeq;
187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpgt;
188d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef args[2];
189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef res;
190efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type);
191d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         switch (type.width) {
193d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 8:
194d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
195d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
196d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
197d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 16:
198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 32:
202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
205d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         default:
206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            assert(0);
207efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul            return lp_build_undef(gallivm, type);
208d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
209d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
210b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         /* There are no unsigned comparison instructions. So flip the sign bit
211b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca          * so that the results match.
212d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          */
213b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         if (table[func].gt && !type.sign) {
214efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul            LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1));
2152297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            a = LLVMBuildXor(builder, a, msb, "");
2162297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            b = LLVMBuildXor(builder, b, msb, "");
217d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
219d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].swap) {
220d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = b;
221d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = a;
222d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
223d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else {
224d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = a;
225d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = b;
226d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
227d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
228d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].eq)
2292297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
230d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else if (table[func].gt)
2312297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
232d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else
233d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMConstNull(vec_type);
234d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
235d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].not)
2362297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = LLVMBuildNot(builder, res, "");
237d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
238d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         return res;
239d07b0383660cd318ba43abad11bb80de4a124951José Fonseca      }
2402ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   } /* if (type.width * type.length == 128) */
2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif
242a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */
2431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
2446b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca   /* XXX: It is not clear if we should use the ordered or unordered operators */
2456b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca
2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.floating) {
2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMRealPredicate op;
2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NEVER:
2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateFalse;
2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_ALWAYS:
2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateTrue;
2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUEQ;
2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUNE;
2601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULT;
2631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULE;
2661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGT;
2691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGE;
2721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
275efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         return lp_build_undef(gallivm, type);
2761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
277241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
278a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
2792297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildFCmp(builder, op, a, b, "");
280a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
281241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
2822ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
283e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildFCmp(builder, op, a, b, "");
284e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
2852ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
2862ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
2877b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
2887b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
289e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
290e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
2912ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         debug_printf("%s: warning: using slow element-wise float"
2922ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                      " vector comparison\n", __FUNCTION__);
2932ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for (i = 0; i < type.length; ++i) {
294efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul            LLVMValueRef index = lp_build_const_int32(gallivm, i);
2952ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildFCmp(builder, op,
2962ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
2972ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
2982ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
2992ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
3002ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
3012ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
3022ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
3032ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
3042ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
305241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
306241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   else {
3091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMIntPredicate op;
3101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
3111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
3121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntEQ;
3131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
3151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntNE;
3161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
3181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLT : LLVMIntULT;
3191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
3211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLE : LLVMIntULE;
3221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
3241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
3251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
3271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
3281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
3301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
331efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         return lp_build_undef(gallivm, type);
3321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
333241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
334a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
3352297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildICmp(builder, op, a, b, "");
336a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
337241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
3382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
339e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildICmp(builder, op, a, b, "");
340e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
3412ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
3422ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
3437b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
3447b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
345e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
346e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
347dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
348dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca            debug_printf("%s: using slow element-wise int"
349dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca                         " vector comparison\n", __FUNCTION__);
350dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca         }
3512ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul
3522ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for(i = 0; i < type.length; ++i) {
353efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul            LLVMValueRef index = lp_build_const_int32(gallivm, i);
3542ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildICmp(builder, op,
3552ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
3562ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
3572ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
3582ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
3592ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
3602ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
3612ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
3622ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
3632ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
364241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
365241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
3661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
3671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
368241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   return res;
3691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca}
37009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
37109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
3722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/**
3742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func.
3752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
376877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
3772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */
3782297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef
3792297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld,
3802297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             unsigned func,
3812297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef a,
3822297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef b)
3832297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{
384efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   return lp_build_compare(bld->gallivm, bld->type, func, a, b);
3852297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul}
3862297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3872297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
388877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/**
3892a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * Return (mask & a) | (~mask & b);
3902a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */
3912a45972fb2ba12a6561e5cba84d167f4c30566d4José FonsecaLLVMValueRef
3922a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonsecalp_build_select_bitwise(struct lp_build_context *bld,
3932a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef mask,
3942a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef a,
3952a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef b)
3962a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca{
3976299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul   LLVMBuilderRef builder = bld->gallivm->builder;
3982a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   struct lp_type type = bld->type;
3992a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   LLVMValueRef res;
4002a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
401a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
402a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
403a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
4042a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if (a == b) {
4052a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      return a;
4062a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
4072a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4082a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if(type.floating) {
409efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
4106299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      a = LLVMBuildBitCast(builder, a, int_vec_type, "");
4116299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      b = LLVMBuildBitCast(builder, b, int_vec_type, "");
4122a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
4132a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4146299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul   a = LLVMBuildAnd(builder, a, mask, "");
4152a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4162a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   /* This often gets translated to PANDN, but sometimes the NOT is
4172a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * pre-computed and stored in another constant. The best strategy depends
4182a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * on available registers, so it is not a big deal -- hopefully LLVM does
4192a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * the right decision attending the rest of the program.
4202a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    */
4216299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul   b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
4222a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4236299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul   res = LLVMBuildOr(builder, a, b, "");
4242a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4252a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if(type.floating) {
426efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
4276299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      res = LLVMBuildBitCast(builder, res, vec_type, "");
4282a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
4292a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4302a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   return res;
4312a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca}
4322a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4332a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4342a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca/**
435877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b;
436e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca *
4372a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
4382a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * will yield unpredictable results.
439877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */
44009a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
44109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld,
44209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef mask,
44309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef a,
44409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef b)
44509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
4466299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul   LLVMBuilderRef builder = bld->gallivm->builder;
447efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul   LLVMContextRef lc = bld->gallivm->context;
448b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   struct lp_type type = bld->type;
44909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   LLVMValueRef res;
45009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
451a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
452a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
453a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
45409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
45509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
45609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
4572ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   if (type.length == 1) {
4586299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
4596299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      res = LLVMBuildSelect(builder, mask, a, b, "");
46009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
4618c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca   else if (0) {
4628c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca      /* Generate a vector select.
4638c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       *
4648c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * XXX: Using vector selects would avoid emitting intrinsics, but they aren't
4658c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * properly supported yet.
4668c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       *
4678c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * LLVM 3.0 includes experimental support provided the -promote-elements
4688c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * options is passed to LLVM's command line (e.g., via
4698c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * llvm::cl::ParseCommandLineOptions), but resulting code quality is much
4708c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * worse, probably because some optimization passes don't know how to
4718c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * handle vector selects.
4728c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       *
4738c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * See also:
4748c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
4758c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       */
4768c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca
4778c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca      /* Convert the mask to a vector of booleans.
4788c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       * XXX: There are two ways to do this. Decide what's best.
4798c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca       */
4808c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca      if (1) {
4818c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca         LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
4828c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca         mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
4838c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca      } else {
4848c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca         mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
4858c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca      }
4868c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca      res = LLVMBuildSelect(builder, mask, a, b, "");
4878c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca   }
4883469715a8a171512cf9b528702e70393f01c6041José Fonseca   else if (((util_cpu_caps.has_sse4_1 &&
4893469715a8a171512cf9b528702e70393f01c6041José Fonseca              type.width * type.length == 128) ||
4903469715a8a171512cf9b528702e70393f01c6041José Fonseca             (util_cpu_caps.has_avx &&
4913469715a8a171512cf9b528702e70393f01c6041José Fonseca              type.width * type.length == 256 && type.width >= 32)) &&
49288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(a) &&
49388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(b) &&
49488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(mask)) {
49588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      const char *intrinsic;
49688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      LLVMTypeRef arg_type;
49788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      LLVMValueRef args[3];
49888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
4993469715a8a171512cf9b528702e70393f01c6041José Fonseca      /*
5003469715a8a171512cf9b528702e70393f01c6041José Fonseca       *  There's only float blend in AVX but can just cast i32/i64
5013469715a8a171512cf9b528702e70393f01c6041José Fonseca       *  to float.
5023469715a8a171512cf9b528702e70393f01c6041José Fonseca       */
5033469715a8a171512cf9b528702e70393f01c6041José Fonseca      if (type.width * type.length == 256) {
5043469715a8a171512cf9b528702e70393f01c6041José Fonseca         if (type.width == 64) {
5053469715a8a171512cf9b528702e70393f01c6041José Fonseca           intrinsic = "llvm.x86.avx.blendv.pd.256";
5063469715a8a171512cf9b528702e70393f01c6041José Fonseca           arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
5073469715a8a171512cf9b528702e70393f01c6041José Fonseca         }
5083469715a8a171512cf9b528702e70393f01c6041José Fonseca         else {
5093469715a8a171512cf9b528702e70393f01c6041José Fonseca            intrinsic = "llvm.x86.avx.blendv.ps.256";
5103469715a8a171512cf9b528702e70393f01c6041José Fonseca            arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
5113469715a8a171512cf9b528702e70393f01c6041José Fonseca         }
5123469715a8a171512cf9b528702e70393f01c6041José Fonseca      }
5133469715a8a171512cf9b528702e70393f01c6041José Fonseca      else if (type.floating &&
5143469715a8a171512cf9b528702e70393f01c6041José Fonseca               type.width == 64) {
51588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.blendvpd";
516efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
517c79f162367b99d9438bd1589ecfdeba69baa9d3dKeith Whitwell      } else if (type.floating &&
518c79f162367b99d9438bd1589ecfdeba69baa9d3dKeith Whitwell                 type.width == 32) {
51988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.blendvps";
520efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
52188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      } else {
52288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.pblendvb";
523efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul         arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
52488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
52588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
52688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->int_vec_type) {
5276299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul         mask = LLVMBuildBitCast(builder, mask, arg_type, "");
52888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
52988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
53088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->vec_type) {
5316299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul         a = LLVMBuildBitCast(builder, a, arg_type, "");
5326299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul         b = LLVMBuildBitCast(builder, b, arg_type, "");
53388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
53488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
53588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[0] = b;
53688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[1] = a;
53788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[2] = mask;
53888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
5396299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      res = lp_build_intrinsic(builder, intrinsic,
54088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca                               arg_type, args, Elements(args));
54188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
54288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->vec_type) {
5436299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul         res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
54488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
54588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca   }
5462ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   else {
5472a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      res = lp_build_select_bitwise(bld, mask, a, b);
54809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
54909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
55009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   return res;
55109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
55209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
55309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
5546ed726b8fc6210a41fe325591e1428d19f419108José Fonseca/**
5556ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * Return mask ? a : b;
5566ed726b8fc6210a41fe325591e1428d19f419108José Fonseca *
5576ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * mask is a TGSI_WRITEMASK_xxx.
5586ed726b8fc6210a41fe325591e1428d19f419108José Fonseca */
55909a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
56009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld,
5616ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                    unsigned mask,
56209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef a,
5636ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                    LLVMValueRef b)
56409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
5656299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul   LLVMBuilderRef builder = bld->gallivm->builder;
566b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   const struct lp_type type = bld->type;
56709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const unsigned n = type.length;
56809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   unsigned i, j;
56909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
5706ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   assert((mask & ~0xf) == 0);
571a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
572a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
573a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
57409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
57509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
5766ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   if((mask & 0xf) == 0xf)
57709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
5786ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   if((mask & 0xf) == 0x0)
57909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return b;
58009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == bld->undef || b == bld->undef)
58109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return bld->undef;
58209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
58309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /*
5848c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca    * There are two major ways of accomplishing this:
5858c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca    * - with a shuffle
5868c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca    * - with a select
58709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    *
5888c34a41278b8bfd36e4bd8ab2e417430382b3365José Fonseca    * The flip between these is empirical and might need to be adjusted.
58909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
59009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if (n <= 4) {
59109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /*
59209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       * Shuffle.
59309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       */
594efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
59509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
59609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
59709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
59809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
5996ed726b8fc6210a41fe325591e1428d19f419108José Fonseca            shuffles[j + i] = LLVMConstInt(elem_type,
6006ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                                           (mask & (1 << i) ? 0 : n) + j + i,
6016ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                                           0);
60209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
6036299f241e9fdd86e705d144a42d9b1979c13f9adBrian Paul      return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
60409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
6051fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca   else {
606efc82aef35a2aac5d2ed9774f6d28f2626796416Brian Paul      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask);
6076ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      return lp_build_select(bld, mask_vec, a, b);
60809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
60909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
6103469715a8a171512cf9b528702e70393f01c6041José Fonseca
6113469715a8a171512cf9b528702e70393f01c6041José Fonseca
6123469715a8a171512cf9b528702e70393f01c6041José Fonseca/**
6133469715a8a171512cf9b528702e70393f01c6041José Fonseca * Return (scalar-cast)val ? true : false;
6143469715a8a171512cf9b528702e70393f01c6041José Fonseca */
6153469715a8a171512cf9b528702e70393f01c6041José FonsecaLLVMValueRef
6163469715a8a171512cf9b528702e70393f01c6041José Fonsecalp_build_any_true_range(struct lp_build_context *bld,
6173469715a8a171512cf9b528702e70393f01c6041José Fonseca                        unsigned real_length,
6183469715a8a171512cf9b528702e70393f01c6041José Fonseca                        LLVMValueRef val)
6193469715a8a171512cf9b528702e70393f01c6041José Fonseca{
6203469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMBuilderRef builder = bld->gallivm->builder;
6213469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMTypeRef scalar_type;
6223469715a8a171512cf9b528702e70393f01c6041José Fonseca   LLVMTypeRef true_type;
6233469715a8a171512cf9b528702e70393f01c6041José Fonseca
6243469715a8a171512cf9b528702e70393f01c6041José Fonseca   assert(real_length <= bld->type.length);
6253469715a8a171512cf9b528702e70393f01c6041José Fonseca
6263469715a8a171512cf9b528702e70393f01c6041José Fonseca   true_type = LLVMIntTypeInContext(bld->gallivm->context,
6273469715a8a171512cf9b528702e70393f01c6041José Fonseca                                    bld->type.width * real_length);
6283469715a8a171512cf9b528702e70393f01c6041José Fonseca   scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
6293469715a8a171512cf9b528702e70393f01c6041José Fonseca                                      bld->type.width * bld->type.length);
6303469715a8a171512cf9b528702e70393f01c6041José Fonseca   val = LLVMBuildBitCast(builder, val, scalar_type, "");
6313469715a8a171512cf9b528702e70393f01c6041José Fonseca   /*
6323469715a8a171512cf9b528702e70393f01c6041José Fonseca    * We're using always native types so we can use intrinsics.
6333469715a8a171512cf9b528702e70393f01c6041José Fonseca    * However, if we don't do per-element calculations, we must ensure
6343469715a8a171512cf9b528702e70393f01c6041José Fonseca    * the excess elements aren't used since they may contain garbage.
6353469715a8a171512cf9b528702e70393f01c6041José Fonseca    */
6363469715a8a171512cf9b528702e70393f01c6041José Fonseca   if (real_length < bld->type.length) {
6373469715a8a171512cf9b528702e70393f01c6041José Fonseca      val = LLVMBuildTrunc(builder, val, true_type, "");
6383469715a8a171512cf9b528702e70393f01c6041José Fonseca   }
6393469715a8a171512cf9b528702e70393f01c6041José Fonseca   return LLVMBuildICmp(builder, LLVMIntNE,
6403469715a8a171512cf9b528702e70393f01c6041José Fonseca                        val, LLVMConstNull(true_type), "");
6413469715a8a171512cf9b528702e70393f01c6041José Fonseca}
642