lp_bld_logic.c revision 2a45972fb2ba12a6561e5cba84d167f4c30566d4
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/**************************************************************************
21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc.
41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved.
51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a
71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the
81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including
91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish,
101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to
111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to
121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions:
131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the
151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions
161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software.
171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/
271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/**
295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file
305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca *
325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com>
335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */
345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca
351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h"
3788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca#include "util/u_memory.h"
38854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h"
397cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca
401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h"
4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h"
421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h"
431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h"
441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
46a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/*
47a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX
48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like
50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *    select <4 x i1> %C, %A, %B
52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
54a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * supported on any backend.
55a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
56a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Expanding the boolean vector to full SIMD register width, as in
57a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
58a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *    sext <4 x i1> %C to <4 x i32>
59a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
60a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
61a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * it causes assertion failures in LLVM 2.6. It appears to work correctly on
62a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * LLVM 2.7.
63a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca */
64a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
65a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
66e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/**
672297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func.
682297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
69877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
70e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */
711aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef
722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder,
732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 const struct lp_type type,
742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 unsigned func,
752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef a,
762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef b)
771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{
781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef cond;
82241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   LLVMValueRef res;
831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
84e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func >= PIPE_FUNC_NEVER);
85e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func <= PIPE_FUNC_ALWAYS);
86e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul
871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_NEVER)
881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return zeros;
891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_ALWAYS)
901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return ones;
911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* TODO: optimize the constant case */
931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* XXX: It is not clear if we should use the ordered or unordered operators */
951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
96489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207
971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.width * type.length == 128) {
997cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      if(type.floating && util_cpu_caps.has_sse) {
100e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* float[4] comparison */
1017b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef args[3];
1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         unsigned cc;
1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         boolean swap;
1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         swap = FALSE;
1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         switch(func) {
1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_EQUAL:
1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 0;
1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_NOTEQUAL:
1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 4;
1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LESS:
1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LEQUAL:
1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GREATER:
1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GEQUAL:
1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         default:
1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            assert(0);
1302297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            return lp_build_undef(type);
1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         if(swap) {
1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = b;
1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = a;
1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         else {
1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = a;
1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = b;
1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
1432297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = lp_build_intrinsic(builder,
1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  "llvm.x86.sse.cmp.ps",
1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  vec_type,
1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  args, 3);
1472297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return res;
1491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1507cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      else if(util_cpu_caps.has_sse2) {
151e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* int[4] comparison */
152d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         static const struct {
153d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned swap:1;
154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned eq:1;
155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned gt:1;
156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned not:1;
157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         } table[] = {
158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
162d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
166d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         };
167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpeq;
168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpgt;
169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef args[2];
170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef res;
1717b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         switch (type.width) {
174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 8:
175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 16:
179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 32:
183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         default:
187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            assert(0);
1882297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            return lp_build_undef(type);
189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
190d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
191b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         /* There are no unsigned comparison instructions. So flip the sign bit
192b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca          * so that the results match.
193d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          */
194b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         if (table[func].gt && !type.sign) {
195185be3a87a5b38e8821a560c073975c11dcbd3e9Brian Paul            LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
1962297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            a = LLVMBuildXor(builder, a, msb, "");
1972297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            b = LLVMBuildXor(builder, b, msb, "");
198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].swap) {
201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = b;
202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = a;
203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else {
205d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = a;
206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = b;
207d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
208d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
209d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].eq)
2102297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
211d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else if (table[func].gt)
2122297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
213d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else
214d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMConstNull(vec_type);
215d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
216d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].not)
2172297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = LLVMBuildNot(builder, res, "");
218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
219d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         return res;
220d07b0383660cd318ba43abad11bb80de4a124951José Fonseca      }
2212ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   } /* if (type.width * type.length == 128) */
2221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif
223a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */
2241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
2251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.floating) {
2261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMRealPredicate op;
2271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NEVER:
2291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateFalse;
2301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_ALWAYS:
2321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateTrue;
2331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUEQ;
2361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUNE;
2391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULT;
2421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULE;
2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGT;
2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGE;
2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
2542297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         return lp_build_undef(type);
2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
256241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
257a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
2582297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildFCmp(builder, op, a, b, "");
259a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
260241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
2612ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
262e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildFCmp(builder, op, a, b, "");
263e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
2642ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
2652ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
2667b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
2677b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
268e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
269e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
2702ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         debug_printf("%s: warning: using slow element-wise float"
2712ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                      " vector comparison\n", __FUNCTION__);
2722ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for (i = 0; i < type.length; ++i) {
2732ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
2742ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildFCmp(builder, op,
2752ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
2762ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
2772ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
2782ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
2792ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
2802ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
2812ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
2822ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
2832ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
284241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
285241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
2861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
2871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   else {
2881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMIntPredicate op;
2891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntEQ;
2921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntNE;
2951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLT : LLVMIntULT;
2981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
3001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLE : LLVMIntULE;
3011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
3031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
3041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
3091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
3102297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         return lp_build_undef(type);
3111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
312241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
313a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
3142297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildICmp(builder, op, a, b, "");
315a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
316241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
3172ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
318e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildICmp(builder, op, a, b, "");
319e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
3202ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
3212ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
3227b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
3237b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
324e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
325e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
3262ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         debug_printf("%s: warning: using slow element-wise int"
3272ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                      " vector comparison\n", __FUNCTION__);
3282ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul
3292ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for(i = 0; i < type.length; ++i) {
3302ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
3312ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildICmp(builder, op,
3322ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
3332ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
3342ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
3352ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
3362ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
3372ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
3382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
3392ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
3402ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
341241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
342241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
3431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
3441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
345241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   return res;
3461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca}
34709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
34809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
3492297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3502297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/**
3512297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func.
3522297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
353877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
3542297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */
3552297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef
3562297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld,
3572297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             unsigned func,
3582297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef a,
3592297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef b)
3602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{
3612297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul   return lp_build_compare(bld->builder, bld->type, func, a, b);
3622297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul}
3632297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3642297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
365877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/**
3662a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * Return (mask & a) | (~mask & b);
3672a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */
3682a45972fb2ba12a6561e5cba84d167f4c30566d4José FonsecaLLVMValueRef
3692a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonsecalp_build_select_bitwise(struct lp_build_context *bld,
3702a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef mask,
3712a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef a,
3722a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef b)
3732a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca{
3742a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   struct lp_type type = bld->type;
3752a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   LLVMValueRef res;
3762a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3772a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if (a == b) {
3782a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      return a;
3792a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
3802a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3812a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if(type.floating) {
3822a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
3832a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
3842a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
3852a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
3862a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3872a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   a = LLVMBuildAnd(bld->builder, a, mask, "");
3882a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3892a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   /* This often gets translated to PANDN, but sometimes the NOT is
3902a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * pre-computed and stored in another constant. The best strategy depends
3912a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * on available registers, so it is not a big deal -- hopefully LLVM does
3922a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * the right decision attending the rest of the program.
3932a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    */
3942a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
3952a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3962a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   res = LLVMBuildOr(bld->builder, a, b, "");
3972a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3982a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if(type.floating) {
3992a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      LLVMTypeRef vec_type = lp_build_vec_type(type);
4002a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
4012a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
4022a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4032a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   return res;
4042a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca}
4052a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4062a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4072a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca/**
408877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b;
409e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca *
4102a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
4112a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * will yield unpredictable results.
412877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */
41309a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
41409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld,
41509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef mask,
41609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef a,
41709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef b)
41809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
419b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   struct lp_type type = bld->type;
42009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   LLVMValueRef res;
42109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
42209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
42309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
42409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
4252ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   if (type.length == 1) {
426e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca      mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
4272ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
42809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
42988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca   else if (util_cpu_caps.has_sse4_1 &&
43088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            type.width * type.length == 128 &&
43188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(a) &&
43288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(b) &&
43388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(mask)) {
43488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      const char *intrinsic;
43588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      LLVMTypeRef arg_type;
43688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      LLVMValueRef args[3];
43788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
43888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (type.width == 64) {
43988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.blendvpd";
44088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMDoubleType(), 2);
44188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      } else if (type.width == 32) {
44288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.blendvps";
44388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMFloatType(), 4);
44488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      } else {
44588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.pblendvb";
44688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMInt8Type(), 16);
44788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
44888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
44988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->int_vec_type) {
45088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         mask = LLVMBuildBitCast(bld->builder, mask, arg_type, "");
45188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
45288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
45388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->vec_type) {
45488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         a = LLVMBuildBitCast(bld->builder, a, arg_type, "");
45588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         b = LLVMBuildBitCast(bld->builder, b, arg_type, "");
45688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
45788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
45888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[0] = b;
45988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[1] = a;
46088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[2] = mask;
46188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
46288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      res = lp_build_intrinsic(bld->builder, intrinsic,
46388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca                               arg_type, args, Elements(args));
46488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
46588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->vec_type) {
46688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
46788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
46888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca   }
4692ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   else {
4702a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      res = lp_build_select_bitwise(bld, mask, a, b);
47109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
47209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
47309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   return res;
47409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
47509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
47609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
47709a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
47809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld,
47909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef a,
48009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef b,
4815e13e987da6ce656b08f6c25f8d373c80949e3b0José Fonseca                    const boolean cond[4])
48209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
483b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   const struct lp_type type = bld->type;
48409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const unsigned n = type.length;
48509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   unsigned i, j;
48609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
48709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
48809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
48909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(cond[0] && cond[1] && cond[2] && cond[3])
49009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
49109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
49209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return b;
49309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == bld->undef || b == bld->undef)
49409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return bld->undef;
49509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
49609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /*
49709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * There are three major ways of accomplishing this:
49809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a shuffle,
49909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a select,
50009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - or with a bit mask.
50109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    *
50209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * Select isn't supported for vector types yet.
50309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * The flip between these is empirical and might need to be.
50409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
50509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if (n <= 4) {
50609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /*
50709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       * Shuffle.
50809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       */
50909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt32Type();
51009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
51109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
51209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
51309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
51409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
51509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
51609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
51709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
5181fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca   else {
51909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0
5201fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca      /* XXX: Unfortunately select of vectors do not work */
52109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* Use a select */
52209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt1Type();
52309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
52409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
52509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
52609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
52709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
52809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
52909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
5301fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else
53109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
53209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return lp_build_select(bld, mask, a, b);
5331fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif
53409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
53509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
53685c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin
5372b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul
5382b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul/** Return (a & ~b) */
5392b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian PaulLLVMValueRef
5402b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paullp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
5412b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul{
5422b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   b = LLVMBuildNot(bld->builder, b, "");
5432b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   b = LLVMBuildAnd(bld->builder, a, b, "");
5442b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   return b;
5452b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul}
546