lp_bld_logic.c revision b3d4e5bd26a44870af7d2413cca7a6f576a0984a
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/**************************************************************************
21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc.
41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved.
51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a
71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the
81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including
91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish,
101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to
111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to
121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions:
131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the
151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions
161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software.
171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/
271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/**
295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file
305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca *
325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com>
335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */
345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca
351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h"
37854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h"
387cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca
391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h"
4009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h"
411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h"
421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h"
431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
45a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/*
46a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX
47a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like
49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *    select <4 x i1> %C, %A, %B
51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * supported on any backend.
54a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
55a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Expanding the boolean vector to full SIMD register width, as in
56a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
57a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *    sext <4 x i1> %C to <4 x i32>
58a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
59a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
60a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * it causes assertion failures in LLVM 2.6. It appears to work correctly on
61a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * LLVM 2.7.
62a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca */
63a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
64a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
65e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/**
662297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func.
672297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
68877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
69e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */
701aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef
712297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder,
722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 const struct lp_type type,
732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 unsigned func,
742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef a,
752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef b)
761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{
771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef cond;
81241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   LLVMValueRef res;
821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
83e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func >= PIPE_FUNC_NEVER);
84e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func <= PIPE_FUNC_ALWAYS);
85e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul
861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_NEVER)
871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return zeros;
881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_ALWAYS)
891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return ones;
901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* TODO: optimize the constant case */
921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* XXX: It is not clear if we should use the ordered or unordered operators */
941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
95489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207
961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.width * type.length == 128) {
987cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      if(type.floating && util_cpu_caps.has_sse) {
99e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* float[4] comparison */
1007b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef args[3];
1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         unsigned cc;
1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         boolean swap;
1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         swap = FALSE;
1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         switch(func) {
1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_EQUAL:
1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 0;
1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_NOTEQUAL:
1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 4;
1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LESS:
1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LEQUAL:
1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GREATER:
1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GEQUAL:
1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         default:
1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            assert(0);
1292297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            return lp_build_undef(type);
1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         if(swap) {
1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = b;
1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = a;
1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         else {
1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = a;
1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = b;
1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
1422297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = lp_build_intrinsic(builder,
1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  "llvm.x86.sse.cmp.ps",
1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  vec_type,
1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  args, 3);
1462297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
1471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return res;
1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1497cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      else if(util_cpu_caps.has_sse2) {
150e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* int[4] comparison */
151d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         static const struct {
152d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned swap:1;
153d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned eq:1;
154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned gt:1;
155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned not:1;
156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         } table[] = {
157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
162d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         };
166d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpeq;
167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpgt;
168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef args[2];
169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef res;
1707b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         switch (type.width) {
173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 8:
174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 16:
178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 32:
182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         default:
186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            assert(0);
1872297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            return lp_build_undef(type);
188d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
190b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         /* There are no unsigned comparison instructions. So flip the sign bit
191b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca          * so that the results match.
192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          */
193b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         if (table[func].gt && !type.sign) {
194185be3a87a5b38e8821a560c073975c11dcbd3e9Brian Paul            LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
1952297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            a = LLVMBuildXor(builder, a, msb, "");
1962297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            b = LLVMBuildXor(builder, b, msb, "");
197d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].swap) {
200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = b;
201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = a;
202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else {
204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = a;
205d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = b;
206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
207d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
208d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].eq)
2092297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
210d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else if (table[func].gt)
2112297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
212d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else
213d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMConstNull(vec_type);
214d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
215d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].not)
2162297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = LLVMBuildNot(builder, res, "");
217d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         return res;
219d07b0383660cd318ba43abad11bb80de4a124951José Fonseca      }
2202ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   } /* if (type.width * type.length == 128) */
2211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif
222a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */
2231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
2241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.floating) {
2251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMRealPredicate op;
2261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NEVER:
2281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateFalse;
2291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_ALWAYS:
2311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateTrue;
2321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUEQ;
2351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUNE;
2381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULT;
2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULE;
2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGT;
2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGE;
2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
2532297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         return lp_build_undef(type);
2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
255241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
256a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
2572297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildFCmp(builder, op, a, b, "");
258a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
259241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
2602ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
261e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildFCmp(builder, op, a, b, "");
262e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
2632ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
2642ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
2657b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
2667b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
267e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
268e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
2692ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         debug_printf("%s: warning: using slow element-wise float"
2702ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                      " vector comparison\n", __FUNCTION__);
2712ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for (i = 0; i < type.length; ++i) {
2722ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
2732ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildFCmp(builder, op,
2742ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
2752ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
2762ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
2772ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
2782ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
2792ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
2802ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
2812ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
2822ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
283241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
284241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
2851aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
2861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   else {
2871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMIntPredicate op;
2881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntEQ;
2911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntNE;
2941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLT : LLVMIntULT;
2971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLE : LLVMIntULE;
3001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
3021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
3031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
3092297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         return lp_build_undef(type);
3101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
311241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
312a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
3132297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildICmp(builder, op, a, b, "");
314a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
315241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
3162ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
317e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildICmp(builder, op, a, b, "");
318e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
3192ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
3202ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
3217b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
3227b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
323e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
324e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
3252ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         debug_printf("%s: warning: using slow element-wise int"
3262ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                      " vector comparison\n", __FUNCTION__);
3272ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul
3282ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for(i = 0; i < type.length; ++i) {
3292ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
3302ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildICmp(builder, op,
3312ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
3322ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
3332ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
3342ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
3352ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
3362ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
3372ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
3382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
3392ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
340241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
341241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
3421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
3431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
344241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   return res;
3451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca}
34609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
34709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
3482297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3492297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/**
3502297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func.
3512297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
352877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
3532297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */
3542297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef
3552297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld,
3562297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             unsigned func,
3572297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef a,
3582297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef b)
3592297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{
3602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul   return lp_build_compare(bld->builder, bld->type, func, a, b);
3612297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul}
3622297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3632297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
364877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/**
365877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b;
366e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca *
367e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element.
368877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */
36909a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
37009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld,
37109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef mask,
37209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef a,
37309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef b)
37409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
375b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   struct lp_type type = bld->type;
37609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   LLVMValueRef res;
37709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
37809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
37909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
38009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
3812ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   if (type.length == 1) {
382e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca      mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
3832ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
38409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
3852ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   else {
3862ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if(type.floating) {
3872ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
3882ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
3892ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
3902ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
39109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
3922ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      a = LLVMBuildAnd(bld->builder, a, mask, "");
39309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
3942ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      /* This often gets translated to PANDN, but sometimes the NOT is
3952ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul       * pre-computed and stored in another constant. The best strategy depends
3962ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul       * on available registers, so it is not a big deal -- hopefully LLVM does
3972ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul       * the right decision attending the rest of the program.
3982ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul       */
3992ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
40009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
4012ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      res = LLVMBuildOr(bld->builder, a, b, "");
40209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
4032ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if(type.floating) {
4042ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
4052ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
4062ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
40709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
40809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
40909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   return res;
41009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
41109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
41209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
41309a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
41409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld,
41509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef a,
41609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef b,
4175e13e987da6ce656b08f6c25f8d373c80949e3b0José Fonseca                    const boolean cond[4])
41809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
419b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   const struct lp_type type = bld->type;
42009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const unsigned n = type.length;
42109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   unsigned i, j;
42209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
42309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
42409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
42509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(cond[0] && cond[1] && cond[2] && cond[3])
42609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
42709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
42809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return b;
42909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == bld->undef || b == bld->undef)
43009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return bld->undef;
43109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
43209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /*
43309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * There are three major ways of accomplishing this:
43409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a shuffle,
43509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a select,
43609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - or with a bit mask.
43709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    *
43809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * Select isn't supported for vector types yet.
43909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * The flip between these is empirical and might need to be.
44009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
44109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if (n <= 4) {
44209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /*
44309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       * Shuffle.
44409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       */
44509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt32Type();
44609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
44709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
44809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
44909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
45009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
45109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
45209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
45309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
4541fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca   else {
45509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0
4561fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca      /* XXX: Unfortunately select of vectors do not work */
45709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* Use a select */
45809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt1Type();
45909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
46009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
46109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
46209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
46309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
46409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
46509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
4661fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else
46709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
46809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return lp_build_select(bld, mask, a, b);
4691fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif
47009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
47109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
47285c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin
4732b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul
4742b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul/** Return (a & ~b) */
4752b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian PaulLLVMValueRef
4762b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paullp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
4772b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul{
4782b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   b = LLVMBuildNot(bld->builder, b, "");
4792b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   b = LLVMBuildAnd(bld->builder, a, b, "");
4802b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   return b;
4812b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul}
482