lp_bld_logic.c revision 6ed726b8fc6210a41fe325591e1428d19f419108
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/**************************************************************************
21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc.
41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved.
51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a
71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the
81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including
91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish,
101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to
111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to
121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions:
131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the
151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions
161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software.
171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/
271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/**
295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file
305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca *
325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com>
335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */
345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca
351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h"
3788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca#include "util/u_memory.h"
38854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h"
397cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca
401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h"
4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h"
421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h"
431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h"
441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
46a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca/*
47a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * XXX
48a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
49a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Selection with vector conditional like
50a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
51a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *    select <4 x i1> %C, %A, %B
52a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
53a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
54a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * supported on any backend.
55a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
56a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * Expanding the boolean vector to full SIMD register width, as in
57a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
58a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *    sext <4 x i1> %C to <4 x i32>
59a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca *
60a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
61a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * it causes assertion failures in LLVM 2.6. It appears to work correctly on
62a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca * LLVM 2.7.
63a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca */
64a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
65a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
66e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/**
672297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func.
682297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
69877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
70e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */
711aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef
722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder,
732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 const struct lp_type type,
742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 unsigned func,
752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef a,
762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef b)
771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{
781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef cond;
82241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   LLVMValueRef res;
831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
84e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func >= PIPE_FUNC_NEVER);
85e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func <= PIPE_FUNC_ALWAYS);
86a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
87a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
88e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul
891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_NEVER)
901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return zeros;
911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_ALWAYS)
921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return ones;
931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* TODO: optimize the constant case */
951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* XXX: It is not clear if we should use the ordered or unordered operators */
971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
98489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207
991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
1001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.width * type.length == 128) {
1017cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      if(type.floating && util_cpu_caps.has_sse) {
102e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* float[4] comparison */
1037b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef args[3];
1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         unsigned cc;
1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         boolean swap;
1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         swap = FALSE;
1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         switch(func) {
1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_EQUAL:
1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 0;
1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_NOTEQUAL:
1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 4;
1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LESS:
1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LEQUAL:
1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GREATER:
1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GEQUAL:
1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         default:
1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            assert(0);
1322297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            return lp_build_undef(type);
1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         if(swap) {
1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = b;
1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = a;
1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         else {
1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = a;
1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = b;
1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
1452297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = lp_build_intrinsic(builder,
1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  "llvm.x86.sse.cmp.ps",
1471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  vec_type,
1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  args, 3);
1492297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
1501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return res;
1511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1527cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      else if(util_cpu_caps.has_sse2) {
153e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* int[4] comparison */
154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         static const struct {
155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned swap:1;
156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned eq:1;
157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned gt:1;
158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned not:1;
159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         } table[] = {
160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
162d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
166d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         };
169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpeq;
170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpgt;
171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef args[2];
172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef res;
1737b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         switch (type.width) {
176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 8:
177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 16:
181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 32:
185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
188d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         default:
189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            assert(0);
1902297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            return lp_build_undef(type);
191d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
193b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         /* There are no unsigned comparison instructions. So flip the sign bit
194b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca          * so that the results match.
195d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          */
196b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         if (table[func].gt && !type.sign) {
197185be3a87a5b38e8821a560c073975c11dcbd3e9Brian Paul            LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
1982297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            a = LLVMBuildXor(builder, a, msb, "");
1992297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            b = LLVMBuildXor(builder, b, msb, "");
200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].swap) {
203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = b;
204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = a;
205d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else {
207d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = a;
208d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = b;
209d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
210d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
211d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].eq)
2122297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
213d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else if (table[func].gt)
2142297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
215d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else
216d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMConstNull(vec_type);
217d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].not)
2192297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = LLVMBuildNot(builder, res, "");
220d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
221d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         return res;
222d07b0383660cd318ba43abad11bb80de4a124951José Fonseca      }
2232ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   } /* if (type.width * type.length == 128) */
2241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif
225a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */
2261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
2271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.floating) {
2281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMRealPredicate op;
2291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NEVER:
2311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateFalse;
2321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_ALWAYS:
2341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateTrue;
2351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUEQ;
2381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUNE;
2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULT;
2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULE;
2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGT;
2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGE;
2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
2562297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         return lp_build_undef(type);
2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
258241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
259a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
2602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildFCmp(builder, op, a, b, "");
261a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
262241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
2632ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
264e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildFCmp(builder, op, a, b, "");
265e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
2662ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
2672ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
2687b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
2697b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
270e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
271e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
2722ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         debug_printf("%s: warning: using slow element-wise float"
2732ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                      " vector comparison\n", __FUNCTION__);
2742ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for (i = 0; i < type.length; ++i) {
2752ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
2762ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildFCmp(builder, op,
2772ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
2782ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
2792ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
2802ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
2812ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
2822ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
2832ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
2842ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
2852ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
286241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
287241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
2881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
2891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   else {
2901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMIntPredicate op;
2911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntEQ;
2941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntNE;
2971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLT : LLVMIntULT;
3001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
3021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLE : LLVMIntULE;
3031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
3091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
3111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
3122297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         return lp_build_undef(type);
3131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
314241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
315a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
3162297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildICmp(builder, op, a, b, "");
317a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
318241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
3192ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
320e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildICmp(builder, op, a, b, "");
321e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
3222ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
3232ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
3247b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
3257b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
326e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
327e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
3282ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         debug_printf("%s: warning: using slow element-wise int"
3292ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                      " vector comparison\n", __FUNCTION__);
3302ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul
3312ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for(i = 0; i < type.length; ++i) {
3322ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
3332ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildICmp(builder, op,
3342ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
3352ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
3362ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
3372ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
3382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
3392ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
3402ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
3412ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
3422ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
343241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
344241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
3451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
3461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
347241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   return res;
3481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca}
34909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
35009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
3512297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3522297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/**
3532297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func.
3542297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
355877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
3562297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */
3572297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef
3582297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld,
3592297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             unsigned func,
3602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef a,
3612297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef b)
3622297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{
3632297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul   return lp_build_compare(bld->builder, bld->type, func, a, b);
3642297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul}
3652297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3662297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
367877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/**
3682a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * Return (mask & a) | (~mask & b);
3692a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */
3702a45972fb2ba12a6561e5cba84d167f4c30566d4José FonsecaLLVMValueRef
3712a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonsecalp_build_select_bitwise(struct lp_build_context *bld,
3722a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef mask,
3732a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef a,
3742a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef b)
3752a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca{
3762a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   struct lp_type type = bld->type;
3772a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   LLVMValueRef res;
3782a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
379a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
380a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
381a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
3822a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if (a == b) {
3832a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      return a;
3842a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
3852a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3862a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if(type.floating) {
3872a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
3882a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
3892a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
3902a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
3912a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3922a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   a = LLVMBuildAnd(bld->builder, a, mask, "");
3932a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
3942a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   /* This often gets translated to PANDN, but sometimes the NOT is
3952a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * pre-computed and stored in another constant. The best strategy depends
3962a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * on available registers, so it is not a big deal -- hopefully LLVM does
3972a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * the right decision attending the rest of the program.
3982a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    */
3992a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
4002a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4012a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   res = LLVMBuildOr(bld->builder, a, b, "");
4022a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4032a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if(type.floating) {
4042a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      LLVMTypeRef vec_type = lp_build_vec_type(type);
4052a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
4062a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
4072a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4082a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   return res;
4092a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca}
4102a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4112a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4122a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca/**
413877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b;
414e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca *
4152a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
4162a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * will yield unpredictable results.
417877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */
41809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
41909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld,
42009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef mask,
42109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef a,
42209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef b)
42309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
424b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   struct lp_type type = bld->type;
42509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   LLVMValueRef res;
42609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
427a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
428a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
429a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
43009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
43109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
43209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
4332ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   if (type.length == 1) {
434e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca      mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
4352ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
43609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
43788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca   else if (util_cpu_caps.has_sse4_1 &&
43888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            type.width * type.length == 128 &&
43988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(a) &&
44088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(b) &&
44188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(mask)) {
44288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      const char *intrinsic;
44388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      LLVMTypeRef arg_type;
44488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      LLVMValueRef args[3];
44588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
44688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (type.width == 64) {
44788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.blendvpd";
44888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMDoubleType(), 2);
44988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      } else if (type.width == 32) {
45088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.blendvps";
45188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMFloatType(), 4);
45288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      } else {
45388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.pblendvb";
45488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMInt8Type(), 16);
45588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
45688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
45788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->int_vec_type) {
45888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         mask = LLVMBuildBitCast(bld->builder, mask, arg_type, "");
45988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
46088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
46188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->vec_type) {
46288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         a = LLVMBuildBitCast(bld->builder, a, arg_type, "");
46388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         b = LLVMBuildBitCast(bld->builder, b, arg_type, "");
46488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
46588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
46688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[0] = b;
46788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[1] = a;
46888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[2] = mask;
46988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
47088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      res = lp_build_intrinsic(bld->builder, intrinsic,
47188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca                               arg_type, args, Elements(args));
47288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
47388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->vec_type) {
47488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
47588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
47688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca   }
4772ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   else {
4782a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      res = lp_build_select_bitwise(bld, mask, a, b);
47909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
48009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
48109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   return res;
48209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
48309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
48409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
4856ed726b8fc6210a41fe325591e1428d19f419108José Fonseca/**
4866ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * Return mask ? a : b;
4876ed726b8fc6210a41fe325591e1428d19f419108José Fonseca *
4886ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * mask is a TGSI_WRITEMASK_xxx.
4896ed726b8fc6210a41fe325591e1428d19f419108José Fonseca */
49009a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
49109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld,
4926ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                    unsigned mask,
49309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef a,
4946ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                    LLVMValueRef b)
49509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
496b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   const struct lp_type type = bld->type;
49709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const unsigned n = type.length;
49809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   unsigned i, j;
49909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
5006ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   assert((mask & ~0xf) == 0);
501a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
502a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
503a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
50409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
50509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
5066ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   if((mask & 0xf) == 0xf)
50709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
5086ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   if((mask & 0xf) == 0x0)
50909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return b;
51009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == bld->undef || b == bld->undef)
51109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return bld->undef;
51209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
51309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /*
51409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * There are three major ways of accomplishing this:
51509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a shuffle,
51609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a select,
51709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - or with a bit mask.
51809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    *
51909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * Select isn't supported for vector types yet.
52009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * The flip between these is empirical and might need to be.
52109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
52209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if (n <= 4) {
52309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /*
52409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       * Shuffle.
52509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       */
52609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt32Type();
52709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
52809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
52909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
53009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
5316ed726b8fc6210a41fe325591e1428d19f419108José Fonseca            shuffles[j + i] = LLVMConstInt(elem_type,
5326ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                                           (mask & (1 << i) ? 0 : n) + j + i,
5336ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                                           0);
53409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
53509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
53609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
5371fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca   else {
53809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0
5391fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca      /* XXX: Unfortunately select of vectors do not work */
54009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* Use a select */
54109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt1Type();
5426ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH];
54309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
54409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
54509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
5466ed726b8fc6210a41fe325591e1428d19f419108José Fonseca            cond_vec[j + i] = LLVMConstInt(elem_type,
5476ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                                           mask & (1 << i) ? 1 : 0, 0);
54809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
5496ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, "");
5501fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else
5516ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask);
5526ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      return lp_build_select(bld, mask_vec, a, b);
5531fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif
55409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
55509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
55685c7ec70ad41c8ada75a4cbace83d16815d3e2c5Zack Rusin
5572b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul
5582b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul/** Return (a & ~b) */
5592b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian PaulLLVMValueRef
5602b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paullp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
5612b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul{
56220b3e40f166c77bd7fa5b7171e5b4169ed035280nobled   const struct lp_type type = bld->type;
56320b3e40f166c77bd7fa5b7171e5b4169ed035280nobled
56420b3e40f166c77bd7fa5b7171e5b4169ed035280nobled   assert(lp_check_value(type, a));
56520b3e40f166c77bd7fa5b7171e5b4169ed035280nobled   assert(lp_check_value(type, b));
56620b3e40f166c77bd7fa5b7171e5b4169ed035280nobled
56720b3e40f166c77bd7fa5b7171e5b4169ed035280nobled   /* can't do bitwise ops on floating-point values */
56820b3e40f166c77bd7fa5b7171e5b4169ed035280nobled   if(type.floating) {
56920b3e40f166c77bd7fa5b7171e5b4169ed035280nobled      a = LLVMBuildBitCast(bld->builder, a, bld->int_vec_type, "");
57020b3e40f166c77bd7fa5b7171e5b4169ed035280nobled      b = LLVMBuildBitCast(bld->builder, b, bld->int_vec_type, "");
57120b3e40f166c77bd7fa5b7171e5b4169ed035280nobled   }
572a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
5732b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   b = LLVMBuildNot(bld->builder, b, "");
5742b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   b = LLVMBuildAnd(bld->builder, a, b, "");
575a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
57620b3e40f166c77bd7fa5b7171e5b4169ed035280nobled   if(type.floating) {
57720b3e40f166c77bd7fa5b7171e5b4169ed035280nobled      b = LLVMBuildBitCast(bld->builder, b, bld->vec_type, "");
57820b3e40f166c77bd7fa5b7171e5b4169ed035280nobled   }
5792b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul   return b;
5802b8db4ce156fbd4d094f46fad0b8b3291b057fffBrian Paul}
581