lp_bld_logic.c revision 6b0c79e058d4d008cad32c0ff06de9757ccd6fa0
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/**************************************************************************
21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc.
41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved.
51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a
71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the
81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including
91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish,
101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to
111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to
121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions:
131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the
151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions
161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software.
171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/
271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
/**
 * @file
 * Helper functions for logical operations.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca
351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h"
3788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca#include "util/u_memory.h"
38854627387db5c1bf3f69d56a638fab2af0c46010José Fonseca#include "util/u_debug.h"
397cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca
401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h"
4109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h"
421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h"
43dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca#include "lp_bld_debug.h"
441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h"
451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
/*
 * XXX
 *
 * Selection with a vector conditional, like
 *
 *    select <4 x i1> %C, %A, %B
 *
 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
 * supported on any backend.
 *
 * Expanding the boolean vector to full SIMD register width, as in
 *
 *    sext <4 x i1> %C to <4 x i32>
 *
 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
 * it causes assertion failures in LLVM 2.6. It appears to work correctly on
 * LLVM 2.7.
 */
65a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
66a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca
67e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/**
682297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' of 'type' using the given func.
692297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
70877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
71e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */
721aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef
732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_compare(LLVMBuilderRef builder,
742297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 const struct lp_type type,
752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 unsigned func,
762297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef a,
772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul                 LLVMValueRef b)
781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{
791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef cond;
83241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   LLVMValueRef res;
841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
85e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func >= PIPE_FUNC_NEVER);
86e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func <= PIPE_FUNC_ALWAYS);
87a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
88a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
89e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul
901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_NEVER)
911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return zeros;
921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_ALWAYS)
931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return ones;
941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
956b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
966b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca   /*
976b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca    * There are no unsigned integer comparison instructions in SSE.
986b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca    */
991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1006b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca   if (!type.floating && !type.sign &&
1016b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca       type.width * type.length == 128 &&
1026b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca       util_cpu_caps.has_sse2 &&
1036b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca       (func == PIPE_FUNC_LESS ||
1046b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca        func == PIPE_FUNC_LEQUAL ||
1056b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca        func == PIPE_FUNC_GREATER ||
1066b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca        func == PIPE_FUNC_GEQUAL) &&
1076b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
1086b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
1096b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca                      __FUNCTION__, type.length, type.width);
1106b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca   }
1116b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca#endif
1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
113489af2a3ba467e4341cb8504a0e59cf5828864d4Brian Paul#if HAVE_LLVM < 0x0207
1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.width * type.length == 128) {
1167cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      if(type.floating && util_cpu_caps.has_sse) {
117e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* float[4] comparison */
1187b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef args[3];
1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         unsigned cc;
1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         boolean swap;
1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         swap = FALSE;
1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         switch(func) {
1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_EQUAL:
1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 0;
1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_NOTEQUAL:
1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 4;
1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LESS:
1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LEQUAL:
1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GREATER:
1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GEQUAL:
1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         default:
1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            assert(0);
1472297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            return lp_build_undef(type);
1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         if(swap) {
1511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = b;
1521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = a;
1531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         else {
1551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = a;
1561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = b;
1571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
1602297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = lp_build_intrinsic(builder,
1611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  "llvm.x86.sse.cmp.ps",
1621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  vec_type,
1631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  args, 3);
1642297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
1651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return res;
1661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1677cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      else if(util_cpu_caps.has_sse2) {
168e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* int[4] comparison */
169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         static const struct {
170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned swap:1;
171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned eq:1;
172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned gt:1;
173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned not:1;
174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         } table[] = {
175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         };
184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpeq;
185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpgt;
186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef args[2];
187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef res;
1887b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         LLVMTypeRef vec_type = lp_build_vec_type(type);
189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
190d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         switch (type.width) {
191d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 8:
192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
193d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
194d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
195d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 16:
196d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
197d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
199d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 32:
200d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
201d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
202d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
203d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         default:
204d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            assert(0);
2052297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            return lp_build_undef(type);
206d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
207d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
208b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         /* There are no unsigned comparison instructions. So flip the sign bit
209b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca          * so that the results match.
210d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          */
211b3d4e5bd26a44870af7d2413cca7a6f576a0984aJosé Fonseca         if (table[func].gt && !type.sign) {
212185be3a87a5b38e8821a560c073975c11dcbd3e9Brian Paul            LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
2132297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            a = LLVMBuildXor(builder, a, msb, "");
2142297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            b = LLVMBuildXor(builder, b, msb, "");
215d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
216d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
217d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].swap) {
218d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = b;
219d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = a;
220d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
221d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else {
222d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = a;
223d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = b;
224d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
225d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
226d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].eq)
2272297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
228d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else if (table[func].gt)
2292297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
230d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else
231d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMConstNull(vec_type);
232d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
233d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].not)
2342297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul            res = LLVMBuildNot(builder, res, "");
235d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
236d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         return res;
237d07b0383660cd318ba43abad11bb80de4a124951José Fonseca      }
2382ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   } /* if (type.width * type.length == 128) */
2391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif
240a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#endif /* HAVE_LLVM < 0x0207 */
2411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
2426b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca   /* XXX: It is not clear if we should use the ordered or unordered operators */
2436b0c79e058d4d008cad32c0ff06de9757ccd6fa0José Fonseca
2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.floating) {
2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMRealPredicate op;
2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NEVER:
2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateFalse;
2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_ALWAYS:
2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateTrue;
2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUEQ;
2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUNE;
2581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULT;
2611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULE;
2641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGT;
2671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGE;
2701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
2732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         return lp_build_undef(type);
2741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
275241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
276a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
2772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildFCmp(builder, op, a, b, "");
278a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
279241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
2802ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
281e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildFCmp(builder, op, a, b, "");
282e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
2832ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
2842ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
2857b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
2867b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
287e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
288e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
2892ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         debug_printf("%s: warning: using slow element-wise float"
2902ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                      " vector comparison\n", __FUNCTION__);
2912ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for (i = 0; i < type.length; ++i) {
2922ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
2932ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildFCmp(builder, op,
2942ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
2952ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
2962ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
2972ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
2982ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
2992ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
3002ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
3012ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
3022ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
303241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
304241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
3051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   else {
3071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMIntPredicate op;
3081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
3091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
3101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntEQ;
3111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
3131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntNE;
3141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
3161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLT : LLVMIntULT;
3171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
3191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLE : LLVMIntULE;
3201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
3221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
3231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
3251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
3261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
3271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
3281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
3292297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul         return lp_build_undef(type);
3301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
331241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
332a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca#if HAVE_LLVM >= 0x0207
3332297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul      cond = LLVMBuildICmp(builder, op, a, b, "");
334a75519cb43c85b99cd54bc46dd83accda0cbd6cdJosé Fonseca      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
335241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
3362ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      if (type.length == 1) {
337e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         cond = LLVMBuildICmp(builder, op, a, b, "");
338e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
3392ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      }
3402ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      else {
3417b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul         unsigned i;
3427b42379b713ad8a48b3c97ba9a74813c834b9e74Brian Paul
343e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca         res = LLVMGetUndef(int_vec_type);
344e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca
345dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca         if (gallivm_debug & GALLIVM_DEBUG_PERF) {
346dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca            debug_printf("%s: using slow element-wise int"
347dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca                         " vector comparison\n", __FUNCTION__);
348dcc5d7f67220bc93aa7a351658649877c7e4cf69José Fonseca         }
3492ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul
3502ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         for(i = 0; i < type.length; ++i) {
3512ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
3522ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildICmp(builder, op,
3532ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, a, index, ""),
3542ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 LLVMBuildExtractElement(builder, b, index, ""),
3552ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                 "");
3562ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            cond = LLVMBuildSelect(builder, cond,
3572ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(ones, index),
3582ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   LLVMConstExtractElement(zeros, index),
3592ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul                                   "");
3602ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul            res = LLVMBuildInsertElement(builder, res, cond, index, "");
3612ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul         }
362241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
363241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
3641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
3651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
366241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   return res;
3671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca}
36809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
36909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
3702297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3712297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul/**
3722297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * Build code to compare two values 'a' and 'b' using the given func.
3732297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul * \param func  one of PIPE_FUNC_x
374877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * The result values will be 0 for false or ~0 for true.
3752297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul */
3762297bc9233be014b7b5aa037769209fbe9f6a66cBrian PaulLLVMValueRef
3772297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paullp_build_cmp(struct lp_build_context *bld,
3782297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             unsigned func,
3792297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef a,
3802297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul             LLVMValueRef b)
3812297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul{
3822297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul   return lp_build_compare(bld->builder, bld->type, func, a, b);
3832297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul}
3842297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
3852297bc9233be014b7b5aa037769209fbe9f6a66cBrian Paul
386877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul/**
3872a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * Return (mask & a) | (~mask & b);
3882a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca */
3892a45972fb2ba12a6561e5cba84d167f4c30566d4José FonsecaLLVMValueRef
3902a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonsecalp_build_select_bitwise(struct lp_build_context *bld,
3912a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef mask,
3922a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef a,
3932a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca                        LLVMValueRef b)
3942a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca{
3952a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   struct lp_type type = bld->type;
3962a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   LLVMValueRef res;
3972a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
398a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
399a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
400a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
4012a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if (a == b) {
4022a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      return a;
4032a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
4042a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4052a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if(type.floating) {
4062a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
4072a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
4082a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
4092a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
4102a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4112a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   a = LLVMBuildAnd(bld->builder, a, mask, "");
4122a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4132a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   /* This often gets translated to PANDN, but sometimes the NOT is
4142a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * pre-computed and stored in another constant. The best strategy depends
4152a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * on available registers, so it is not a big deal -- hopefully LLVM does
4162a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    * the right decision attending the rest of the program.
4172a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca    */
4182a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
4192a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4202a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   res = LLVMBuildOr(bld->builder, a, b, "");
4212a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4222a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   if(type.floating) {
4232a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      LLVMTypeRef vec_type = lp_build_vec_type(type);
4242a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
4252a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   }
4262a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4272a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca   return res;
4282a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca}
4292a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4302a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca
4312a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca/**
432877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul * Return mask ? a : b;
433e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca *
4342a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
4352a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca * will yield unpredictable results.
436877f2356b2ab7caa16beed496f36eca64ee201e1Brian Paul */
43709a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
43809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld,
43909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef mask,
44009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef a,
44109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef b)
44209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
443b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   struct lp_type type = bld->type;
44409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   LLVMValueRef res;
44509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
446a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
447a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
448a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
44909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
45009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
45109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
4522ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   if (type.length == 1) {
453e24e5324ed1adce8da8ff268f7047ad8172bb248José Fonseca      mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
4542ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
45509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
45688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca   else if (util_cpu_caps.has_sse4_1 &&
45788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            type.width * type.length == 128 &&
45888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(a) &&
45988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(b) &&
46088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca            !LLVMIsConstant(mask)) {
46188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      const char *intrinsic;
46288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      LLVMTypeRef arg_type;
46388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      LLVMValueRef args[3];
46488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
46588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (type.width == 64) {
46688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.blendvpd";
46788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMDoubleType(), 2);
46888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      } else if (type.width == 32) {
46988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.blendvps";
47088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMFloatType(), 4);
47188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      } else {
47288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         intrinsic = "llvm.x86.sse41.pblendvb";
47388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         arg_type = LLVMVectorType(LLVMInt8Type(), 16);
47488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
47588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
47688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->int_vec_type) {
47788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         mask = LLVMBuildBitCast(bld->builder, mask, arg_type, "");
47888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
47988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
48088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->vec_type) {
48188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         a = LLVMBuildBitCast(bld->builder, a, arg_type, "");
48288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         b = LLVMBuildBitCast(bld->builder, b, arg_type, "");
48388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
48488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
48588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[0] = b;
48688b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[1] = a;
48788b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      args[2] = mask;
48888b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
48988b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      res = lp_build_intrinsic(bld->builder, intrinsic,
49088b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca                               arg_type, args, Elements(args));
49188b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca
49288b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      if (arg_type != bld->vec_type) {
49388b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca         res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
49488b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca      }
49588b6abfba5e95866877dd3939ae43c6dfd71422cJosé Fonseca   }
4962ccae040a458ad0f95ee46916e2ea467d5cf9d02Brian Paul   else {
4972a45972fb2ba12a6561e5cba84d167f4c30566d4José Fonseca      res = lp_build_select_bitwise(bld, mask, a, b);
49809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
49909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
50009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   return res;
50109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
50209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
50309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
5046ed726b8fc6210a41fe325591e1428d19f419108José Fonseca/**
5056ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * Return mask ? a : b;
5066ed726b8fc6210a41fe325591e1428d19f419108José Fonseca *
5076ed726b8fc6210a41fe325591e1428d19f419108José Fonseca * mask is a TGSI_WRITEMASK_xxx.
5086ed726b8fc6210a41fe325591e1428d19f419108José Fonseca */
50909a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
51009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld,
5116ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                    unsigned mask,
51209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef a,
5136ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                    LLVMValueRef b)
51409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
515b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   const struct lp_type type = bld->type;
51609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const unsigned n = type.length;
51709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   unsigned i, j;
51809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
5196ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   assert((mask & ~0xf) == 0);
520a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, a));
521a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled   assert(lp_check_value(type, b));
522a44a6960fab8c0053678fe74ce4c978ef40b06ffnobled
52309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
52409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
5256ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   if((mask & 0xf) == 0xf)
52609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
5276ed726b8fc6210a41fe325591e1428d19f419108José Fonseca   if((mask & 0xf) == 0x0)
52809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return b;
52909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == bld->undef || b == bld->undef)
53009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return bld->undef;
53109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
53209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /*
53309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * There are three major ways of accomplishing this:
53409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a shuffle,
53509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a select,
53609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - or with a bit mask.
53709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    *
53809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * Select isn't supported for vector types yet.
53909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * The flip between these is empirical and might need to be.
54009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
54109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if (n <= 4) {
54209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /*
54309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       * Shuffle.
54409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       */
54509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt32Type();
54609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
54709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
54809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
54909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
5506ed726b8fc6210a41fe325591e1428d19f419108José Fonseca            shuffles[j + i] = LLVMConstInt(elem_type,
5516ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                                           (mask & (1 << i) ? 0 : n) + j + i,
5526ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                                           0);
55309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
55409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
55509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
5561fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca   else {
55709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0
5581fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca      /* XXX: Unfortunately select of vectors do not work */
55909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* Use a select */
56009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt1Type();
5616ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH];
56209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
56309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
56409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
5656ed726b8fc6210a41fe325591e1428d19f419108José Fonseca            cond_vec[j + i] = LLVMConstInt(elem_type,
5666ed726b8fc6210a41fe325591e1428d19f419108José Fonseca                                           mask & (1 << i) ? 1 : 0, 0);
56709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
5686ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, "");
5691fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else
5706ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask);
5716ed726b8fc6210a41fe325591e1428d19f419108José Fonseca      return lp_build_select(bld, mask_vec, a, b);
5721fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif
57309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
57409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
575