lp_bld_logic.c revision e01fa1eaec34675d0b30127de4f78b020a092a83
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/**************************************************************************
21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc.
41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved.
51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a
71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the
81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including
91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish,
101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to
111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to
121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions:
131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the
151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions
161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software.
171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/
271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/**
295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file
305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca *
325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com>
335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */
345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca
351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
367cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca#include "util/u_cpu_detect.h"
377cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca
381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h"
3909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h"
401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h"
411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h"
421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
44e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul/**
45e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul * Build code to compare two values 'a' and 'b' using the given func.
46e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul * \parm func  one of PIPE_FUNC_x
47e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul */
481aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef
491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonsecalp_build_cmp(struct lp_build_context *bld,
501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             unsigned func,
511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             LLVMValueRef a,
521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             LLVMValueRef b)
531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{
54b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   const struct lp_type type = bld->type;
551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef vec_type = lp_build_vec_type(type);
561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef cond;
60241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   LLVMValueRef res;
61241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   unsigned i;
621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
63e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func >= PIPE_FUNC_NEVER);
64e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul   assert(func <= PIPE_FUNC_ALWAYS);
65e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul
661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_NEVER)
671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return zeros;
681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_ALWAYS)
691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return ones;
701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* TODO: optimize the constant case */
721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* XXX: It is not clear if we should use the ordered or unordered operators */
741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.width * type.length == 128) {
777cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      if(type.floating && util_cpu_caps.has_sse) {
78e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* float[4] comparison */
791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef args[3];
801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         unsigned cc;
811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         boolean swap;
821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         swap = FALSE;
841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         switch(func) {
851aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_EQUAL:
861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 0;
871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_NOTEQUAL:
891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 4;
901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LESS:
921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LEQUAL:
951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GREATER:
981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GEQUAL:
1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         default:
1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            assert(0);
1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            return bld->undef;
1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         if(swap) {
1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = b;
1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = a;
1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         else {
1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = a;
1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = b;
1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         res = lp_build_intrinsic(bld->builder,
1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  "llvm.x86.sse.cmp.ps",
1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  vec_type,
1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  args, 3);
1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         res = LLVMBuildBitCast(bld->builder, res, int_vec_type, "");
1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return res;
1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1277cda8ea44c2b65265cefa79bd29a4990ac81cee6José Fonseca      else if(util_cpu_caps.has_sse2) {
128e01fa1eaec34675d0b30127de4f78b020a092a83Brian Paul         /* int[4] comparison */
129d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         static const struct {
130d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned swap:1;
131d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned eq:1;
132d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned gt:1;
133d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned not:1;
134d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         } table[] = {
135d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
136d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
137d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
138d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
139d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
140d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
141d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
142d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
143d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         };
144d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpeq;
145d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpgt;
146d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef args[2];
147d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef res;
148d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
149d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         switch (type.width) {
150d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 8:
151d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
152d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
153d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 16:
155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 32:
159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
162d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         default:
163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            assert(0);
164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            return bld->undef;
165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
166d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         /* There are no signed byte and unsigned word/dword comparison
168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          * instructions. So flip the sign bit so that the results match.
169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          */
170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].gt &&
171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            ((type.width == 8 && type.sign) ||
172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca             (type.width != 8 && !type.sign))) {
17377b35dc179473afbbd8c709c9f32c0f537c90776José Fonseca            LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            a = LLVMBuildXor(bld->builder, a, msb, "");
175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            b = LLVMBuildXor(bld->builder, b, msb, "");
176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].swap) {
179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = b;
180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = a;
181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else {
183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = a;
184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = b;
185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].eq)
188d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = lp_build_intrinsic(bld->builder, pcmpeq, vec_type, args, 2);
189d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else if (table[func].gt)
190d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = lp_build_intrinsic(bld->builder, pcmpgt, vec_type, args, 2);
191d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else
192d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMConstNull(vec_type);
193d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
194d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].not)
195d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMBuildNot(bld->builder, res, "");
196d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
197d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         return res;
198d07b0383660cd318ba43abad11bb80de4a124951José Fonseca      }
1991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
2001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif
2011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
2021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.floating) {
2031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMRealPredicate op;
2041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NEVER:
2061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateFalse;
2071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_ALWAYS:
2091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateTrue;
2101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUEQ;
2131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUNE;
2161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULT;
2191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULE;
2221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGT;
2251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGE;
2281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
2311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return bld->undef;
2321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
233241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
234241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#if 0
235241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      /* XXX: Although valid IR, no LLVM target currently support this */
2361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      cond = LLVMBuildFCmp(bld->builder, op, a, b, "");
237241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      res = LLVMBuildSelect(bld->builder, cond, ones, zeros, "");
238241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
239241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      debug_printf("%s: warning: using slow element-wise vector comparison\n",
240241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                   __FUNCTION__);
241241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      res = LLVMGetUndef(int_vec_type);
242241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      for(i = 0; i < type.length; ++i) {
243241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
244241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         cond = LLVMBuildFCmp(bld->builder, op,
245241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              LLVMBuildExtractElement(bld->builder, a, index, ""),
246241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              LLVMBuildExtractElement(bld->builder, b, index, ""),
247241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              "");
248241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         cond = LLVMBuildSelect(bld->builder, cond,
249241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                LLVMConstExtractElement(ones, index),
250241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                LLVMConstExtractElement(zeros, index),
251241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                "");
252241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         res = LLVMBuildInsertElement(bld->builder, res, cond, index, "");
253241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
254241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
2561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   else {
2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMIntPredicate op;
2581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntEQ;
2611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntNE;
2641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLT : LLVMIntULT;
2671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLE : LLVMIntULE;
2701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
2731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
2761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
2791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return bld->undef;
2801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
281241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
282241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#if 0
283241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      /* XXX: Although valid IR, no LLVM target currently support this */
2841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      cond = LLVMBuildICmp(bld->builder, op, a, b, "");
285241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      res = LLVMBuildSelect(bld->builder, cond, ones, zeros, "");
286241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
287241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      debug_printf("%s: warning: using slow element-wise vector comparison\n",
288241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                   __FUNCTION__);
289241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      res = LLVMGetUndef(int_vec_type);
290241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      for(i = 0; i < type.length; ++i) {
291241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
292241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         cond = LLVMBuildICmp(bld->builder, op,
293241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              LLVMBuildExtractElement(bld->builder, a, index, ""),
294241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              LLVMBuildExtractElement(bld->builder, b, index, ""),
295241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              "");
296241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         cond = LLVMBuildSelect(bld->builder, cond,
297241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                LLVMConstExtractElement(ones, index),
298241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                LLVMConstExtractElement(zeros, index),
299241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                "");
300241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         res = LLVMBuildInsertElement(bld->builder, res, cond, index, "");
301241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
302241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
3031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
3041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
305241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   return res;
3061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca}
30709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
30809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
30909a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
31009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld,
31109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef mask,
31209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef a,
31309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef b)
31409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
315b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   struct lp_type type = bld->type;
31609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   LLVMValueRef res;
31709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
31809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
31909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
32009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
32109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(type.floating) {
32209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
32309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
32409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
32509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
32609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
32709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   a = LLVMBuildAnd(bld->builder, a, mask, "");
32809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
32909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /* This often gets translated to PANDN, but sometimes the NOT is
33009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * pre-computed and stored in another constant. The best strategy depends
33109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * on available registers, so it is not a big deal -- hopefully LLVM does
33209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * the right decision attending the rest of the program.
33309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
33409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
33509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
33609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   res = LLVMBuildOr(bld->builder, a, b, "");
33709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
33809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(type.floating) {
33909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef vec_type = lp_build_vec_type(type);
34009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
34109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
34209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
34309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   return res;
34409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
34509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
34609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
34709a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
34809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld,
34909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef a,
35009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef b,
3515e13e987da6ce656b08f6c25f8d373c80949e3b0José Fonseca                    const boolean cond[4])
35209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
353b4835ea03d64261da5a892f9590c9977b06920e8José Fonseca   const struct lp_type type = bld->type;
35409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const unsigned n = type.length;
35509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   unsigned i, j;
35609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
35709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
35809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
35909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(cond[0] && cond[1] && cond[2] && cond[3])
36009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
36109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
36209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return b;
36309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == bld->undef || b == bld->undef)
36409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return bld->undef;
36509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
36609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /*
36709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * There are three major ways of accomplishing this:
36809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a shuffle,
36909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a select,
37009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - or with a bit mask.
37109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    *
37209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * Select isn't supported for vector types yet.
37309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * The flip between these is empirical and might need to be.
37409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
37509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if (n <= 4) {
37609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /*
37709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       * Shuffle.
37809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       */
37909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt32Type();
38009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
38109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
38209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
38309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
38409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
38509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
38609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
38709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
3881fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca   else {
38909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0
3901fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca      /* XXX: Unfortunately select of vectors do not work */
39109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* Use a select */
39209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt1Type();
39309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
39409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
39509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
39609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
39709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
39809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
39909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
4001fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#else
40109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
40209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return lp_build_select(bld, mask, a, b);
4031fc41002252419f4688c24ea8c3814553b3d76adJosé Fonseca#endif
40409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
40509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
406