lp_bld_logic.c revision 241c3a1d8001fc5a30e2af4b4636b48e6f99690a
11aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca/**************************************************************************
21aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
31aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Copyright 2009 VMware, Inc.
41aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * All Rights Reserved.
51aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
61aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * Permission is hereby granted, free of charge, to any person obtaining a
71aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * copy of this software and associated documentation files (the
81aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * "Software"), to deal in the Software without restriction, including
91aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * without limitation the rights to use, copy, modify, merge, publish,
101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * distribute, sub license, and/or sell copies of the Software, and to
111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * permit persons to whom the Software is furnished to do so, subject to
121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * the following conditions:
131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * The above copyright notice and this permission notice (including the
151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * next paragraph) shall be included in all copies or substantial portions
161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * of the Software.
171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca *
261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca **************************************************************************/
271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
285811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca/**
295811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @file
305811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * Helper functions for logical operations.
315811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca *
325811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca * @author Jose Fonseca <jfonseca@vmware.com>
335811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca */
345811ed87d732101ab8cfbd087bc99d8c6c963f30José Fonseca
351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h"
3709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h"
381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h"
391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h"
401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
421aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef
431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonsecalp_build_cmp(struct lp_build_context *bld,
441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             unsigned func,
451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             LLVMValueRef a,
461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             LLVMValueRef b)
471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{
481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   const union lp_type type = bld->type;
491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef vec_type = lp_build_vec_type(type);
501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef cond;
54241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   LLVMValueRef res;
55241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   unsigned i;
561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_NEVER)
581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return zeros;
591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_ALWAYS)
601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return ones;
611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* TODO: optimize the constant case */
631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* XXX: It is not clear if we should use the ordered or unordered operators */
651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.width * type.length == 128) {
681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      if(type.floating) {
691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef args[3];
701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         unsigned cc;
711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         boolean swap;
721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         swap = FALSE;
741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         switch(func) {
751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_EQUAL:
761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 0;
771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_NOTEQUAL:
791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 4;
801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LESS:
821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LEQUAL:
851aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GREATER:
881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GEQUAL:
921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         default:
961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            assert(0);
971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            return bld->undef;
981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         if(swap) {
1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = b;
1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = a;
1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         else {
1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = a;
1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = b;
1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         res = lp_build_intrinsic(bld->builder,
1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  "llvm.x86.sse.cmp.ps",
1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  vec_type,
1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  args, 3);
1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         res = LLVMBuildBitCast(bld->builder, res, int_vec_type, "");
1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return res;
1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
117d07b0383660cd318ba43abad11bb80de4a124951José Fonseca      else {
118d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         static const struct {
119d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned swap:1;
120d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned eq:1;
121d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned gt:1;
122d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            unsigned not:1;
123d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         } table[] = {
124d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
125d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
126d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
127d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
128d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
129d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
130d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
131d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
132d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         };
133d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpeq;
134d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         const char *pcmpgt;
135d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef args[2];
136d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         LLVMValueRef res;
137d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
138d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         switch (type.width) {
139d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 8:
140d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
141d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
142d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
143d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 16:
144d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
145d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
146d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
147d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         case 32:
148d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
149d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
150d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            break;
151d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         default:
152d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            assert(0);
153d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            return bld->undef;
154d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
155d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
156d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         /* There are no signed byte and unsigned word/dword comparison
157d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          * instructions. So flip the sign bit so that the results match.
158d07b0383660cd318ba43abad11bb80de4a124951José Fonseca          */
159d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].gt &&
160d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            ((type.width == 8 && type.sign) ||
161d07b0383660cd318ba43abad11bb80de4a124951José Fonseca             (type.width != 8 && !type.sign))) {
16277b35dc179473afbbd8c709c9f32c0f537c90776José Fonseca            LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
163d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            a = LLVMBuildXor(bld->builder, a, msb, "");
164d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            b = LLVMBuildXor(bld->builder, b, msb, "");
165d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
166d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
167d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].swap) {
168d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = b;
169d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = a;
170d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
171d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else {
172d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[0] = a;
173d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            args[1] = b;
174d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         }
175d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
176d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].eq)
177d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = lp_build_intrinsic(bld->builder, pcmpeq, vec_type, args, 2);
178d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else if (table[func].gt)
179d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = lp_build_intrinsic(bld->builder, pcmpgt, vec_type, args, 2);
180d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         else
181d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMConstNull(vec_type);
182d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
183d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         if(table[func].not)
184d07b0383660cd318ba43abad11bb80de4a124951José Fonseca            res = LLVMBuildNot(bld->builder, res, "");
185d07b0383660cd318ba43abad11bb80de4a124951José Fonseca
186d07b0383660cd318ba43abad11bb80de4a124951José Fonseca         return res;
187d07b0383660cd318ba43abad11bb80de4a124951José Fonseca      }
1881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
1891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif
1901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.floating) {
1921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMRealPredicate op;
1931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
1941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NEVER:
1951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateFalse;
1961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_ALWAYS:
1981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateTrue;
1991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUEQ;
2021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUNE;
2051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULT;
2081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULE;
2111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGT;
2141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGE;
2171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
2201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return bld->undef;
2211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
222241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
223241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#if 0
224241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      /* XXX: Although valid IR, no LLVM target currently support this */
2251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      cond = LLVMBuildFCmp(bld->builder, op, a, b, "");
226241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      res = LLVMBuildSelect(bld->builder, cond, ones, zeros, "");
227241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
228241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      debug_printf("%s: warning: using slow element-wise vector comparison\n",
229241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                   __FUNCTION__);
230241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      res = LLVMGetUndef(int_vec_type);
231241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      for(i = 0; i < type.length; ++i) {
232241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
233241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         cond = LLVMBuildFCmp(bld->builder, op,
234241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              LLVMBuildExtractElement(bld->builder, a, index, ""),
235241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              LLVMBuildExtractElement(bld->builder, b, index, ""),
236241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              "");
237241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         cond = LLVMBuildSelect(bld->builder, cond,
238241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                LLVMConstExtractElement(ones, index),
239241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                LLVMConstExtractElement(zeros, index),
240241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                "");
241241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         res = LLVMBuildInsertElement(bld->builder, res, cond, index, "");
242241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
243241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
2441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
2451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   else {
2461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMIntPredicate op;
2471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
2481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
2491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntEQ;
2501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
2521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntNE;
2531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
2551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLT : LLVMIntULT;
2561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
2581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLE : LLVMIntULE;
2591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
2611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
2621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
2641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
2651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
2661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
2671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
2681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return bld->undef;
2691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
270241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca
271241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#if 0
272241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      /* XXX: Although valid IR, no LLVM target currently support this */
2731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      cond = LLVMBuildICmp(bld->builder, op, a, b, "");
274241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      res = LLVMBuildSelect(bld->builder, cond, ones, zeros, "");
275241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#else
276241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      debug_printf("%s: warning: using slow element-wise vector comparison\n",
277241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                   __FUNCTION__);
278241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      res = LLVMGetUndef(int_vec_type);
279241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      for(i = 0; i < type.length; ++i) {
280241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
281241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         cond = LLVMBuildICmp(bld->builder, op,
282241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              LLVMBuildExtractElement(bld->builder, a, index, ""),
283241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              LLVMBuildExtractElement(bld->builder, b, index, ""),
284241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                              "");
285241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         cond = LLVMBuildSelect(bld->builder, cond,
286241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                LLVMConstExtractElement(ones, index),
287241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                LLVMConstExtractElement(zeros, index),
288241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca                                "");
289241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca         res = LLVMBuildInsertElement(bld->builder, res, cond, index, "");
290241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca      }
291241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca#endif
2921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
2931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
294241c3a1d8001fc5a30e2af4b4636b48e6f99690aJosé Fonseca   return res;
2951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca}
29609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
29709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
29809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
29909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld,
30009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef mask,
30109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef a,
30209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef b)
30309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
30409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   union lp_type type = bld->type;
30509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   LLVMValueRef res;
30609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
30709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
30809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
30909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
31009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(type.floating) {
31109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
31209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
31309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
31409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
31509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
31609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
31709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
31809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   a = LLVMBuildAnd(bld->builder, a, mask, "");
31909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
32009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /* This often gets translated to PANDN, but sometimes the NOT is
32109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * pre-computed and stored in another constant. The best strategy depends
32209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * on available registers, so it is not a big deal -- hopefully LLVM does
32309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * the right decision attending the rest of the program.
32409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
32509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
32609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
32709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   res = LLVMBuildOr(bld->builder, a, b, "");
32809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
32909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(type.floating) {
33009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef vec_type = lp_build_vec_type(type);
33109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
33209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
33309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
33409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   return res;
33509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
33609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
33709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
33809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
33909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld,
34009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef a,
34109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef b,
34209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    boolean cond[4])
34309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
34409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const union lp_type type = bld->type;
34509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const unsigned n = type.length;
34609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   unsigned i, j;
34709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
34809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
34909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
35009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(cond[0] && cond[1] && cond[2] && cond[3])
35109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
35209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
35309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return b;
35409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == bld->undef || b == bld->undef)
35509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return bld->undef;
35609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
35709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /*
35809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * There are three major ways of accomplishing this:
35909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a shuffle,
36009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a select,
36109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - or with a bit mask.
36209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    *
36309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * Select isn't supported for vector types yet.
36409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * The flip between these is empirical and might need to be.
36509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
36609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if (n <= 4) {
36709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /*
36809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       * Shuffle.
36909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       */
37009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt32Type();
37109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
37209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
37309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
37409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
37509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
37609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
37709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
37809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
37909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0
38009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   else if(0) {
38109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* FIXME: Unfortunately select of vectors do not work */
38209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* Use a select */
38309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt1Type();
38409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
38509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
38609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
38709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
38809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
38909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
39009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
39109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
39209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#endif
39309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   else {
39409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
39509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return lp_build_select(bld, mask, a, b);
39609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
39709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
398