lp_bld_logic.c revision 09a7b011acb3957725bb7e1bc4125f0939a295e7
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "pipe/p_defines.h"
301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_type.h"
3109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#include "lp_bld_const.h"
321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_intr.h"
331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#include "lp_bld_logic.h"
341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
361aede69d3a8d288af11c2ef620b51e71c2ce89b2José FonsecaLLVMValueRef
371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonsecalp_build_cmp(struct lp_build_context *bld,
381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             unsigned func,
391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             LLVMValueRef a,
401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca             LLVMValueRef b)
411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca{
421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   const union lp_type type = bld->type;
431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef vec_type = lp_build_vec_type(type);
441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   LLVMValueRef cond;
481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_NEVER)
501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return zeros;
511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(func == PIPE_FUNC_ALWAYS)
521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      return ones;
531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* TODO: optimize the constant case */
551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   /* XXX: It is not clear if we should use the ordered or unordered operators */
571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.width * type.length == 128) {
601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      if(type.floating) {
611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef args[3];
621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         unsigned cc;
631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         boolean swap;
641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         LLVMValueRef res;
651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         swap = FALSE;
671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         switch(func) {
681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_EQUAL:
691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 0;
701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_NOTEQUAL:
721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 4;
731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LESS:
751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
761aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
771aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_LEQUAL:
781aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
791aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
801aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GREATER:
811aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 1;
821aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
831aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
841aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         case PIPE_FUNC_GEQUAL:
851aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            cc = 2;
861aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            swap = TRUE;
871aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            break;
881aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         default:
891aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            assert(0);
901aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            return bld->undef;
911aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
921aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
931aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         if(swap) {
941aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = b;
951aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = a;
961aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
971aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         else {
981aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[0] = a;
991aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca            args[1] = b;
1001aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         }
1011aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1021aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
1031aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         res = lp_build_intrinsic(bld->builder,
1041aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  "llvm.x86.sse.cmp.ps",
1051aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  vec_type,
1061aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca                                  args, 3);
1071aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         res = LLVMBuildBitCast(bld->builder, res, int_vec_type, "");
1081aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return res;
1091aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1101aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
1111aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca#endif
1121aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1131aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   if(type.floating) {
1141aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMRealPredicate op;
1151aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
1161aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NEVER:
1171aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateFalse;
1181aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1191aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_ALWAYS:
1201aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealPredicateTrue;
1211aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1221aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
1231aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUEQ;
1241aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1251aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
1261aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUNE;
1271aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1281aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
1291aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULT;
1301aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1311aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
1321aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealULE;
1331aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1341aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
1351aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGT;
1361aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1371aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
1381aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMRealUGE;
1391aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1401aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
1411aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
1421aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return bld->undef;
1431aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1441aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      cond = LLVMBuildFCmp(bld->builder, op, a, b, "");
1451aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
1461aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   else {
1471aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      LLVMIntPredicate op;
1481aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      switch(func) {
1491aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_EQUAL:
1501aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntEQ;
1511aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1521aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_NOTEQUAL:
1531aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = LLVMIntNE;
1541aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1551aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LESS:
1561aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLT : LLVMIntULT;
1571aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1581aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_LEQUAL:
1591aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSLE : LLVMIntULE;
1601aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1611aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GREATER:
1621aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
1631aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1641aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      case PIPE_FUNC_GEQUAL:
1651aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
1661aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         break;
1671aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      default:
1681aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         assert(0);
1691aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca         return bld->undef;
1701aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      }
1711aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca      cond = LLVMBuildICmp(bld->builder, op, a, b, "");
1721aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   }
1731aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca
1741aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca   return LLVMBuildSelect(bld->builder, cond, ones, zeros, "");
1751aede69d3a8d288af11c2ef620b51e71c2ce89b2José Fonseca}
17609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
17709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
17809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
17909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select(struct lp_build_context *bld,
18009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef mask,
18109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef a,
18209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                LLVMValueRef b)
18309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
18409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   union lp_type type = bld->type;
18509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   LLVMValueRef res;
18609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
18709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
18809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
18909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
19009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(type.floating) {
19109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
19209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
19309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
19409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
19509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
19609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
19709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
19809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   a = LLVMBuildAnd(bld->builder, a, mask, "");
19909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
20009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /* This often gets translated to PANDN, but sometimes the NOT is
20109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * pre-computed and stored in another constant. The best strategy depends
20209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * on available registers, so it is not a big deal -- hopefully LLVM does
20309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * the right decision attending the rest of the program.
20409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
20509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
20609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
20709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   res = LLVMBuildOr(bld->builder, a, b, "");
20809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
20909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(type.floating) {
21009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef vec_type = lp_build_vec_type(type);
21109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
21209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
21309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
21409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   return res;
21509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
21609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
21709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
21809a7b011acb3957725bb7e1bc4125f0939a295e7José FonsecaLLVMValueRef
21909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonsecalp_build_select_aos(struct lp_build_context *bld,
22009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef a,
22109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    LLVMValueRef b,
22209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca                    boolean cond[4])
22309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca{
22409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const union lp_type type = bld->type;
22509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   const unsigned n = type.length;
22609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   unsigned i, j;
22709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
22809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == b)
22909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
23009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(cond[0] && cond[1] && cond[2] && cond[3])
23109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return a;
23209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
23309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return b;
23409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if(a == bld->undef || b == bld->undef)
23509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return bld->undef;
23609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
23709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   /*
23809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * There are three major ways of accomplishing this:
23909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a shuffle,
24009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - with a select,
24109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * - or with a bit mask.
24209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    *
24309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * Select isn't supported for vector types yet.
24409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    * The flip between these is empirical and might need to be.
24509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca    */
24609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   if (n <= 4) {
24709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /*
24809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       * Shuffle.
24909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca       */
25009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt32Type();
25109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
25209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
25309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
25409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
25509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
25609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
25709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
25809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
25909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#if 0
26009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   else if(0) {
26109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* FIXME: Unfortunately select of vectors do not work */
26209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      /* Use a select */
26309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMTypeRef elem_type = LLVMInt1Type();
26409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
26509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
26609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      for(j = 0; j < n; j += 4)
26709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca         for(i = 0; i < 4; ++i)
26809a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
26909a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca
27009a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
27109a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
27209a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca#endif
27309a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   else {
27409a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
27509a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca      return lp_build_select(bld, mask, a, b);
27609a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca   }
27709a7b011acb3957725bb7e1bc4125f0939a295e7José Fonseca}
278