lp_bld_logic.c revision b3d4e5bd26a44870af7d2413cca7a6f576a0984a
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Helper functions for logical operations.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36#include "util/u_cpu_detect.h"
37#include "util/u_debug.h"
38
39#include "lp_bld_type.h"
40#include "lp_bld_const.h"
41#include "lp_bld_intr.h"
42#include "lp_bld_logic.h"
43
44
45/*
46 * XXX
47 *
48 * Selection with vector conditional like
49 *
50 *    select <4 x i1> %C, %A, %B
51 *
52 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
53 * supported on any backend.
54 *
55 * Expanding the boolean vector to full SIMD register width, as in
56 *
57 *    sext <4 x i1> %C to <4 x i32>
58 *
59 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
60 * it causes assertion failures in LLVM 2.6. It appears to work correctly on
61 * LLVM 2.7.
62 */
63
64
65/**
66 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
67 * \param func  one of PIPE_FUNC_x
68 * The result values will be 0 for false or ~0 for true.
69 */
70LLVMValueRef
71lp_build_compare(LLVMBuilderRef builder,
72                 const struct lp_type type,
73                 unsigned func,
74                 LLVMValueRef a,
75                 LLVMValueRef b)
76{
77   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
78   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
79   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
80   LLVMValueRef cond;
81   LLVMValueRef res;
82
83   assert(func >= PIPE_FUNC_NEVER);
84   assert(func <= PIPE_FUNC_ALWAYS);
85
86   if(func == PIPE_FUNC_NEVER)
87      return zeros;
88   if(func == PIPE_FUNC_ALWAYS)
89      return ones;
90
91   /* TODO: optimize the constant case */
92
93   /* XXX: It is not clear if we should use the ordered or unordered operators */
94
95#if HAVE_LLVM < 0x0207
96#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
97   if(type.width * type.length == 128) {
98      if(type.floating && util_cpu_caps.has_sse) {
99         /* float[4] comparison */
100         LLVMTypeRef vec_type = lp_build_vec_type(type);
101         LLVMValueRef args[3];
102         unsigned cc;
103         boolean swap;
104
105         swap = FALSE;
106         switch(func) {
107         case PIPE_FUNC_EQUAL:
108            cc = 0;
109            break;
110         case PIPE_FUNC_NOTEQUAL:
111            cc = 4;
112            break;
113         case PIPE_FUNC_LESS:
114            cc = 1;
115            break;
116         case PIPE_FUNC_LEQUAL:
117            cc = 2;
118            break;
119         case PIPE_FUNC_GREATER:
120            cc = 1;
121            swap = TRUE;
122            break;
123         case PIPE_FUNC_GEQUAL:
124            cc = 2;
125            swap = TRUE;
126            break;
127         default:
128            assert(0);
129            return lp_build_undef(type);
130         }
131
132         if(swap) {
133            args[0] = b;
134            args[1] = a;
135         }
136         else {
137            args[0] = a;
138            args[1] = b;
139         }
140
141         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
142         res = lp_build_intrinsic(builder,
143                                  "llvm.x86.sse.cmp.ps",
144                                  vec_type,
145                                  args, 3);
146         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
147         return res;
148      }
149      else if(util_cpu_caps.has_sse2) {
150         /* int[4] comparison */
151         static const struct {
152            unsigned swap:1;
153            unsigned eq:1;
154            unsigned gt:1;
155            unsigned not:1;
156         } table[] = {
157            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
158            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
159            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
160            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
161            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
162            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
163            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
164            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
165         };
166         const char *pcmpeq;
167         const char *pcmpgt;
168         LLVMValueRef args[2];
169         LLVMValueRef res;
170         LLVMTypeRef vec_type = lp_build_vec_type(type);
171
172         switch (type.width) {
173         case 8:
174            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
175            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
176            break;
177         case 16:
178            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
179            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
180            break;
181         case 32:
182            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
183            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
184            break;
185         default:
186            assert(0);
187            return lp_build_undef(type);
188         }
189
190         /* There are no unsigned comparison instructions. So flip the sign bit
191          * so that the results match.
192          */
193         if (table[func].gt && !type.sign) {
194            LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
195            a = LLVMBuildXor(builder, a, msb, "");
196            b = LLVMBuildXor(builder, b, msb, "");
197         }
198
199         if(table[func].swap) {
200            args[0] = b;
201            args[1] = a;
202         }
203         else {
204            args[0] = a;
205            args[1] = b;
206         }
207
208         if(table[func].eq)
209            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
210         else if (table[func].gt)
211            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
212         else
213            res = LLVMConstNull(vec_type);
214
215         if(table[func].not)
216            res = LLVMBuildNot(builder, res, "");
217
218         return res;
219      }
220   } /* if (type.width * type.length == 128) */
221#endif
222#endif /* HAVE_LLVM < 0x0207 */
223
224   if(type.floating) {
225      LLVMRealPredicate op;
226      switch(func) {
227      case PIPE_FUNC_NEVER:
228         op = LLVMRealPredicateFalse;
229         break;
230      case PIPE_FUNC_ALWAYS:
231         op = LLVMRealPredicateTrue;
232         break;
233      case PIPE_FUNC_EQUAL:
234         op = LLVMRealUEQ;
235         break;
236      case PIPE_FUNC_NOTEQUAL:
237         op = LLVMRealUNE;
238         break;
239      case PIPE_FUNC_LESS:
240         op = LLVMRealULT;
241         break;
242      case PIPE_FUNC_LEQUAL:
243         op = LLVMRealULE;
244         break;
245      case PIPE_FUNC_GREATER:
246         op = LLVMRealUGT;
247         break;
248      case PIPE_FUNC_GEQUAL:
249         op = LLVMRealUGE;
250         break;
251      default:
252         assert(0);
253         return lp_build_undef(type);
254      }
255
256#if HAVE_LLVM >= 0x0207
257      cond = LLVMBuildFCmp(builder, op, a, b, "");
258      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
259#else
260      if (type.length == 1) {
261         cond = LLVMBuildFCmp(builder, op, a, b, "");
262         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
263      }
264      else {
265         unsigned i;
266
267         res = LLVMGetUndef(int_vec_type);
268
269         debug_printf("%s: warning: using slow element-wise float"
270                      " vector comparison\n", __FUNCTION__);
271         for (i = 0; i < type.length; ++i) {
272            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
273            cond = LLVMBuildFCmp(builder, op,
274                                 LLVMBuildExtractElement(builder, a, index, ""),
275                                 LLVMBuildExtractElement(builder, b, index, ""),
276                                 "");
277            cond = LLVMBuildSelect(builder, cond,
278                                   LLVMConstExtractElement(ones, index),
279                                   LLVMConstExtractElement(zeros, index),
280                                   "");
281            res = LLVMBuildInsertElement(builder, res, cond, index, "");
282         }
283      }
284#endif
285   }
286   else {
287      LLVMIntPredicate op;
288      switch(func) {
289      case PIPE_FUNC_EQUAL:
290         op = LLVMIntEQ;
291         break;
292      case PIPE_FUNC_NOTEQUAL:
293         op = LLVMIntNE;
294         break;
295      case PIPE_FUNC_LESS:
296         op = type.sign ? LLVMIntSLT : LLVMIntULT;
297         break;
298      case PIPE_FUNC_LEQUAL:
299         op = type.sign ? LLVMIntSLE : LLVMIntULE;
300         break;
301      case PIPE_FUNC_GREATER:
302         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
303         break;
304      case PIPE_FUNC_GEQUAL:
305         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
306         break;
307      default:
308         assert(0);
309         return lp_build_undef(type);
310      }
311
312#if HAVE_LLVM >= 0x0207
313      cond = LLVMBuildICmp(builder, op, a, b, "");
314      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
315#else
316      if (type.length == 1) {
317         cond = LLVMBuildICmp(builder, op, a, b, "");
318         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
319      }
320      else {
321         unsigned i;
322
323         res = LLVMGetUndef(int_vec_type);
324
325         debug_printf("%s: warning: using slow element-wise int"
326                      " vector comparison\n", __FUNCTION__);
327
328         for(i = 0; i < type.length; ++i) {
329            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
330            cond = LLVMBuildICmp(builder, op,
331                                 LLVMBuildExtractElement(builder, a, index, ""),
332                                 LLVMBuildExtractElement(builder, b, index, ""),
333                                 "");
334            cond = LLVMBuildSelect(builder, cond,
335                                   LLVMConstExtractElement(ones, index),
336                                   LLVMConstExtractElement(zeros, index),
337                                   "");
338            res = LLVMBuildInsertElement(builder, res, cond, index, "");
339         }
340      }
341#endif
342   }
343
344   return res;
345}
346
347
348
349/**
350 * Build code to compare two values 'a' and 'b' using the given func.
351 * \param func  one of PIPE_FUNC_x
352 * The result values will be 0 for false or ~0 for true.
353 */
354LLVMValueRef
355lp_build_cmp(struct lp_build_context *bld,
356             unsigned func,
357             LLVMValueRef a,
358             LLVMValueRef b)
359{
360   return lp_build_compare(bld->builder, bld->type, func, a, b);
361}
362
363
364/**
365 * Return mask ? a : b;
366 *
367 * mask is a bitwise mask, composed of 0 or ~0 for each element.
368 */
369LLVMValueRef
370lp_build_select(struct lp_build_context *bld,
371                LLVMValueRef mask,
372                LLVMValueRef a,
373                LLVMValueRef b)
374{
375   struct lp_type type = bld->type;
376   LLVMValueRef res;
377
378   if(a == b)
379      return a;
380
381   if (type.length == 1) {
382      mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
383      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
384   }
385   else {
386      if(type.floating) {
387         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
388         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
389         b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
390      }
391
392      a = LLVMBuildAnd(bld->builder, a, mask, "");
393
394      /* This often gets translated to PANDN, but sometimes the NOT is
395       * pre-computed and stored in another constant. The best strategy depends
396       * on available registers, so it is not a big deal -- hopefully LLVM does
397       * the right decision attending the rest of the program.
398       */
399      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
400
401      res = LLVMBuildOr(bld->builder, a, b, "");
402
403      if(type.floating) {
404         LLVMTypeRef vec_type = lp_build_vec_type(type);
405         res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
406      }
407   }
408
409   return res;
410}
411
412
413LLVMValueRef
414lp_build_select_aos(struct lp_build_context *bld,
415                    LLVMValueRef a,
416                    LLVMValueRef b,
417                    const boolean cond[4])
418{
419   const struct lp_type type = bld->type;
420   const unsigned n = type.length;
421   unsigned i, j;
422
423   if(a == b)
424      return a;
425   if(cond[0] && cond[1] && cond[2] && cond[3])
426      return a;
427   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
428      return b;
429   if(a == bld->undef || b == bld->undef)
430      return bld->undef;
431
432   /*
433    * There are three major ways of accomplishing this:
434    * - with a shuffle,
435    * - with a select,
436    * - or with a bit mask.
437    *
438    * Select isn't supported for vector types yet.
439    * The flip between these is empirical and might need to be.
440    */
441   if (n <= 4) {
442      /*
443       * Shuffle.
444       */
445      LLVMTypeRef elem_type = LLVMInt32Type();
446      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
447
448      for(j = 0; j < n; j += 4)
449         for(i = 0; i < 4; ++i)
450            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
451
452      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
453   }
454   else {
455#if 0
456      /* XXX: Unfortunately select of vectors do not work */
457      /* Use a select */
458      LLVMTypeRef elem_type = LLVMInt1Type();
459      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
460
461      for(j = 0; j < n; j += 4)
462         for(i = 0; i < 4; ++i)
463            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
464
465      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
466#else
467      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
468      return lp_build_select(bld, mask, a, b);
469#endif
470   }
471}
472
473
474/** Return (a & ~b) */
475LLVMValueRef
476lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
477{
478   b = LLVMBuildNot(bld->builder, b, "");
479   b = LLVMBuildAnd(bld->builder, a, b, "");
480   return b;
481}
482