lp_bld_logic.c revision 2b8db4ce156fbd4d094f46fad0b8b3291b057fff
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Helper functions for logical operations.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35
36#include "util/u_cpu_detect.h"
37#include "util/u_debug.h"
38
39#include "lp_bld_type.h"
40#include "lp_bld_const.h"
41#include "lp_bld_intr.h"
42#include "lp_bld_logic.h"
43
44
45/*
46 * XXX
47 *
48 * Selection with vector conditional like
49 *
50 *    select <4 x i1> %C, %A, %B
51 *
52 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
53 * supported on any backend.
54 *
55 * Expanding the boolean vector to full SIMD register width, as in
56 *
57 *    sext <4 x i1> %C to <4 x i32>
58 *
59 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
60 * it causes assertion failures in LLVM 2.6. It appears to work correctly on
61 * LLVM 2.7.
62 */
63
64
65/**
66 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
67 * \param func  one of PIPE_FUNC_x
68 * The result values will be 0 for false or ~0 for true.
69 */
70LLVMValueRef
71lp_build_compare(LLVMBuilderRef builder,
72                 const struct lp_type type,
73                 unsigned func,
74                 LLVMValueRef a,
75                 LLVMValueRef b)
76{
77   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
78   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
79   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
80   LLVMValueRef cond;
81   LLVMValueRef res;
82
83   assert(func >= PIPE_FUNC_NEVER);
84   assert(func <= PIPE_FUNC_ALWAYS);
85
86   if(func == PIPE_FUNC_NEVER)
87      return zeros;
88   if(func == PIPE_FUNC_ALWAYS)
89      return ones;
90
91   /* TODO: optimize the constant case */
92
93   /* XXX: It is not clear if we should use the ordered or unordered operators */
94
95#if HAVE_LLVM < 0x0207
96#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
97   if(type.width * type.length == 128) {
98      if(type.floating && util_cpu_caps.has_sse) {
99         /* float[4] comparison */
100         LLVMTypeRef vec_type = lp_build_vec_type(type);
101         LLVMValueRef args[3];
102         unsigned cc;
103         boolean swap;
104
105         swap = FALSE;
106         switch(func) {
107         case PIPE_FUNC_EQUAL:
108            cc = 0;
109            break;
110         case PIPE_FUNC_NOTEQUAL:
111            cc = 4;
112            break;
113         case PIPE_FUNC_LESS:
114            cc = 1;
115            break;
116         case PIPE_FUNC_LEQUAL:
117            cc = 2;
118            break;
119         case PIPE_FUNC_GREATER:
120            cc = 1;
121            swap = TRUE;
122            break;
123         case PIPE_FUNC_GEQUAL:
124            cc = 2;
125            swap = TRUE;
126            break;
127         default:
128            assert(0);
129            return lp_build_undef(type);
130         }
131
132         if(swap) {
133            args[0] = b;
134            args[1] = a;
135         }
136         else {
137            args[0] = a;
138            args[1] = b;
139         }
140
141         args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
142         res = lp_build_intrinsic(builder,
143                                  "llvm.x86.sse.cmp.ps",
144                                  vec_type,
145                                  args, 3);
146         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
147         return res;
148      }
149      else if(util_cpu_caps.has_sse2) {
150         /* int[4] comparison */
151         static const struct {
152            unsigned swap:1;
153            unsigned eq:1;
154            unsigned gt:1;
155            unsigned not:1;
156         } table[] = {
157            {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
158            {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
159            {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
160            {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
161            {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
162            {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
163            {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
164            {0, 0, 0, 0}  /* PIPE_FUNC_ALWAYS */
165         };
166         const char *pcmpeq;
167         const char *pcmpgt;
168         LLVMValueRef args[2];
169         LLVMValueRef res;
170         LLVMTypeRef vec_type = lp_build_vec_type(type);
171
172         switch (type.width) {
173         case 8:
174            pcmpeq = "llvm.x86.sse2.pcmpeq.b";
175            pcmpgt = "llvm.x86.sse2.pcmpgt.b";
176            break;
177         case 16:
178            pcmpeq = "llvm.x86.sse2.pcmpeq.w";
179            pcmpgt = "llvm.x86.sse2.pcmpgt.w";
180            break;
181         case 32:
182            pcmpeq = "llvm.x86.sse2.pcmpeq.d";
183            pcmpgt = "llvm.x86.sse2.pcmpgt.d";
184            break;
185         default:
186            assert(0);
187            return lp_build_undef(type);
188         }
189
190         /* There are no signed byte and unsigned word/dword comparison
191          * instructions. So flip the sign bit so that the results match.
192          */
193         if(table[func].gt &&
194            ((type.width == 8 && type.sign) ||
195             (type.width != 8 && !type.sign))) {
196            LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
197            a = LLVMBuildXor(builder, a, msb, "");
198            b = LLVMBuildXor(builder, b, msb, "");
199         }
200
201         if(table[func].swap) {
202            args[0] = b;
203            args[1] = a;
204         }
205         else {
206            args[0] = a;
207            args[1] = b;
208         }
209
210         if(table[func].eq)
211            res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
212         else if (table[func].gt)
213            res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
214         else
215            res = LLVMConstNull(vec_type);
216
217         if(table[func].not)
218            res = LLVMBuildNot(builder, res, "");
219
220         return res;
221      }
222   } /* if (type.width * type.length == 128) */
223#endif
224#endif /* HAVE_LLVM < 0x0207 */
225
226   if(type.floating) {
227      LLVMRealPredicate op;
228      switch(func) {
229      case PIPE_FUNC_NEVER:
230         op = LLVMRealPredicateFalse;
231         break;
232      case PIPE_FUNC_ALWAYS:
233         op = LLVMRealPredicateTrue;
234         break;
235      case PIPE_FUNC_EQUAL:
236         op = LLVMRealUEQ;
237         break;
238      case PIPE_FUNC_NOTEQUAL:
239         op = LLVMRealUNE;
240         break;
241      case PIPE_FUNC_LESS:
242         op = LLVMRealULT;
243         break;
244      case PIPE_FUNC_LEQUAL:
245         op = LLVMRealULE;
246         break;
247      case PIPE_FUNC_GREATER:
248         op = LLVMRealUGT;
249         break;
250      case PIPE_FUNC_GEQUAL:
251         op = LLVMRealUGE;
252         break;
253      default:
254         assert(0);
255         return lp_build_undef(type);
256      }
257
258#if HAVE_LLVM >= 0x0207
259      cond = LLVMBuildFCmp(builder, op, a, b, "");
260      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
261#else
262      if (type.length == 1) {
263         cond = LLVMBuildFCmp(builder, op, a, b, "");
264         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
265      }
266      else {
267         unsigned i;
268
269         res = LLVMGetUndef(int_vec_type);
270
271         debug_printf("%s: warning: using slow element-wise float"
272                      " vector comparison\n", __FUNCTION__);
273         for (i = 0; i < type.length; ++i) {
274            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
275            cond = LLVMBuildFCmp(builder, op,
276                                 LLVMBuildExtractElement(builder, a, index, ""),
277                                 LLVMBuildExtractElement(builder, b, index, ""),
278                                 "");
279            cond = LLVMBuildSelect(builder, cond,
280                                   LLVMConstExtractElement(ones, index),
281                                   LLVMConstExtractElement(zeros, index),
282                                   "");
283            res = LLVMBuildInsertElement(builder, res, cond, index, "");
284         }
285      }
286#endif
287   }
288   else {
289      LLVMIntPredicate op;
290      switch(func) {
291      case PIPE_FUNC_EQUAL:
292         op = LLVMIntEQ;
293         break;
294      case PIPE_FUNC_NOTEQUAL:
295         op = LLVMIntNE;
296         break;
297      case PIPE_FUNC_LESS:
298         op = type.sign ? LLVMIntSLT : LLVMIntULT;
299         break;
300      case PIPE_FUNC_LEQUAL:
301         op = type.sign ? LLVMIntSLE : LLVMIntULE;
302         break;
303      case PIPE_FUNC_GREATER:
304         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
305         break;
306      case PIPE_FUNC_GEQUAL:
307         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
308         break;
309      default:
310         assert(0);
311         return lp_build_undef(type);
312      }
313
314#if HAVE_LLVM >= 0x0207
315      cond = LLVMBuildICmp(builder, op, a, b, "");
316      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
317#else
318      if (type.length == 1) {
319         cond = LLVMBuildICmp(builder, op, a, b, "");
320         res = LLVMBuildSExt(builder, cond, int_vec_type, "");
321      }
322      else {
323         unsigned i;
324
325         res = LLVMGetUndef(int_vec_type);
326
327         debug_printf("%s: warning: using slow element-wise int"
328                      " vector comparison\n", __FUNCTION__);
329
330         for(i = 0; i < type.length; ++i) {
331            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
332            cond = LLVMBuildICmp(builder, op,
333                                 LLVMBuildExtractElement(builder, a, index, ""),
334                                 LLVMBuildExtractElement(builder, b, index, ""),
335                                 "");
336            cond = LLVMBuildSelect(builder, cond,
337                                   LLVMConstExtractElement(ones, index),
338                                   LLVMConstExtractElement(zeros, index),
339                                   "");
340            res = LLVMBuildInsertElement(builder, res, cond, index, "");
341         }
342      }
343#endif
344   }
345
346   return res;
347}
348
349
350
351/**
352 * Build code to compare two values 'a' and 'b' using the given func.
353 * \param func  one of PIPE_FUNC_x
354 * The result values will be 0 for false or ~0 for true.
355 */
356LLVMValueRef
357lp_build_cmp(struct lp_build_context *bld,
358             unsigned func,
359             LLVMValueRef a,
360             LLVMValueRef b)
361{
362   return lp_build_compare(bld->builder, bld->type, func, a, b);
363}
364
365
366/**
367 * Return mask ? a : b;
368 *
369 * mask is a bitwise mask, composed of 0 or ~0 for each element.
370 */
371LLVMValueRef
372lp_build_select(struct lp_build_context *bld,
373                LLVMValueRef mask,
374                LLVMValueRef a,
375                LLVMValueRef b)
376{
377   struct lp_type type = bld->type;
378   LLVMValueRef res;
379
380   if(a == b)
381      return a;
382
383   if (type.length == 1) {
384      mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
385      res = LLVMBuildSelect(bld->builder, mask, a, b, "");
386   }
387   else {
388      if(type.floating) {
389         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
390         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
391         b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
392      }
393
394      a = LLVMBuildAnd(bld->builder, a, mask, "");
395
396      /* This often gets translated to PANDN, but sometimes the NOT is
397       * pre-computed and stored in another constant. The best strategy depends
398       * on available registers, so it is not a big deal -- hopefully LLVM does
399       * the right decision attending the rest of the program.
400       */
401      b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
402
403      res = LLVMBuildOr(bld->builder, a, b, "");
404
405      if(type.floating) {
406         LLVMTypeRef vec_type = lp_build_vec_type(type);
407         res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
408      }
409   }
410
411   return res;
412}
413
414
415LLVMValueRef
416lp_build_select_aos(struct lp_build_context *bld,
417                    LLVMValueRef a,
418                    LLVMValueRef b,
419                    const boolean cond[4])
420{
421   const struct lp_type type = bld->type;
422   const unsigned n = type.length;
423   unsigned i, j;
424
425   if(a == b)
426      return a;
427   if(cond[0] && cond[1] && cond[2] && cond[3])
428      return a;
429   if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
430      return b;
431   if(a == bld->undef || b == bld->undef)
432      return bld->undef;
433
434   /*
435    * There are three major ways of accomplishing this:
436    * - with a shuffle,
437    * - with a select,
438    * - or with a bit mask.
439    *
440    * Select isn't supported for vector types yet.
441    * The flip between these is empirical and might need to be.
442    */
443   if (n <= 4) {
444      /*
445       * Shuffle.
446       */
447      LLVMTypeRef elem_type = LLVMInt32Type();
448      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
449
450      for(j = 0; j < n; j += 4)
451         for(i = 0; i < 4; ++i)
452            shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
453
454      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
455   }
456   else {
457#if 0
458      /* XXX: Unfortunately select of vectors do not work */
459      /* Use a select */
460      LLVMTypeRef elem_type = LLVMInt1Type();
461      LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
462
463      for(j = 0; j < n; j += 4)
464         for(i = 0; i < 4; ++i)
465            cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
466
467      return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
468#else
469      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
470      return lp_build_select(bld, mask, a, b);
471#endif
472   }
473}
474
475LLVMValueRef
476lp_build_alloca(struct lp_build_context *bld)
477{
478   const struct lp_type type = bld->type;
479
480   if (type.length > 1) { /*vector*/
481      return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), "");
482   } else { /*scalar*/
483      return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), "");
484   }
485}
486
487
488/** Return (a & ~b) */
489LLVMValueRef
490lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b)
491{
492   b = LLVMBuildNot(bld->builder, b, "");
493   b = LLVMBuildAnd(bld->builder, a, b, "");
494   return b;
495}
496