lp_bld_logic.c revision efc82aef35a2aac5d2ed9774f6d28f2626796416
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Helper functions for logical operations. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_cpu_detect.h" 37#include "util/u_memory.h" 38#include "util/u_debug.h" 39 40#include "lp_bld_type.h" 41#include "lp_bld_const.h" 42#include "lp_bld_init.h" 43#include "lp_bld_intr.h" 44#include "lp_bld_debug.h" 45#include "lp_bld_logic.h" 46 47 48/* 49 * XXX 50 * 51 * Selection with vector conditional like 52 * 53 * select <4 x i1> %C, %A, %B 54 * 55 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not 56 * supported on any backend. 57 * 58 * Expanding the boolean vector to full SIMD register width, as in 59 * 60 * sext <4 x i1> %C to <4 x i32> 61 * 62 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 63 * it causes assertion failures in LLVM 2.6. It appears to work correctly on 64 * LLVM 2.7. 65 */ 66 67 68/** 69 * Build code to compare two values 'a' and 'b' of 'type' using the given func. 70 * \param func one of PIPE_FUNC_x 71 * The result values will be 0 for false or ~0 for true. 72 */ 73LLVMValueRef 74lp_build_compare(struct gallivm_state *gallivm, 75 const struct lp_type type, 76 unsigned func, 77 LLVMValueRef a, 78 LLVMValueRef b) 79{ 80 LLVMBuilderRef builder = gallivm->builder; 81 LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); 82 LLVMValueRef zeros = LLVMConstNull(int_vec_type); 83 LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 84 LLVMValueRef cond; 85 LLVMValueRef res; 86 87 assert(func >= PIPE_FUNC_NEVER); 88 assert(func <= PIPE_FUNC_ALWAYS); 89 assert(lp_check_value(type, a)); 90 assert(lp_check_value(type, b)); 91 92 if(func == PIPE_FUNC_NEVER) 93 return zeros; 94 if(func == PIPE_FUNC_ALWAYS) 95 return ones; 96 97#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 98 /* 99 * There are no unsigned integer comparison instructions in SSE. 100 */ 101 102 if (!type.floating && !type.sign && 103 type.width * type.length == 128 && 104 util_cpu_caps.has_sse2 && 105 (func == PIPE_FUNC_LESS || 106 func == PIPE_FUNC_LEQUAL || 107 func == PIPE_FUNC_GREATER || 108 func == PIPE_FUNC_GEQUAL) && 109 (gallivm_debug & GALLIVM_DEBUG_PERF)) { 110 debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", 111 __FUNCTION__, type.length, type.width); 112 } 113#endif 114 115#if HAVE_LLVM < 0x0207 116#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 117 if(type.width * type.length == 128) { 118 if(type.floating && util_cpu_caps.has_sse) { 119 /* float[4] comparison */ 120 LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); 121 LLVMValueRef args[3]; 122 unsigned cc; 123 boolean swap; 124 125 swap = FALSE; 126 switch(func) { 127 case PIPE_FUNC_EQUAL: 128 cc = 0; 129 break; 130 case PIPE_FUNC_NOTEQUAL: 131 cc = 4; 132 break; 133 case PIPE_FUNC_LESS: 134 cc = 1; 135 break; 136 case PIPE_FUNC_LEQUAL: 137 cc = 2; 138 break; 139 case PIPE_FUNC_GREATER: 140 cc = 1; 141 swap = TRUE; 142 break; 143 case PIPE_FUNC_GEQUAL: 144 cc = 2; 145 swap = TRUE; 146 break; 147 default: 148 assert(0); 149 return lp_build_undef(gallivm, type); 150 } 151 152 if(swap) { 153 args[0] = b; 154 args[1] = a; 155 } 156 else { 157 args[0] = a; 158 args[1] = b; 159 } 160 161 args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0); 162 res = lp_build_intrinsic(builder, 163 "llvm.x86.sse.cmp.ps", 164 vec_type, 165 args, 3); 166 res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 167 return res; 168 } 169 else if(util_cpu_caps.has_sse2) { 170 /* int[4] comparison */ 171 static const struct { 172 unsigned swap:1; 173 unsigned eq:1; 174 unsigned gt:1; 175 unsigned not:1; 176 } table[] = { 177 {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 178 {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 179 {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 180 {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 181 {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 182 {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 183 {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 184 {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 185 }; 186 const char *pcmpeq; 187 const char *pcmpgt; 188 LLVMValueRef args[2]; 189 LLVMValueRef res; 190 LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); 191 192 switch (type.width) { 193 case 8: 194 pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 195 pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 196 break; 197 case 16: 198 pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 199 pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 200 break; 201 case 32: 202 pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 203 pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 204 break; 205 default: 206 assert(0); 207 return lp_build_undef(gallivm, type); 208 } 209 210 /* There are no unsigned comparison instructions. So flip the sign bit 211 * so that the results match. 212 */ 213 if (table[func].gt && !type.sign) { 214 LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1)); 215 a = LLVMBuildXor(builder, a, msb, ""); 216 b = LLVMBuildXor(builder, b, msb, ""); 217 } 218 219 if(table[func].swap) { 220 args[0] = b; 221 args[1] = a; 222 } 223 else { 224 args[0] = a; 225 args[1] = b; 226 } 227 228 if(table[func].eq) 229 res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 230 else if (table[func].gt) 231 res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 232 else 233 res = LLVMConstNull(vec_type); 234 235 if(table[func].not) 236 res = LLVMBuildNot(builder, res, ""); 237 238 return res; 239 } 240 } /* if (type.width * type.length == 128) */ 241#endif 242#endif /* HAVE_LLVM < 0x0207 */ 243 244 /* XXX: It is not clear if we should use the ordered or unordered operators */ 245 246 if(type.floating) { 247 LLVMRealPredicate op; 248 switch(func) { 249 case PIPE_FUNC_NEVER: 250 op = LLVMRealPredicateFalse; 251 break; 252 case PIPE_FUNC_ALWAYS: 253 op = LLVMRealPredicateTrue; 254 break; 255 case PIPE_FUNC_EQUAL: 256 op = LLVMRealUEQ; 257 break; 258 case PIPE_FUNC_NOTEQUAL: 259 op = LLVMRealUNE; 260 break; 261 case PIPE_FUNC_LESS: 262 op = LLVMRealULT; 263 break; 264 case PIPE_FUNC_LEQUAL: 265 op = LLVMRealULE; 266 break; 267 case PIPE_FUNC_GREATER: 268 op = LLVMRealUGT; 269 break; 270 case PIPE_FUNC_GEQUAL: 271 op = LLVMRealUGE; 272 break; 273 default: 274 assert(0); 275 return lp_build_undef(gallivm, type); 276 } 277 278#if HAVE_LLVM >= 0x0207 279 cond = LLVMBuildFCmp(builder, op, a, b, ""); 280 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 281#else 282 if (type.length == 1) { 283 cond = LLVMBuildFCmp(builder, op, a, b, ""); 284 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 285 } 286 else { 287 unsigned i; 288 289 res = LLVMGetUndef(int_vec_type); 290 291 debug_printf("%s: warning: using slow element-wise float" 292 " vector comparison\n", __FUNCTION__); 293 for (i = 0; i < type.length; ++i) { 294 LLVMValueRef index = lp_build_const_int32(gallivm, i); 295 cond = LLVMBuildFCmp(builder, op, 296 LLVMBuildExtractElement(builder, a, index, ""), 297 LLVMBuildExtractElement(builder, b, index, ""), 298 ""); 299 cond = LLVMBuildSelect(builder, cond, 300 LLVMConstExtractElement(ones, index), 301 LLVMConstExtractElement(zeros, index), 302 ""); 303 res = LLVMBuildInsertElement(builder, res, cond, index, ""); 304 } 305 } 306#endif 307 } 308 else { 309 LLVMIntPredicate op; 310 switch(func) { 311 case PIPE_FUNC_EQUAL: 312 op = LLVMIntEQ; 313 break; 314 case PIPE_FUNC_NOTEQUAL: 315 op = LLVMIntNE; 316 break; 317 case PIPE_FUNC_LESS: 318 op = type.sign ? LLVMIntSLT : LLVMIntULT; 319 break; 320 case PIPE_FUNC_LEQUAL: 321 op = type.sign ? LLVMIntSLE : LLVMIntULE; 322 break; 323 case PIPE_FUNC_GREATER: 324 op = type.sign ? LLVMIntSGT : LLVMIntUGT; 325 break; 326 case PIPE_FUNC_GEQUAL: 327 op = type.sign ? LLVMIntSGE : LLVMIntUGE; 328 break; 329 default: 330 assert(0); 331 return lp_build_undef(gallivm, type); 332 } 333 334#if HAVE_LLVM >= 0x0207 335 cond = LLVMBuildICmp(builder, op, a, b, ""); 336 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 337#else 338 if (type.length == 1) { 339 cond = LLVMBuildICmp(builder, op, a, b, ""); 340 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 341 } 342 else { 343 unsigned i; 344 345 res = LLVMGetUndef(int_vec_type); 346 347 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 348 debug_printf("%s: using slow element-wise int" 349 " vector comparison\n", __FUNCTION__); 350 } 351 352 for(i = 0; i < type.length; ++i) { 353 LLVMValueRef index = lp_build_const_int32(gallivm, i); 354 cond = LLVMBuildICmp(builder, op, 355 LLVMBuildExtractElement(builder, a, index, ""), 356 LLVMBuildExtractElement(builder, b, index, ""), 357 ""); 358 cond = LLVMBuildSelect(builder, cond, 359 LLVMConstExtractElement(ones, index), 360 LLVMConstExtractElement(zeros, index), 361 ""); 362 res = LLVMBuildInsertElement(builder, res, cond, index, ""); 363 } 364 } 365#endif 366 } 367 368 return res; 369} 370 371 372 373/** 374 * Build code to compare two values 'a' and 'b' using the given func. 375 * \param func one of PIPE_FUNC_x 376 * The result values will be 0 for false or ~0 for true. 377 */ 378LLVMValueRef 379lp_build_cmp(struct lp_build_context *bld, 380 unsigned func, 381 LLVMValueRef a, 382 LLVMValueRef b) 383{ 384 return lp_build_compare(bld->gallivm, bld->type, func, a, b); 385} 386 387 388/** 389 * Return (mask & a) | (~mask & b); 390 */ 391LLVMValueRef 392lp_build_select_bitwise(struct lp_build_context *bld, 393 LLVMValueRef mask, 394 LLVMValueRef a, 395 LLVMValueRef b) 396{ 397 struct lp_type type = bld->type; 398 LLVMValueRef res; 399 400 assert(lp_check_value(type, a)); 401 assert(lp_check_value(type, b)); 402 403 if (a == b) { 404 return a; 405 } 406 407 if(type.floating) { 408 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); 409 a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 410 b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 411 } 412 413 a = LLVMBuildAnd(bld->builder, a, mask, ""); 414 415 /* This often gets translated to PANDN, but sometimes the NOT is 416 * pre-computed and stored in another constant. The best strategy depends 417 * on available registers, so it is not a big deal -- hopefully LLVM does 418 * the right decision attending the rest of the program. 419 */ 420 b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 421 422 res = LLVMBuildOr(bld->builder, a, b, ""); 423 424 if(type.floating) { 425 LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 426 res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 427 } 428 429 return res; 430} 431 432 433/** 434 * Return mask ? a : b; 435 * 436 * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 437 * will yield unpredictable results. 438 */ 439LLVMValueRef 440lp_build_select(struct lp_build_context *bld, 441 LLVMValueRef mask, 442 LLVMValueRef a, 443 LLVMValueRef b) 444{ 445 LLVMContextRef lc = bld->gallivm->context; 446 struct lp_type type = bld->type; 447 LLVMValueRef res; 448 449 assert(lp_check_value(type, a)); 450 assert(lp_check_value(type, b)); 451 452 if(a == b) 453 return a; 454 455 if (type.length == 1) { 456 mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1TypeInContext(lc), ""); 457 res = LLVMBuildSelect(bld->builder, mask, a, b, ""); 458 } 459 else if (util_cpu_caps.has_sse4_1 && 460 type.width * type.length == 128 && 461 !LLVMIsConstant(a) && 462 !LLVMIsConstant(b) && 463 !LLVMIsConstant(mask)) { 464 const char *intrinsic; 465 LLVMTypeRef arg_type; 466 LLVMValueRef args[3]; 467 468 if (type.floating && 469 type.width == 64) { 470 intrinsic = "llvm.x86.sse41.blendvpd"; 471 arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2); 472 } else if (type.floating && 473 type.width == 32) { 474 intrinsic = "llvm.x86.sse41.blendvps"; 475 arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4); 476 } else { 477 intrinsic = "llvm.x86.sse41.pblendvb"; 478 arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16); 479 } 480 481 if (arg_type != bld->int_vec_type) { 482 mask = LLVMBuildBitCast(bld->builder, mask, arg_type, ""); 483 } 484 485 if (arg_type != bld->vec_type) { 486 a = LLVMBuildBitCast(bld->builder, a, arg_type, ""); 487 b = LLVMBuildBitCast(bld->builder, b, arg_type, ""); 488 } 489 490 args[0] = b; 491 args[1] = a; 492 args[2] = mask; 493 494 res = lp_build_intrinsic(bld->builder, intrinsic, 495 arg_type, args, Elements(args)); 496 497 if (arg_type != bld->vec_type) { 498 res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, ""); 499 } 500 } 501 else { 502 res = lp_build_select_bitwise(bld, mask, a, b); 503 } 504 505 return res; 506} 507 508 509/** 510 * Return mask ? a : b; 511 * 512 * mask is a TGSI_WRITEMASK_xxx. 513 */ 514LLVMValueRef 515lp_build_select_aos(struct lp_build_context *bld, 516 unsigned mask, 517 LLVMValueRef a, 518 LLVMValueRef b) 519{ 520 const struct lp_type type = bld->type; 521 const unsigned n = type.length; 522 unsigned i, j; 523 524 assert((mask & ~0xf) == 0); 525 assert(lp_check_value(type, a)); 526 assert(lp_check_value(type, b)); 527 528 if(a == b) 529 return a; 530 if((mask & 0xf) == 0xf) 531 return a; 532 if((mask & 0xf) == 0x0) 533 return b; 534 if(a == bld->undef || b == bld->undef) 535 return bld->undef; 536 537 /* 538 * There are three major ways of accomplishing this: 539 * - with a shuffle, 540 * - with a select, 541 * - or with a bit mask. 542 * 543 * Select isn't supported for vector types yet. 544 * The flip between these is empirical and might need to be. 545 */ 546 if (n <= 4) { 547 /* 548 * Shuffle. 549 */ 550 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 551 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 552 553 for(j = 0; j < n; j += 4) 554 for(i = 0; i < 4; ++i) 555 shuffles[j + i] = LLVMConstInt(elem_type, 556 (mask & (1 << i) ? 0 : n) + j + i, 557 0); 558 559 return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 560 } 561 else { 562#if 0 563 /* XXX: Unfortunately select of vectors do not work */ 564 /* Use a select */ 565 LLVMTypeRef elem_type = LLVMInt1Type(); 566 LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH]; 567 568 for(j = 0; j < n; j += 4) 569 for(i = 0; i < 4; ++i) 570 cond_vec[j + i] = LLVMConstInt(elem_type, 571 mask & (1 << i) ? 1 : 0, 0); 572 573 return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, ""); 574#else 575 LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask); 576 return lp_build_select(bld, mask_vec, a, b); 577#endif 578 } 579} 580