/* lp_bld_logic.c revision b3d4e5bd26a44870af7d2413cca7a6f576a0984a */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Helper functions for logical operations. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_cpu_detect.h" 37#include "util/u_debug.h" 38 39#include "lp_bld_type.h" 40#include "lp_bld_const.h" 41#include "lp_bld_intr.h" 42#include "lp_bld_logic.h" 43 44 45/* 46 * XXX 47 * 48 * Selection with vector conditional like 49 * 50 * select <4 x i1> %C, %A, %B 51 * 52 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not 53 * supported on any backend. 
54 * 55 * Expanding the boolean vector to full SIMD register width, as in 56 * 57 * sext <4 x i1> %C to <4 x i32> 58 * 59 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 60 * it causes assertion failures in LLVM 2.6. It appears to work correctly on 61 * LLVM 2.7. 62 */ 63 64 65/** 66 * Build code to compare two values 'a' and 'b' of 'type' using the given func. 67 * \param func one of PIPE_FUNC_x 68 * The result values will be 0 for false or ~0 for true. 69 */ 70LLVMValueRef 71lp_build_compare(LLVMBuilderRef builder, 72 const struct lp_type type, 73 unsigned func, 74 LLVMValueRef a, 75 LLVMValueRef b) 76{ 77 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 78 LLVMValueRef zeros = LLVMConstNull(int_vec_type); 79 LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 80 LLVMValueRef cond; 81 LLVMValueRef res; 82 83 assert(func >= PIPE_FUNC_NEVER); 84 assert(func <= PIPE_FUNC_ALWAYS); 85 86 if(func == PIPE_FUNC_NEVER) 87 return zeros; 88 if(func == PIPE_FUNC_ALWAYS) 89 return ones; 90 91 /* TODO: optimize the constant case */ 92 93 /* XXX: It is not clear if we should use the ordered or unordered operators */ 94 95#if HAVE_LLVM < 0x0207 96#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 97 if(type.width * type.length == 128) { 98 if(type.floating && util_cpu_caps.has_sse) { 99 /* float[4] comparison */ 100 LLVMTypeRef vec_type = lp_build_vec_type(type); 101 LLVMValueRef args[3]; 102 unsigned cc; 103 boolean swap; 104 105 swap = FALSE; 106 switch(func) { 107 case PIPE_FUNC_EQUAL: 108 cc = 0; 109 break; 110 case PIPE_FUNC_NOTEQUAL: 111 cc = 4; 112 break; 113 case PIPE_FUNC_LESS: 114 cc = 1; 115 break; 116 case PIPE_FUNC_LEQUAL: 117 cc = 2; 118 break; 119 case PIPE_FUNC_GREATER: 120 cc = 1; 121 swap = TRUE; 122 break; 123 case PIPE_FUNC_GEQUAL: 124 cc = 2; 125 swap = TRUE; 126 break; 127 default: 128 assert(0); 129 return lp_build_undef(type); 130 } 131 132 if(swap) { 133 args[0] = b; 134 args[1] = a; 135 } 136 else { 137 args[0] = 
a; 138 args[1] = b; 139 } 140 141 args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 142 res = lp_build_intrinsic(builder, 143 "llvm.x86.sse.cmp.ps", 144 vec_type, 145 args, 3); 146 res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 147 return res; 148 } 149 else if(util_cpu_caps.has_sse2) { 150 /* int[4] comparison */ 151 static const struct { 152 unsigned swap:1; 153 unsigned eq:1; 154 unsigned gt:1; 155 unsigned not:1; 156 } table[] = { 157 {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 158 {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 159 {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 160 {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 161 {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 162 {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 163 {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 164 {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 165 }; 166 const char *pcmpeq; 167 const char *pcmpgt; 168 LLVMValueRef args[2]; 169 LLVMValueRef res; 170 LLVMTypeRef vec_type = lp_build_vec_type(type); 171 172 switch (type.width) { 173 case 8: 174 pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 175 pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 176 break; 177 case 16: 178 pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 179 pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 180 break; 181 case 32: 182 pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 183 pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 184 break; 185 default: 186 assert(0); 187 return lp_build_undef(type); 188 } 189 190 /* There are no unsigned comparison instructions. So flip the sign bit 191 * so that the results match. 
192 */ 193 if (table[func].gt && !type.sign) { 194 LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); 195 a = LLVMBuildXor(builder, a, msb, ""); 196 b = LLVMBuildXor(builder, b, msb, ""); 197 } 198 199 if(table[func].swap) { 200 args[0] = b; 201 args[1] = a; 202 } 203 else { 204 args[0] = a; 205 args[1] = b; 206 } 207 208 if(table[func].eq) 209 res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 210 else if (table[func].gt) 211 res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 212 else 213 res = LLVMConstNull(vec_type); 214 215 if(table[func].not) 216 res = LLVMBuildNot(builder, res, ""); 217 218 return res; 219 } 220 } /* if (type.width * type.length == 128) */ 221#endif 222#endif /* HAVE_LLVM < 0x0207 */ 223 224 if(type.floating) { 225 LLVMRealPredicate op; 226 switch(func) { 227 case PIPE_FUNC_NEVER: 228 op = LLVMRealPredicateFalse; 229 break; 230 case PIPE_FUNC_ALWAYS: 231 op = LLVMRealPredicateTrue; 232 break; 233 case PIPE_FUNC_EQUAL: 234 op = LLVMRealUEQ; 235 break; 236 case PIPE_FUNC_NOTEQUAL: 237 op = LLVMRealUNE; 238 break; 239 case PIPE_FUNC_LESS: 240 op = LLVMRealULT; 241 break; 242 case PIPE_FUNC_LEQUAL: 243 op = LLVMRealULE; 244 break; 245 case PIPE_FUNC_GREATER: 246 op = LLVMRealUGT; 247 break; 248 case PIPE_FUNC_GEQUAL: 249 op = LLVMRealUGE; 250 break; 251 default: 252 assert(0); 253 return lp_build_undef(type); 254 } 255 256#if HAVE_LLVM >= 0x0207 257 cond = LLVMBuildFCmp(builder, op, a, b, ""); 258 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 259#else 260 if (type.length == 1) { 261 cond = LLVMBuildFCmp(builder, op, a, b, ""); 262 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 263 } 264 else { 265 unsigned i; 266 267 res = LLVMGetUndef(int_vec_type); 268 269 debug_printf("%s: warning: using slow element-wise float" 270 " vector comparison\n", __FUNCTION__); 271 for (i = 0; i < type.length; ++i) { 272 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 273 cond = 
LLVMBuildFCmp(builder, op, 274 LLVMBuildExtractElement(builder, a, index, ""), 275 LLVMBuildExtractElement(builder, b, index, ""), 276 ""); 277 cond = LLVMBuildSelect(builder, cond, 278 LLVMConstExtractElement(ones, index), 279 LLVMConstExtractElement(zeros, index), 280 ""); 281 res = LLVMBuildInsertElement(builder, res, cond, index, ""); 282 } 283 } 284#endif 285 } 286 else { 287 LLVMIntPredicate op; 288 switch(func) { 289 case PIPE_FUNC_EQUAL: 290 op = LLVMIntEQ; 291 break; 292 case PIPE_FUNC_NOTEQUAL: 293 op = LLVMIntNE; 294 break; 295 case PIPE_FUNC_LESS: 296 op = type.sign ? LLVMIntSLT : LLVMIntULT; 297 break; 298 case PIPE_FUNC_LEQUAL: 299 op = type.sign ? LLVMIntSLE : LLVMIntULE; 300 break; 301 case PIPE_FUNC_GREATER: 302 op = type.sign ? LLVMIntSGT : LLVMIntUGT; 303 break; 304 case PIPE_FUNC_GEQUAL: 305 op = type.sign ? LLVMIntSGE : LLVMIntUGE; 306 break; 307 default: 308 assert(0); 309 return lp_build_undef(type); 310 } 311 312#if HAVE_LLVM >= 0x0207 313 cond = LLVMBuildICmp(builder, op, a, b, ""); 314 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 315#else 316 if (type.length == 1) { 317 cond = LLVMBuildICmp(builder, op, a, b, ""); 318 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 319 } 320 else { 321 unsigned i; 322 323 res = LLVMGetUndef(int_vec_type); 324 325 debug_printf("%s: warning: using slow element-wise int" 326 " vector comparison\n", __FUNCTION__); 327 328 for(i = 0; i < type.length; ++i) { 329 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 330 cond = LLVMBuildICmp(builder, op, 331 LLVMBuildExtractElement(builder, a, index, ""), 332 LLVMBuildExtractElement(builder, b, index, ""), 333 ""); 334 cond = LLVMBuildSelect(builder, cond, 335 LLVMConstExtractElement(ones, index), 336 LLVMConstExtractElement(zeros, index), 337 ""); 338 res = LLVMBuildInsertElement(builder, res, cond, index, ""); 339 } 340 } 341#endif 342 } 343 344 return res; 345} 346 347 348 349/** 350 * Build code to compare two values 'a' and 'b' using the 
given func. 351 * \param func one of PIPE_FUNC_x 352 * The result values will be 0 for false or ~0 for true. 353 */ 354LLVMValueRef 355lp_build_cmp(struct lp_build_context *bld, 356 unsigned func, 357 LLVMValueRef a, 358 LLVMValueRef b) 359{ 360 return lp_build_compare(bld->builder, bld->type, func, a, b); 361} 362 363 364/** 365 * Return mask ? a : b; 366 * 367 * mask is a bitwise mask, composed of 0 or ~0 for each element. 368 */ 369LLVMValueRef 370lp_build_select(struct lp_build_context *bld, 371 LLVMValueRef mask, 372 LLVMValueRef a, 373 LLVMValueRef b) 374{ 375 struct lp_type type = bld->type; 376 LLVMValueRef res; 377 378 if(a == b) 379 return a; 380 381 if (type.length == 1) { 382 mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); 383 res = LLVMBuildSelect(bld->builder, mask, a, b, ""); 384 } 385 else { 386 if(type.floating) { 387 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 388 a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 389 b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 390 } 391 392 a = LLVMBuildAnd(bld->builder, a, mask, ""); 393 394 /* This often gets translated to PANDN, but sometimes the NOT is 395 * pre-computed and stored in another constant. The best strategy depends 396 * on available registers, so it is not a big deal -- hopefully LLVM does 397 * the right decision attending the rest of the program. 
398 */ 399 b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 400 401 res = LLVMBuildOr(bld->builder, a, b, ""); 402 403 if(type.floating) { 404 LLVMTypeRef vec_type = lp_build_vec_type(type); 405 res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 406 } 407 } 408 409 return res; 410} 411 412 413LLVMValueRef 414lp_build_select_aos(struct lp_build_context *bld, 415 LLVMValueRef a, 416 LLVMValueRef b, 417 const boolean cond[4]) 418{ 419 const struct lp_type type = bld->type; 420 const unsigned n = type.length; 421 unsigned i, j; 422 423 if(a == b) 424 return a; 425 if(cond[0] && cond[1] && cond[2] && cond[3]) 426 return a; 427 if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) 428 return b; 429 if(a == bld->undef || b == bld->undef) 430 return bld->undef; 431 432 /* 433 * There are three major ways of accomplishing this: 434 * - with a shuffle, 435 * - with a select, 436 * - or with a bit mask. 437 * 438 * Select isn't supported for vector types yet. 439 * The flip between these is empirical and might need to be. 440 */ 441 if (n <= 4) { 442 /* 443 * Shuffle. 444 */ 445 LLVMTypeRef elem_type = LLVMInt32Type(); 446 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 447 448 for(j = 0; j < n; j += 4) 449 for(i = 0; i < 4; ++i) 450 shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); 451 452 return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 453 } 454 else { 455#if 0 456 /* XXX: Unfortunately select of vectors do not work */ 457 /* Use a select */ 458 LLVMTypeRef elem_type = LLVMInt1Type(); 459 LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; 460 461 for(j = 0; j < n; j += 4) 462 for(i = 0; i < 4; ++i) 463 cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 
1 : 0, 0); 464 465 return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); 466#else 467 LLVMValueRef mask = lp_build_const_mask_aos(type, cond); 468 return lp_build_select(bld, mask, a, b); 469#endif 470 } 471} 472 473 474/** Return (a & ~b) */ 475LLVMValueRef 476lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) 477{ 478 b = LLVMBuildNot(bld->builder, b, ""); 479 b = LLVMBuildAnd(bld->builder, a, b, ""); 480 return b; 481} 482