lp_bld_logic.c revision 489af2a3ba467e4341cb8504a0e59cf5828864d4
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Helper functions for logical operations. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_cpu_detect.h" 37#include "util/u_debug.h" 38 39#include "lp_bld_type.h" 40#include "lp_bld_const.h" 41#include "lp_bld_intr.h" 42#include "lp_bld_logic.h" 43 44 45/* 46 * XXX 47 * 48 * Selection with vector conditional like 49 * 50 * select <4 x i1> %C, %A, %B 51 * 52 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not 53 * supported on any backend. 54 * 55 * Expanding the boolean vector to full SIMD register width, as in 56 * 57 * sext <4 x i1> %C to <4 x i32> 58 * 59 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 60 * it causes assertion failures in LLVM 2.6. It appears to work correctly on 61 * LLVM 2.7. 62 */ 63 64 65/** 66 * Build code to compare two values 'a' and 'b' of 'type' using the given func. 67 * \param func one of PIPE_FUNC_x 68 * The result values will be 0 for false or ~0 for true. 69 */ 70LLVMValueRef 71lp_build_compare(LLVMBuilderRef builder, 72 const struct lp_type type, 73 unsigned func, 74 LLVMValueRef a, 75 LLVMValueRef b) 76{ 77 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 78 LLVMValueRef zeros = LLVMConstNull(int_vec_type); 79 LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 80 LLVMValueRef cond; 81 LLVMValueRef res; 82 83 assert(func >= PIPE_FUNC_NEVER); 84 assert(func <= PIPE_FUNC_ALWAYS); 85 86 if(func == PIPE_FUNC_NEVER) 87 return zeros; 88 if(func == PIPE_FUNC_ALWAYS) 89 return ones; 90 91 /* TODO: optimize the constant case */ 92 93 /* XXX: It is not clear if we should use the ordered or unordered operators */ 94 95#if HAVE_LLVM < 0x0207 96#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 97 if(type.width * type.length == 128) { 98 if(type.floating && util_cpu_caps.has_sse) { 99 /* float[4] comparison */ 100 LLVMTypeRef vec_type = lp_build_vec_type(type); 101 LLVMValueRef args[3]; 102 unsigned cc; 103 boolean swap; 104 105 swap = FALSE; 106 switch(func) { 107 case PIPE_FUNC_EQUAL: 108 cc = 0; 109 break; 110 case PIPE_FUNC_NOTEQUAL: 111 cc = 4; 112 break; 113 case PIPE_FUNC_LESS: 114 cc = 1; 115 break; 116 case PIPE_FUNC_LEQUAL: 117 cc = 2; 118 break; 119 case PIPE_FUNC_GREATER: 120 cc = 1; 121 swap = TRUE; 122 break; 123 case PIPE_FUNC_GEQUAL: 124 cc = 2; 125 swap = TRUE; 126 break; 127 default: 128 assert(0); 129 return lp_build_undef(type); 130 } 131 132 if(swap) { 133 args[0] = b; 134 args[1] = a; 135 } 136 else { 137 args[0] = a; 138 args[1] = b; 139 } 140 141 args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 142 res = lp_build_intrinsic(builder, 143 "llvm.x86.sse.cmp.ps", 144 vec_type, 145 args, 3); 146 res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 147 return res; 148 } 149 else if(util_cpu_caps.has_sse2) { 150 /* int[4] comparison */ 151 static const struct { 152 unsigned swap:1; 153 unsigned eq:1; 154 unsigned gt:1; 155 unsigned not:1; 156 } table[] = { 157 {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 158 {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 159 {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 160 {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 161 {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 162 {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 163 {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 164 {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 165 }; 166 const char *pcmpeq; 167 const char *pcmpgt; 168 LLVMValueRef args[2]; 169 LLVMValueRef res; 170 LLVMTypeRef vec_type = lp_build_vec_type(type); 171 172 switch (type.width) { 173 case 8: 174 pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 175 pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 176 break; 177 case 16: 178 pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 179 pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 180 break; 181 case 32: 182 pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 183 pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 184 break; 185 default: 186 assert(0); 187 return lp_build_undef(type); 188 } 189 190 /* There are no signed byte and unsigned word/dword comparison 191 * instructions. So flip the sign bit so that the results match. 192 */ 193 if(table[func].gt && 194 ((type.width == 8 && type.sign) || 195 (type.width != 8 && !type.sign))) { 196 LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); 197 a = LLVMBuildXor(builder, a, msb, ""); 198 b = LLVMBuildXor(builder, b, msb, ""); 199 } 200 201 if(table[func].swap) { 202 args[0] = b; 203 args[1] = a; 204 } 205 else { 206 args[0] = a; 207 args[1] = b; 208 } 209 210 if(table[func].eq) 211 res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 212 else if (table[func].gt) 213 res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 214 else 215 res = LLVMConstNull(vec_type); 216 217 if(table[func].not) 218 res = LLVMBuildNot(builder, res, ""); 219 220 return res; 221 } 222 } /* if (type.width * type.length == 128) */ 223#endif 224#endif /* HAVE_LLVM < 0x0207 */ 225 226 if(type.floating) { 227 LLVMRealPredicate op; 228 switch(func) { 229 case PIPE_FUNC_NEVER: 230 op = LLVMRealPredicateFalse; 231 break; 232 case PIPE_FUNC_ALWAYS: 233 op = LLVMRealPredicateTrue; 234 break; 235 case PIPE_FUNC_EQUAL: 236 op = LLVMRealUEQ; 237 break; 238 case PIPE_FUNC_NOTEQUAL: 239 op = LLVMRealUNE; 240 break; 241 case PIPE_FUNC_LESS: 242 op = LLVMRealULT; 243 break; 244 case PIPE_FUNC_LEQUAL: 245 op = LLVMRealULE; 246 break; 247 case PIPE_FUNC_GREATER: 248 op = LLVMRealUGT; 249 break; 250 case PIPE_FUNC_GEQUAL: 251 op = LLVMRealUGE; 252 break; 253 default: 254 assert(0); 255 return lp_build_undef(type); 256 } 257 258#if HAVE_LLVM >= 0x0207 259 cond = LLVMBuildFCmp(builder, op, a, b, ""); 260 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 261#else 262 if (type.length == 1) { 263 cond = LLVMBuildFCmp(builder, op, a, b, ""); 264 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 265 } 266 else { 267 unsigned i; 268 269 res = LLVMGetUndef(int_vec_type); 270 271 debug_printf("%s: warning: using slow element-wise float" 272 " vector comparison\n", __FUNCTION__); 273 for (i = 0; i < type.length; ++i) { 274 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 275 cond = LLVMBuildFCmp(builder, op, 276 LLVMBuildExtractElement(builder, a, index, ""), 277 LLVMBuildExtractElement(builder, b, index, ""), 278 ""); 279 cond = LLVMBuildSelect(builder, cond, 280 LLVMConstExtractElement(ones, index), 281 LLVMConstExtractElement(zeros, index), 282 ""); 283 res = LLVMBuildInsertElement(builder, res, cond, index, ""); 284 } 285 } 286#endif 287 } 288 else { 289 LLVMIntPredicate op; 290 switch(func) { 291 case PIPE_FUNC_EQUAL: 292 op = LLVMIntEQ; 293 break; 294 case PIPE_FUNC_NOTEQUAL: 295 op = LLVMIntNE; 296 break; 297 case PIPE_FUNC_LESS: 298 op = type.sign ? LLVMIntSLT : LLVMIntULT; 299 break; 300 case PIPE_FUNC_LEQUAL: 301 op = type.sign ? LLVMIntSLE : LLVMIntULE; 302 break; 303 case PIPE_FUNC_GREATER: 304 op = type.sign ? LLVMIntSGT : LLVMIntUGT; 305 break; 306 case PIPE_FUNC_GEQUAL: 307 op = type.sign ? LLVMIntSGE : LLVMIntUGE; 308 break; 309 default: 310 assert(0); 311 return lp_build_undef(type); 312 } 313 314#if HAVE_LLVM >= 0x0207 315 cond = LLVMBuildICmp(builder, op, a, b, ""); 316 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 317#else 318 if (type.length == 1) { 319 cond = LLVMBuildICmp(builder, op, a, b, ""); 320 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 321 } 322 else { 323 unsigned i; 324 325 res = LLVMGetUndef(int_vec_type); 326 327 debug_printf("%s: warning: using slow element-wise int" 328 " vector comparison\n", __FUNCTION__); 329 330 for(i = 0; i < type.length; ++i) { 331 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 332 cond = LLVMBuildICmp(builder, op, 333 LLVMBuildExtractElement(builder, a, index, ""), 334 LLVMBuildExtractElement(builder, b, index, ""), 335 ""); 336 cond = LLVMBuildSelect(builder, cond, 337 LLVMConstExtractElement(ones, index), 338 LLVMConstExtractElement(zeros, index), 339 ""); 340 res = LLVMBuildInsertElement(builder, res, cond, index, ""); 341 } 342 } 343#endif 344 } 345 346 return res; 347} 348 349 350 351/** 352 * Build code to compare two values 'a' and 'b' using the given func. 353 * \param func one of PIPE_FUNC_x 354 * The result values will be 0 for false or ~0 for true. 355 */ 356LLVMValueRef 357lp_build_cmp(struct lp_build_context *bld, 358 unsigned func, 359 LLVMValueRef a, 360 LLVMValueRef b) 361{ 362 return lp_build_compare(bld->builder, bld->type, func, a, b); 363} 364 365 366/** 367 * Return mask ? a : b; 368 * 369 * mask is a bitwise mask, composed of 0 or ~0 for each element. 370 */ 371LLVMValueRef 372lp_build_select(struct lp_build_context *bld, 373 LLVMValueRef mask, 374 LLVMValueRef a, 375 LLVMValueRef b) 376{ 377 struct lp_type type = bld->type; 378 LLVMValueRef res; 379 380 if(a == b) 381 return a; 382 383 if (type.length == 1) { 384 mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); 385 res = LLVMBuildSelect(bld->builder, mask, a, b, ""); 386 } 387 else { 388 if(type.floating) { 389 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 390 a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 391 b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 392 } 393 394 a = LLVMBuildAnd(bld->builder, a, mask, ""); 395 396 /* This often gets translated to PANDN, but sometimes the NOT is 397 * pre-computed and stored in another constant. The best strategy depends 398 * on available registers, so it is not a big deal -- hopefully LLVM does 399 * the right decision attending the rest of the program. 400 */ 401 b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 402 403 res = LLVMBuildOr(bld->builder, a, b, ""); 404 405 if(type.floating) { 406 LLVMTypeRef vec_type = lp_build_vec_type(type); 407 res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 408 } 409 } 410 411 return res; 412} 413 414 415LLVMValueRef 416lp_build_select_aos(struct lp_build_context *bld, 417 LLVMValueRef a, 418 LLVMValueRef b, 419 const boolean cond[4]) 420{ 421 const struct lp_type type = bld->type; 422 const unsigned n = type.length; 423 unsigned i, j; 424 425 if(a == b) 426 return a; 427 if(cond[0] && cond[1] && cond[2] && cond[3]) 428 return a; 429 if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) 430 return b; 431 if(a == bld->undef || b == bld->undef) 432 return bld->undef; 433 434 /* 435 * There are three major ways of accomplishing this: 436 * - with a shuffle, 437 * - with a select, 438 * - or with a bit mask. 439 * 440 * Select isn't supported for vector types yet. 441 * The flip between these is empirical and might need to be. 442 */ 443 if (n <= 4) { 444 /* 445 * Shuffle. 446 */ 447 LLVMTypeRef elem_type = LLVMInt32Type(); 448 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 449 450 for(j = 0; j < n; j += 4) 451 for(i = 0; i < 4; ++i) 452 shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); 453 454 return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 455 } 456 else { 457#if 0 458 /* XXX: Unfortunately select of vectors do not work */ 459 /* Use a select */ 460 LLVMTypeRef elem_type = LLVMInt1Type(); 461 LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; 462 463 for(j = 0; j < n; j += 4) 464 for(i = 0; i < 4; ++i) 465 cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); 466 467 return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); 468#else 469 LLVMValueRef mask = lp_build_const_mask_aos(type, cond); 470 return lp_build_select(bld, mask, a, b); 471#endif 472 } 473} 474 475LLVMValueRef 476lp_build_alloca(struct lp_build_context *bld) 477{ 478 const struct lp_type type = bld->type; 479 480 if (type.length > 1) { /*vector*/ 481 return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), ""); 482 } else { /*scalar*/ 483 return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); 484 } 485} 486