lp_bld_logic.c revision 2297bc9233be014b7b5aa037769209fbe9f6a66c
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Helper functions for logical operations. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_cpu_detect.h" 37 38#include "lp_bld_type.h" 39#include "lp_bld_const.h" 40#include "lp_bld_intr.h" 41#include "lp_bld_logic.h" 42 43 44/** 45 * Build code to compare two values 'a' and 'b' of 'type' using the given func. 46 * \param func one of PIPE_FUNC_x 47 */ 48LLVMValueRef 49lp_build_compare(LLVMBuilderRef builder, 50 const struct lp_type type, 51 unsigned func, 52 LLVMValueRef a, 53 LLVMValueRef b) 54{ 55 LLVMTypeRef vec_type = lp_build_vec_type(type); 56 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 57 LLVMValueRef zeros = LLVMConstNull(int_vec_type); 58 LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 59 LLVMValueRef cond; 60 LLVMValueRef res; 61 unsigned i; 62 63 assert(func >= PIPE_FUNC_NEVER); 64 assert(func <= PIPE_FUNC_ALWAYS); 65 66 if(func == PIPE_FUNC_NEVER) 67 return zeros; 68 if(func == PIPE_FUNC_ALWAYS) 69 return ones; 70 71 /* TODO: optimize the constant case */ 72 73 /* XXX: It is not clear if we should use the ordered or unordered operators */ 74 75#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 76 if(type.width * type.length == 128) { 77 if(type.floating && util_cpu_caps.has_sse) { 78 /* float[4] comparison */ 79 LLVMValueRef args[3]; 80 unsigned cc; 81 boolean swap; 82 83 swap = FALSE; 84 switch(func) { 85 case PIPE_FUNC_EQUAL: 86 cc = 0; 87 break; 88 case PIPE_FUNC_NOTEQUAL: 89 cc = 4; 90 break; 91 case PIPE_FUNC_LESS: 92 cc = 1; 93 break; 94 case PIPE_FUNC_LEQUAL: 95 cc = 2; 96 break; 97 case PIPE_FUNC_GREATER: 98 cc = 1; 99 swap = TRUE; 100 break; 101 case PIPE_FUNC_GEQUAL: 102 cc = 2; 103 swap = TRUE; 104 break; 105 default: 106 assert(0); 107 return lp_build_undef(type); 108 } 109 110 if(swap) { 111 args[0] = b; 112 args[1] = a; 113 } 114 else { 115 args[0] = a; 116 args[1] = b; 117 } 118 119 args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); 120 res = lp_build_intrinsic(builder, 121 "llvm.x86.sse.cmp.ps", 122 vec_type, 123 args, 3); 124 res = LLVMBuildBitCast(builder, res, int_vec_type, ""); 125 return res; 126 } 127 else if(util_cpu_caps.has_sse2) { 128 /* int[4] comparison */ 129 static const struct { 130 unsigned swap:1; 131 unsigned eq:1; 132 unsigned gt:1; 133 unsigned not:1; 134 } table[] = { 135 {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ 136 {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ 137 {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ 138 {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ 139 {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ 140 {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ 141 {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ 142 {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ 143 }; 144 const char *pcmpeq; 145 const char *pcmpgt; 146 LLVMValueRef args[2]; 147 LLVMValueRef res; 148 149 switch (type.width) { 150 case 8: 151 pcmpeq = "llvm.x86.sse2.pcmpeq.b"; 152 pcmpgt = "llvm.x86.sse2.pcmpgt.b"; 153 break; 154 case 16: 155 pcmpeq = "llvm.x86.sse2.pcmpeq.w"; 156 pcmpgt = "llvm.x86.sse2.pcmpgt.w"; 157 break; 158 case 32: 159 pcmpeq = "llvm.x86.sse2.pcmpeq.d"; 160 pcmpgt = "llvm.x86.sse2.pcmpgt.d"; 161 break; 162 default: 163 assert(0); 164 return lp_build_undef(type); 165 } 166 167 /* There are no signed byte and unsigned word/dword comparison 168 * instructions. So flip the sign bit so that the results match. 169 */ 170 if(table[func].gt && 171 ((type.width == 8 && type.sign) || 172 (type.width != 8 && !type.sign))) { 173 LLVMValueRef msb = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1)); 174 a = LLVMBuildXor(builder, a, msb, ""); 175 b = LLVMBuildXor(builder, b, msb, ""); 176 } 177 178 if(table[func].swap) { 179 args[0] = b; 180 args[1] = a; 181 } 182 else { 183 args[0] = a; 184 args[1] = b; 185 } 186 187 if(table[func].eq) 188 res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); 189 else if (table[func].gt) 190 res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); 191 else 192 res = LLVMConstNull(vec_type); 193 194 if(table[func].not) 195 res = LLVMBuildNot(builder, res, ""); 196 197 return res; 198 } 199 } 200#endif 201 202 if(type.floating) { 203 LLVMRealPredicate op; 204 switch(func) { 205 case PIPE_FUNC_NEVER: 206 op = LLVMRealPredicateFalse; 207 break; 208 case PIPE_FUNC_ALWAYS: 209 op = LLVMRealPredicateTrue; 210 break; 211 case PIPE_FUNC_EQUAL: 212 op = LLVMRealUEQ; 213 break; 214 case PIPE_FUNC_NOTEQUAL: 215 op = LLVMRealUNE; 216 break; 217 case PIPE_FUNC_LESS: 218 op = LLVMRealULT; 219 break; 220 case PIPE_FUNC_LEQUAL: 221 op = LLVMRealULE; 222 break; 223 case PIPE_FUNC_GREATER: 224 op = LLVMRealUGT; 225 break; 226 case PIPE_FUNC_GEQUAL: 227 op = LLVMRealUGE; 228 break; 229 default: 230 assert(0); 231 return lp_build_undef(type); 232 } 233 234#if 0 235 /* XXX: Although valid IR, no LLVM target currently support this */ 236 cond = LLVMBuildFCmp(builder, op, a, b, ""); 237 res = LLVMBuildSelect(builder, cond, ones, zeros, ""); 238#else 239 debug_printf("%s: warning: using slow element-wise vector comparison\n", 240 __FUNCTION__); 241 res = LLVMGetUndef(int_vec_type); 242 for(i = 0; i < type.length; ++i) { 243 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 244 cond = LLVMBuildFCmp(builder, op, 245 LLVMBuildExtractElement(builder, a, index, ""), 246 LLVMBuildExtractElement(builder, b, index, ""), 247 ""); 248 cond = LLVMBuildSelect(builder, cond, 249 LLVMConstExtractElement(ones, index), 250 LLVMConstExtractElement(zeros, index), 251 ""); 252 res = LLVMBuildInsertElement(builder, res, cond, index, ""); 253 } 254#endif 255 } 256 else { 257 LLVMIntPredicate op; 258 switch(func) { 259 case PIPE_FUNC_EQUAL: 260 op = LLVMIntEQ; 261 break; 262 case PIPE_FUNC_NOTEQUAL: 263 op = LLVMIntNE; 264 break; 265 case PIPE_FUNC_LESS: 266 op = type.sign ? LLVMIntSLT : LLVMIntULT; 267 break; 268 case PIPE_FUNC_LEQUAL: 269 op = type.sign ? LLVMIntSLE : LLVMIntULE; 270 break; 271 case PIPE_FUNC_GREATER: 272 op = type.sign ? LLVMIntSGT : LLVMIntUGT; 273 break; 274 case PIPE_FUNC_GEQUAL: 275 op = type.sign ? LLVMIntSGE : LLVMIntUGE; 276 break; 277 default: 278 assert(0); 279 return lp_build_undef(type); 280 } 281 282#if 0 283 /* XXX: Although valid IR, no LLVM target currently support this */ 284 cond = LLVMBuildICmp(builder, op, a, b, ""); 285 res = LLVMBuildSelect(builder, cond, ones, zeros, ""); 286#else 287 debug_printf("%s: warning: using slow element-wise int vector comparison\n", 288 __FUNCTION__); 289 res = LLVMGetUndef(int_vec_type); 290 for(i = 0; i < type.length; ++i) { 291 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); 292 cond = LLVMBuildICmp(builder, op, 293 LLVMBuildExtractElement(builder, a, index, ""), 294 LLVMBuildExtractElement(builder, b, index, ""), 295 ""); 296 cond = LLVMBuildSelect(builder, cond, 297 LLVMConstExtractElement(ones, index), 298 LLVMConstExtractElement(zeros, index), 299 ""); 300 res = LLVMBuildInsertElement(builder, res, cond, index, ""); 301 } 302#endif 303 } 304 305 return res; 306} 307 308 309 310/** 311 * Build code to compare two values 'a' and 'b' using the given func. 312 * \param func one of PIPE_FUNC_x 313 */ 314LLVMValueRef 315lp_build_cmp(struct lp_build_context *bld, 316 unsigned func, 317 LLVMValueRef a, 318 LLVMValueRef b) 319{ 320 return lp_build_compare(bld->builder, bld->type, func, a, b); 321} 322 323 324LLVMValueRef 325lp_build_select(struct lp_build_context *bld, 326 LLVMValueRef mask, 327 LLVMValueRef a, 328 LLVMValueRef b) 329{ 330 struct lp_type type = bld->type; 331 LLVMValueRef res; 332 333 if(a == b) 334 return a; 335 336 if(type.floating) { 337 LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); 338 a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); 339 b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); 340 } 341 342 a = LLVMBuildAnd(bld->builder, a, mask, ""); 343 344 /* This often gets translated to PANDN, but sometimes the NOT is 345 * pre-computed and stored in another constant. The best strategy depends 346 * on available registers, so it is not a big deal -- hopefully LLVM does 347 * the right decision attending the rest of the program. 348 */ 349 b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); 350 351 res = LLVMBuildOr(bld->builder, a, b, ""); 352 353 if(type.floating) { 354 LLVMTypeRef vec_type = lp_build_vec_type(type); 355 res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); 356 } 357 358 return res; 359} 360 361 362LLVMValueRef 363lp_build_select_aos(struct lp_build_context *bld, 364 LLVMValueRef a, 365 LLVMValueRef b, 366 const boolean cond[4]) 367{ 368 const struct lp_type type = bld->type; 369 const unsigned n = type.length; 370 unsigned i, j; 371 372 if(a == b) 373 return a; 374 if(cond[0] && cond[1] && cond[2] && cond[3]) 375 return a; 376 if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) 377 return b; 378 if(a == bld->undef || b == bld->undef) 379 return bld->undef; 380 381 /* 382 * There are three major ways of accomplishing this: 383 * - with a shuffle, 384 * - with a select, 385 * - or with a bit mask. 386 * 387 * Select isn't supported for vector types yet. 388 * The flip between these is empirical and might need to be. 389 */ 390 if (n <= 4) { 391 /* 392 * Shuffle. 393 */ 394 LLVMTypeRef elem_type = LLVMInt32Type(); 395 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 396 397 for(j = 0; j < n; j += 4) 398 for(i = 0; i < 4; ++i) 399 shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); 400 401 return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); 402 } 403 else { 404#if 0 405 /* XXX: Unfortunately select of vectors do not work */ 406 /* Use a select */ 407 LLVMTypeRef elem_type = LLVMInt1Type(); 408 LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; 409 410 for(j = 0; j < n; j += 4) 411 for(i = 0; i < 4; ++i) 412 cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); 413 414 return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); 415#else 416 LLVMValueRef mask = lp_build_const_mask_aos(type, cond); 417 return lp_build_select(bld, mask, a, b); 418#endif 419 } 420} 421