lp_bld_swizzle.c revision efc82aef35a2aac5d2ed9774f6d28f2626796416
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Helper functions for swizzling/shuffling. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_debug.h" 37 38#include "lp_bld_type.h" 39#include "lp_bld_const.h" 40#include "lp_bld_init.h" 41#include "lp_bld_logic.h" 42#include "lp_bld_swizzle.h" 43 44 45LLVMValueRef 46lp_build_broadcast(struct gallivm_state *gallivm, 47 LLVMTypeRef vec_type, 48 LLVMValueRef scalar) 49{ 50 const unsigned n = LLVMGetVectorSize(vec_type); 51 LLVMValueRef res; 52 unsigned i; 53 54 res = LLVMGetUndef(vec_type); 55 for(i = 0; i < n; ++i) { 56 LLVMValueRef index = lp_build_const_int32(gallivm, i); 57 res = LLVMBuildInsertElement(gallivm->builder, res, scalar, index, ""); 58 } 59 60 return res; 61} 62 63 64/** 65 * Broadcast 66 */ 67LLVMValueRef 68lp_build_broadcast_scalar(struct lp_build_context *bld, 69 LLVMValueRef scalar) 70{ 71 const struct lp_type type = bld->type; 72 73 assert(lp_check_elem_type(type, LLVMTypeOf(scalar))); 74 75 if (type.length == 1) { 76 return scalar; 77 } 78 else { 79 LLVMValueRef res; 80 81#if HAVE_LLVM >= 0x207 82 /* The shuffle vector is always made of int32 elements */ 83 struct lp_type i32_vec_type = lp_type_int_vec(32); 84 i32_vec_type.length = type.length; 85 86 res = LLVMBuildInsertElement(bld->builder, bld->undef, scalar, 87 lp_build_const_int32(bld->gallivm, 0), ""); 88 res = LLVMBuildShuffleVector(bld->builder, res, bld->undef, 89 lp_build_const_int_vec(bld->gallivm, i32_vec_type, 0), ""); 90#else 91 /* XXX: The above path provokes a bug in LLVM 2.6 */ 92 unsigned i; 93 res = bld->undef; 94 for(i = 0; i < type.length; ++i) { 95 LLVMValueRef index = lp_build_const_int32(bld->gallivm, i); 96 res = LLVMBuildInsertElement(bld->builder, res, scalar, index, ""); 97 } 98#endif 99 return res; 100 } 101} 102 103 104/** 105 * Combined extract and broadcast (or a mere shuffle when the two types match) 106 */ 107LLVMValueRef 108lp_build_extract_broadcast(struct gallivm_state *gallivm, 109 struct lp_type src_type, 110 struct lp_type dst_type, 111 LLVMValueRef vector, 112 LLVMValueRef index) 113{ 114 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 115 LLVMValueRef res; 116 117 assert(src_type.floating == dst_type.floating); 118 assert(src_type.width == dst_type.width); 119 120 assert(lp_check_value(src_type, vector)); 121 assert(LLVMTypeOf(index) == i32t); 122 123 if (src_type.length == 1) { 124 if (dst_type.length == 1) { 125 /* 126 * Trivial scalar -> scalar. 127 */ 128 129 res = vector; 130 } 131 else { 132 /* 133 * Broadcast scalar -> vector. 134 */ 135 136 res = lp_build_broadcast(gallivm, 137 lp_build_vec_type(gallivm, dst_type), 138 vector); 139 } 140 } 141 else { 142 if (dst_type.length == src_type.length) { 143 /* 144 * Special shuffle of the same size. 145 */ 146 147 LLVMValueRef shuffle; 148 shuffle = lp_build_broadcast(gallivm, 149 LLVMVectorType(i32t, dst_type.length), 150 index); 151 res = LLVMBuildShuffleVector(gallivm->builder, vector, 152 LLVMGetUndef(lp_build_vec_type(gallivm, dst_type)), 153 shuffle, ""); 154 } 155 else { 156 LLVMValueRef scalar; 157 scalar = LLVMBuildExtractElement(gallivm->builder, vector, index, ""); 158 if (dst_type.length == 1) { 159 /* 160 * Trivial extract scalar from vector. 161 */ 162 163 res = scalar; 164 } 165 else { 166 /* 167 * General case of different sized vectors. 168 */ 169 170 res = lp_build_broadcast(gallivm, 171 lp_build_vec_type(gallivm, dst_type), 172 vector); 173 } 174 } 175 } 176 177 return res; 178} 179 180 181/** 182 * Swizzle one channel into all other three channels. 183 */ 184LLVMValueRef 185lp_build_swizzle_scalar_aos(struct lp_build_context *bld, 186 LLVMValueRef a, 187 unsigned channel) 188{ 189 const struct lp_type type = bld->type; 190 const unsigned n = type.length; 191 unsigned i, j; 192 193 if(a == bld->undef || a == bld->zero || a == bld->one) 194 return a; 195 196 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing 197 * using shuffles here actually causes worst results. More investigation is 198 * needed. */ 199 if (type.width >= 16) { 200 /* 201 * Shuffle. 202 */ 203 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 204 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 205 206 for(j = 0; j < n; j += 4) 207 for(i = 0; i < 4; ++i) 208 shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); 209 210 return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); 211 } 212 else { 213 /* 214 * Bit mask and recursive shifts 215 * 216 * XYZW XYZW .... XYZW <= input 217 * 0Y00 0Y00 .... 0Y00 218 * YY00 YY00 .... YY00 219 * YYYY YYYY .... YYYY <= output 220 */ 221 struct lp_type type4; 222 const char shifts[4][2] = { 223 { 1, 2}, 224 {-1, 2}, 225 { 1, -2}, 226 {-1, -2} 227 }; 228 unsigned i; 229 230 a = LLVMBuildAnd(bld->builder, a, 231 lp_build_const_mask_aos(bld->gallivm, 232 type, 1 << channel), ""); 233 234 /* 235 * Build a type where each element is an integer that cover the four 236 * channels. 237 */ 238 239 type4 = type; 240 type4.floating = FALSE; 241 type4.width *= 4; 242 type4.length /= 4; 243 244 a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->gallivm, type4), ""); 245 246 for(i = 0; i < 2; ++i) { 247 LLVMValueRef tmp = NULL; 248 int shift = shifts[channel][i]; 249 250#ifdef PIPE_ARCH_LITTLE_ENDIAN 251 shift = -shift; 252#endif 253 254 if(shift > 0) 255 tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); 256 if(shift < 0) 257 tmp = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); 258 259 assert(tmp); 260 if(tmp) 261 a = LLVMBuildOr(bld->builder, a, tmp, ""); 262 } 263 264 return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->gallivm, type), ""); 265 } 266} 267 268 269LLVMValueRef 270lp_build_swizzle_aos(struct lp_build_context *bld, 271 LLVMValueRef a, 272 const unsigned char swizzles[4]) 273{ 274 const struct lp_type type = bld->type; 275 const unsigned n = type.length; 276 unsigned i, j; 277 278 if (swizzles[0] == PIPE_SWIZZLE_RED && 279 swizzles[1] == PIPE_SWIZZLE_GREEN && 280 swizzles[2] == PIPE_SWIZZLE_BLUE && 281 swizzles[3] == PIPE_SWIZZLE_ALPHA) { 282 return a; 283 } 284 285 if (swizzles[0] == swizzles[1] && 286 swizzles[1] == swizzles[2] && 287 swizzles[2] == swizzles[3]) { 288 switch (swizzles[0]) { 289 case PIPE_SWIZZLE_RED: 290 case PIPE_SWIZZLE_GREEN: 291 case PIPE_SWIZZLE_BLUE: 292 case PIPE_SWIZZLE_ALPHA: 293 return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]); 294 case PIPE_SWIZZLE_ZERO: 295 return bld->zero; 296 case PIPE_SWIZZLE_ONE: 297 return bld->one; 298 default: 299 assert(0); 300 return bld->undef; 301 } 302 } 303 304 if (type.width >= 16) { 305 /* 306 * Shuffle. 307 */ 308 LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type)); 309 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); 310 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 311 LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; 312 313 memset(aux, 0, sizeof aux); 314 315 for(j = 0; j < n; j += 4) { 316 for(i = 0; i < 4; ++i) { 317 unsigned shuffle; 318 switch (swizzles[i]) { 319 default: 320 assert(0); 321 /* fall through */ 322 case PIPE_SWIZZLE_RED: 323 case PIPE_SWIZZLE_GREEN: 324 case PIPE_SWIZZLE_BLUE: 325 case PIPE_SWIZZLE_ALPHA: 326 shuffle = j + swizzles[i]; 327 break; 328 case PIPE_SWIZZLE_ZERO: 329 shuffle = type.length + 0; 330 if (!aux[0]) { 331 aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0); 332 } 333 break; 334 case PIPE_SWIZZLE_ONE: 335 shuffle = type.length + 1; 336 if (!aux[1]) { 337 aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0); 338 } 339 break; 340 } 341 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 342 } 343 } 344 345 for (i = 0; i < n; ++i) { 346 if (!aux[i]) { 347 aux[i] = undef; 348 } 349 } 350 351 return LLVMBuildShuffleVector(bld->builder, a, 352 LLVMConstVector(aux, n), 353 LLVMConstVector(shuffles, n), ""); 354 } else { 355 /* 356 * Bit mask and shifts. 357 * 358 * For example, this will convert BGRA to RGBA by doing 359 * 360 * rgba = (bgra & 0x00ff0000) >> 16 361 * | (bgra & 0xff00ff00) 362 * | (bgra & 0x000000ff) << 16 363 * 364 * This is necessary not only for faster cause, but because X86 backend 365 * will refuse shuffles of <4 x i8> vectors 366 */ 367 LLVMValueRef res; 368 struct lp_type type4; 369 unsigned cond = 0; 370 unsigned chan; 371 int shift; 372 373 /* 374 * Start with a mixture of 1 and 0. 375 */ 376 for (chan = 0; chan < 4; ++chan) { 377 if (swizzles[chan] == PIPE_SWIZZLE_ONE) { 378 cond |= 1 << chan; 379 } 380 } 381 res = lp_build_select_aos(bld, cond, bld->one, bld->zero); 382 383 /* 384 * Build a type where each element is an integer that cover the four 385 * channels. 386 */ 387 type4 = type; 388 type4.floating = FALSE; 389 type4.width *= 4; 390 type4.length /= 4; 391 392 a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->gallivm, type4), ""); 393 res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(bld->gallivm, type4), ""); 394 395 /* 396 * Mask and shift the channels, trying to group as many channels in the 397 * same shift as possible 398 */ 399 for (shift = -3; shift <= 3; ++shift) { 400 unsigned long long mask = 0; 401 402 assert(type4.width <= sizeof(mask)*8); 403 404 for (chan = 0; chan < 4; ++chan) { 405 /* FIXME: big endian */ 406 if (swizzles[chan] < 4 && 407 chan - swizzles[chan] == shift) { 408 mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); 409 } 410 } 411 412 if (mask) { 413 LLVMValueRef masked; 414 LLVMValueRef shifted; 415 416 if (0) 417 debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask); 418 419 masked = LLVMBuildAnd(bld->builder, a, 420 lp_build_const_int_vec(bld->gallivm, type4, mask), ""); 421 if (shift > 0) { 422 shifted = LLVMBuildShl(bld->builder, masked, 423 lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); 424 } else if (shift < 0) { 425 shifted = LLVMBuildLShr(bld->builder, masked, 426 lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); 427 } else { 428 shifted = masked; 429 } 430 431 res = LLVMBuildOr(bld->builder, res, shifted, ""); 432 } 433 } 434 435 return LLVMBuildBitCast(bld->builder, res, 436 lp_build_vec_type(bld->gallivm, type), ""); 437 } 438} 439 440 441/** 442 * Extended swizzle of a single channel of a SoA vector. 443 * 444 * @param bld building context 445 * @param unswizzled array with the 4 unswizzled values 446 * @param swizzle one of the PIPE_SWIZZLE_* 447 * 448 * @return the swizzled value. 449 */ 450LLVMValueRef 451lp_build_swizzle_soa_channel(struct lp_build_context *bld, 452 const LLVMValueRef *unswizzled, 453 unsigned swizzle) 454{ 455 switch (swizzle) { 456 case PIPE_SWIZZLE_RED: 457 case PIPE_SWIZZLE_GREEN: 458 case PIPE_SWIZZLE_BLUE: 459 case PIPE_SWIZZLE_ALPHA: 460 return unswizzled[swizzle]; 461 case PIPE_SWIZZLE_ZERO: 462 return bld->zero; 463 case PIPE_SWIZZLE_ONE: 464 return bld->one; 465 default: 466 assert(0); 467 return bld->undef; 468 } 469} 470 471 472/** 473 * Extended swizzle of a SoA vector. 474 * 475 * @param bld building context 476 * @param unswizzled array with the 4 unswizzled values 477 * @param swizzles array of PIPE_SWIZZLE_* 478 * @param swizzled output swizzled values 479 */ 480void 481lp_build_swizzle_soa(struct lp_build_context *bld, 482 const LLVMValueRef *unswizzled, 483 const unsigned char swizzles[4], 484 LLVMValueRef *swizzled) 485{ 486 unsigned chan; 487 488 for (chan = 0; chan < 4; ++chan) { 489 swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, 490 swizzles[chan]); 491 } 492} 493 494 495/** 496 * Do an extended swizzle of a SoA vector inplace. 497 * 498 * @param bld building context 499 * @param values intput/output array with the 4 values 500 * @param swizzles array of PIPE_SWIZZLE_* 501 */ 502void 503lp_build_swizzle_soa_inplace(struct lp_build_context *bld, 504 LLVMValueRef *values, 505 const unsigned char swizzles[4]) 506{ 507 LLVMValueRef unswizzled[4]; 508 unsigned chan; 509 510 for (chan = 0; chan < 4; ++chan) { 511 unswizzled[chan] = values[chan]; 512 } 513 514 lp_build_swizzle_soa(bld, unswizzled, swizzles, values); 515} 516