lp_bld_format_soa.c revision eb20c57f03f7f6a43dedb9c317f3648087e6d1d7
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_defines.h" 30 31#include "util/u_format.h" 32#include "util/u_memory.h" 33#include "util/u_string.h" 34 35#include "lp_bld_type.h" 36#include "lp_bld_const.h" 37#include "lp_bld_conv.h" 38#include "lp_bld_swizzle.h" 39#include "lp_bld_gather.h" 40#include "lp_bld_format.h" 41 42 43void 44lp_build_format_swizzle_soa(const struct util_format_description *format_desc, 45 struct lp_build_context *bld, 46 const LLVMValueRef *unswizzled, 47 LLVMValueRef swizzled_out[4]) 48{ 49 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO); 50 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE); 51 52 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 53 /* 54 * Return zzz1 for depth-stencil formats. 55 * 56 * XXX: Allow to control the depth swizzle with an additional parameter, 57 * as the caller may wish another depth swizzle, or retain the stencil 58 * value. 59 */ 60 enum util_format_swizzle swizzle = format_desc->swizzle[0]; 61 LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); 62 swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth; 63 swizzled_out[3] = bld->one; 64 } 65 else { 66 unsigned chan; 67 for (chan = 0; chan < 4; ++chan) { 68 enum util_format_swizzle swizzle = format_desc->swizzle[chan]; 69 swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); 70 } 71 } 72} 73 74 75/** 76 * Unpack several pixels in SoA. 77 * 78 * It takes a vector of packed pixels: 79 * 80 * packed = {P0, P1, P2, P3, ..., Pn} 81 * 82 * And will produce four vectors: 83 * 84 * red = {R0, R1, R2, R3, ..., Rn} 85 * green = {G0, G1, G2, G3, ..., Gn} 86 * blue = {B0, B1, B2, B3, ..., Bn} 87 * alpha = {A0, A1, A2, A3, ..., An} 88 * 89 * It requires that a packed pixel fits into an element of the output 90 * channels. The common case is when converting pixel with a depth of 32 bit or 91 * less into floats. 92 * 93 * \param format_desc the format of the 'packed' incoming pixel vector 94 * \param type the desired type for rgba_out (type.length = n, above) 95 * \param packed the incoming vector of packed pixels 96 * \param rgba_out returns the SoA R,G,B,A vectors 97 */ 98void 99lp_build_unpack_rgba_soa(LLVMBuilderRef builder, 100 const struct util_format_description *format_desc, 101 struct lp_type type, 102 LLVMValueRef packed, 103 LLVMValueRef rgba_out[4]) 104{ 105 struct lp_build_context bld; 106 LLVMValueRef inputs[4]; 107 unsigned start; 108 unsigned chan; 109 110 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 111 assert(format_desc->block.width == 1); 112 assert(format_desc->block.height == 1); 113 assert(format_desc->block.bits <= type.width); 114 /* FIXME: Support more output types */ 115 assert(type.floating); 116 assert(type.width == 32); 117 118 lp_build_context_init(&bld, builder, type); 119 120 /* Decode the input vector components */ 121 start = 0; 122 for (chan = 0; chan < format_desc->nr_channels; ++chan) { 123 const unsigned width = format_desc->channel[chan].size; 124 const unsigned stop = start + width; 125 LLVMValueRef input; 126 127 input = packed; 128 129 switch(format_desc->channel[chan].type) { 130 case UTIL_FORMAT_TYPE_VOID: 131 input = lp_build_undef(type); 132 break; 133 134 case UTIL_FORMAT_TYPE_UNSIGNED: 135 /* 136 * Align the LSB 137 */ 138 139 if (start) { 140 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), ""); 141 } 142 143 /* 144 * Zero the MSBs 145 */ 146 147 if (stop < format_desc->block.bits) { 148 unsigned mask = ((unsigned long long)1 << width) - 1; 149 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), ""); 150 } 151 152 /* 153 * Type conversion 154 */ 155 156 if (type.floating) { 157 if(format_desc->channel[chan].normalized) 158 input = lp_build_unsigned_norm_to_float(builder, width, type, input); 159 else 160 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); 161 } 162 else { 163 /* FIXME */ 164 assert(0); 165 input = lp_build_undef(type); 166 } 167 168 break; 169 170 case UTIL_FORMAT_TYPE_SIGNED: 171 /* 172 * Align the sign bit first. 173 */ 174 175 if (stop < type.width) { 176 unsigned bits = type.width - stop; 177 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits); 178 input = LLVMBuildShl(builder, input, bits_val, ""); 179 } 180 181 /* 182 * Align the LSB (with an arithmetic shift to preserve the sign) 183 */ 184 185 if (format_desc->channel[chan].size < type.width) { 186 unsigned bits = type.width - format_desc->channel[chan].size; 187 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits); 188 input = LLVMBuildAShr(builder, input, bits_val, ""); 189 } 190 191 /* 192 * Type conversion 193 */ 194 195 if (type.floating) { 196 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); 197 if (format_desc->channel[chan].normalized) { 198 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); 199 LLVMValueRef scale_val = lp_build_const_vec(type, scale); 200 input = LLVMBuildMul(builder, input, scale_val, ""); 201 } 202 } 203 else { 204 /* FIXME */ 205 assert(0); 206 input = lp_build_undef(type); 207 } 208 209 break; 210 211 case UTIL_FORMAT_TYPE_FLOAT: 212 if (type.floating) { 213 assert(start == 0); 214 assert(stop == 32); 215 assert(type.width == 32); 216 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), ""); 217 } 218 else { 219 /* FIXME */ 220 assert(0); 221 input = lp_build_undef(type); 222 } 223 break; 224 225 case UTIL_FORMAT_TYPE_FIXED: 226 if (type.floating) { 227 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); 228 LLVMValueRef scale_val = lp_build_const_vec(type, scale); 229 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); 230 input = LLVMBuildMul(builder, input, scale_val, ""); 231 } 232 else { 233 /* FIXME */ 234 assert(0); 235 input = lp_build_undef(type); 236 } 237 break; 238 239 default: 240 assert(0); 241 input = lp_build_undef(type); 242 break; 243 } 244 245 inputs[chan] = input; 246 247 start = stop; 248 } 249 250 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out); 251} 252 253 254void 255lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder, 256 struct lp_type dst_type, 257 LLVMValueRef packed, 258 LLVMValueRef *rgba) 259{ 260 LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff); 261 unsigned chan; 262 263 packed = LLVMBuildBitCast(builder, packed, 264 lp_build_int_vec_type(dst_type), ""); 265 266 /* Decode the input vector components */ 267 for (chan = 0; chan < 4; ++chan) { 268 unsigned start = chan*8; 269 unsigned stop = start + 8; 270 LLVMValueRef input; 271 272 input = packed; 273 274 if (start) 275 input = LLVMBuildLShr(builder, input, 276 lp_build_const_int_vec(dst_type, start), ""); 277 278 if (stop < 32) 279 input = LLVMBuildAnd(builder, input, mask, ""); 280 281 input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input); 282 283 rgba[chan] = input; 284 } 285} 286 287 288 289/** 290 * Fetch a texels from a texture, returning them in SoA layout. 291 * 292 * \param type the desired return type for 'rgba'. The vector length 293 * is the number of texels to fetch 294 * 295 * \param base_ptr points to start of the texture image block. For non- 296 * compressed formats, this simply points to the texel. 297 * For compressed formats, it points to the start of the 298 * compressed data block. 299 * 300 * \param i, j the sub-block pixel coordinates. For non-compressed formats 301 * these will always be (0,0). For compressed formats, i will 302 * be in [0, block_width-1] and j will be in [0, block_height-1]. 303 */ 304void 305lp_build_fetch_rgba_soa(LLVMBuilderRef builder, 306 const struct util_format_description *format_desc, 307 struct lp_type type, 308 LLVMValueRef base_ptr, 309 LLVMValueRef offset, 310 LLVMValueRef i, 311 LLVMValueRef j, 312 LLVMValueRef rgba_out[4]) 313{ 314 315 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 316 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 317 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 318 format_desc->block.width == 1 && 319 format_desc->block.height == 1 && 320 format_desc->block.bits <= type.width && 321 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || 322 format_desc->channel[0].size == 32)) 323 { 324 /* 325 * The packed pixel fits into an element of the destination format. Put 326 * the packed pixels into a vector and extract each component for all 327 * vector elements in parallel. 328 */ 329 330 LLVMValueRef packed; 331 332 /* 333 * gather the texels from the texture 334 * Ex: packed = {BGRA, BGRA, BGRA, BGRA}. 335 */ 336 packed = lp_build_gather(builder, 337 type.length, 338 format_desc->block.bits, 339 type.width, 340 base_ptr, offset); 341 342 /* 343 * convert texels to float rgba 344 */ 345 lp_build_unpack_rgba_soa(builder, 346 format_desc, 347 type, 348 packed, rgba_out); 349 } 350 else { 351 /* 352 * Fallback to calling lp_build_fetch_rgba_aos for each pixel. 353 * 354 * This is not the most efficient way of fetching pixels, as we 355 * miss some opportunities to do vectorization, but this is 356 * convenient for formats or scenarios for which there was no 357 * opportunity or incentive to optimize. 358 */ 359 360 unsigned k, chan; 361 362 for (chan = 0; chan < 4; ++chan) { 363 rgba_out[chan] = lp_build_undef(type); 364 } 365 366 /* loop over number of pixels */ 367 for(k = 0; k < type.length; ++k) { 368 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); 369 LLVMValueRef offset_elem; 370 LLVMValueRef ptr; 371 LLVMValueRef i_elem, j_elem; 372 LLVMValueRef tmp; 373 374 offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); 375 ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, ""); 376 377 i_elem = LLVMBuildExtractElement(builder, i, index, ""); 378 j_elem = LLVMBuildExtractElement(builder, j, index, ""); 379 380 /* Get a single float[4]={R,G,B,A} pixel */ 381 tmp = lp_build_fetch_rgba_aos(builder, format_desc, type, ptr, 382 i_elem, j_elem); 383 384 /* 385 * Insert the AoS tmp value channels into the SoA result vectors at 386 * position = 'index'. 387 */ 388 for (chan = 0; chan < 4; ++chan) { 389 LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0), 390 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); 391 rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan], 392 tmp_chan, index, ""); 393 } 394 } 395 } 396} 397