lp_bld_format_soa.c revision 2c2debaea71eb99322c2371f1c581e9748cda91f
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_defines.h" 30 31#include "util/u_format.h" 32#include "util/u_memory.h" 33#include "util/u_string.h" 34 35#include "lp_bld_type.h" 36#include "lp_bld_const.h" 37#include "lp_bld_conv.h" 38#include "lp_bld_swizzle.h" 39#include "lp_bld_sample.h" /* for lp_build_gather */ 40#include "lp_bld_format.h" 41 42 43void 44lp_build_format_swizzle_soa(const struct util_format_description *format_desc, 45 struct lp_build_context *bld, 46 const LLVMValueRef *unswizzled, 47 LLVMValueRef *swizzled) 48{ 49 assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO); 50 assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE); 51 52 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 53 /* 54 * Return zzz1 for depth-stencil formats. 55 * 56 * XXX: Allow to control the depth swizzle with an additional parameter, 57 * as the caller may wish another depth swizzle, or retain the stencil 58 * value. 59 */ 60 enum util_format_swizzle swizzle = format_desc->swizzle[0]; 61 LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); 62 swizzled[2] = swizzled[1] = swizzled[0] = depth; 63 swizzled[3] = bld->one; 64 } 65 else { 66 unsigned chan; 67 for (chan = 0; chan < 4; ++chan) { 68 enum util_format_swizzle swizzle = format_desc->swizzle[chan]; 69 swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle); 70 } 71 } 72} 73 74 75/** 76 * Unpack several pixels in SoA. 77 * 78 * It takes a vector of packed pixels: 79 * 80 * packed = {P0, P1, P2, P3, ..., Pn} 81 * 82 * And will produce four vectors: 83 * 84 * red = {R0, R1, R2, R3, ..., Rn} 85 * green = {G0, G1, G2, G3, ..., Gn} 86 * blue = {B0, B1, B2, B3, ..., Bn} 87 * alpha = {A0, A1, A2, A3, ..., An} 88 * 89 * It requires that a packed pixel fits into an element of the output 90 * channels. The common case is when converting pixel with a depth of 32 bit or 91 * less into floats. 92 */ 93void 94lp_build_unpack_rgba_soa(LLVMBuilderRef builder, 95 const struct util_format_description *format_desc, 96 struct lp_type type, 97 LLVMValueRef packed, 98 LLVMValueRef *rgba) 99{ 100 struct lp_build_context bld; 101 LLVMValueRef inputs[4]; 102 unsigned start; 103 unsigned chan; 104 105 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 106 assert(format_desc->block.width == 1); 107 assert(format_desc->block.height == 1); 108 assert(format_desc->block.bits <= type.width); 109 /* FIXME: Support more output types */ 110 assert(type.floating); 111 assert(type.width == 32); 112 113 lp_build_context_init(&bld, builder, type); 114 115 /* Decode the input vector components */ 116 start = 0; 117 for (chan = 0; chan < format_desc->nr_channels; ++chan) { 118 unsigned width = format_desc->channel[chan].size; 119 unsigned stop = start + width; 120 LLVMValueRef input; 121 122 input = packed; 123 124 switch(format_desc->channel[chan].type) { 125 case UTIL_FORMAT_TYPE_VOID: 126 input = lp_build_undef(type); 127 break; 128 129 case UTIL_FORMAT_TYPE_UNSIGNED: 130 /* 131 * Align the LSB 132 */ 133 134 if (start) { 135 input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), ""); 136 } 137 138 /* 139 * Zero the MSBs 140 */ 141 142 if (stop < format_desc->block.bits) { 143 unsigned mask = ((unsigned long long)1 << width) - 1; 144 input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), ""); 145 } 146 147 /* 148 * Type conversion 149 */ 150 151 if (type.floating) { 152 if(format_desc->channel[chan].normalized) 153 input = lp_build_unsigned_norm_to_float(builder, width, type, input); 154 else 155 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); 156 } 157 else { 158 /* FIXME */ 159 assert(0); 160 input = lp_build_undef(type); 161 } 162 163 break; 164 165 case UTIL_FORMAT_TYPE_SIGNED: 166 /* 167 * Align the sign bit first. 168 */ 169 170 if (stop < type.width) { 171 unsigned bits = type.width - stop; 172 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits); 173 input = LLVMBuildShl(builder, input, bits_val, ""); 174 } 175 176 /* 177 * Align the LSB (with an arithmetic shift to preserve the sign) 178 */ 179 180 if (format_desc->channel[chan].size < type.width) { 181 unsigned bits = type.width - format_desc->channel[chan].size; 182 LLVMValueRef bits_val = lp_build_const_int_vec(type, bits); 183 input = LLVMBuildAShr(builder, input, bits_val, ""); 184 } 185 186 /* 187 * Type conversion 188 */ 189 190 if (type.floating) { 191 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); 192 if (format_desc->channel[chan].normalized) { 193 double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); 194 LLVMValueRef scale_val = lp_build_const_vec(type, scale); 195 input = LLVMBuildMul(builder, input, scale_val, ""); 196 } 197 } 198 else { 199 /* FIXME */ 200 assert(0); 201 input = lp_build_undef(type); 202 } 203 204 break; 205 206 case UTIL_FORMAT_TYPE_FLOAT: 207 if (type.floating) { 208 assert(start == 0); 209 assert(stop == 32); 210 assert(type.width == 32); 211 input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), ""); 212 } 213 else { 214 /* FIXME */ 215 assert(0); 216 input = lp_build_undef(type); 217 } 218 break; 219 220 case UTIL_FORMAT_TYPE_FIXED: 221 if (type.floating) { 222 double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); 223 LLVMValueRef scale_val = lp_build_const_vec(type, scale); 224 input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), ""); 225 input = LLVMBuildMul(builder, input, scale_val, ""); 226 } 227 else { 228 /* FIXME */ 229 assert(0); 230 input = lp_build_undef(type); 231 } 232 break; 233 234 default: 235 assert(0); 236 input = lp_build_undef(type); 237 break; 238 } 239 240 inputs[chan] = input; 241 242 start = stop; 243 } 244 245 lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba); 246} 247 248 249/** 250 * Fetch a pixel into a SoA. 251 * 252 * i and j are the sub-block pixel coordinates. 253 */ 254void 255lp_build_fetch_rgba_soa(LLVMBuilderRef builder, 256 const struct util_format_description *format_desc, 257 struct lp_type type, 258 LLVMValueRef base_ptr, 259 LLVMValueRef offset, 260 LLVMValueRef i, 261 LLVMValueRef j, 262 LLVMValueRef *rgba) 263{ 264 265 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 266 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 267 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 268 format_desc->block.width == 1 && 269 format_desc->block.height == 1 && 270 format_desc->block.bits <= type.width && 271 (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || 272 format_desc->channel[0].size == 32)) 273 { 274 /* 275 * The packed pixel fits into an element of the destination format. Put 276 * the packed pixels into a vector and estract each component for all 277 * vector elements in parallel. 278 */ 279 280 LLVMValueRef packed; 281 282 /* 283 * gather the texels from the texture 284 */ 285 packed = lp_build_gather(builder, 286 type.length, 287 format_desc->block.bits, 288 type.width, 289 base_ptr, offset); 290 291 /* 292 * convert texels to float rgba 293 */ 294 lp_build_unpack_rgba_soa(builder, 295 format_desc, 296 type, 297 packed, rgba); 298 } 299 else { 300 /* 301 * Fallback to calling lp_build_fetch_rgba_aos for each pixel. 302 * 303 * This is not the most efficient way of fetching pixels, as 304 * we miss some opportunities to do vectorization, but this it is a 305 * convenient for formats or scenarios for which there was no opportunity 306 * or incentive to optimize. 307 */ 308 309 unsigned k, chan; 310 311 assert(type.floating); 312 313 for (chan = 0; chan < 4; ++chan) { 314 rgba[chan] = lp_build_undef(type); 315 } 316 317 for(k = 0; k < type.length; ++k) { 318 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); 319 LLVMValueRef offset_elem; 320 LLVMValueRef ptr; 321 LLVMValueRef i_elem, j_elem; 322 LLVMValueRef tmp; 323 324 offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); 325 ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, ""); 326 327 i_elem = LLVMBuildExtractElement(builder, i, index, ""); 328 j_elem = LLVMBuildExtractElement(builder, j, index, ""); 329 330 tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem); 331 332 /* 333 * AoS to SoA 334 */ 335 336 for (chan = 0; chan < 4; ++chan) { 337 LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0), 338 tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); 339 rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, ""); 340 } 341 } 342 } 343} 344