/* lp_bld_format_aos.c revision a18c210a95794c79c6f26dbf4c66d4a85e29169d */
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * AoS pixel format manipulation. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_format.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include "util/u_string.h" 40 41#include "lp_bld_init.h" 42#include "lp_bld_type.h" 43#include "lp_bld_flow.h" 44#include "lp_bld_format.h" 45 46 47/** 48 * Unpack a single pixel into its RGBA components. 49 * 50 * @param packed integer. 51 * 52 * @return RGBA in a 4 floats vector. 
53 */ 54LLVMValueRef 55lp_build_unpack_rgba_aos(LLVMBuilderRef builder, 56 const struct util_format_description *desc, 57 LLVMValueRef packed) 58{ 59 LLVMValueRef shifted, casted, scaled, masked; 60 LLVMValueRef shifts[4]; 61 LLVMValueRef masks[4]; 62 LLVMValueRef scales[4]; 63 LLVMValueRef swizzles[4]; 64 LLVMValueRef aux[4]; 65 bool normalized; 66 int empty_channel; 67 bool needs_uitofp; 68 unsigned shift; 69 unsigned i; 70 71 /* TODO: Support more formats */ 72 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 73 assert(desc->block.width == 1); 74 assert(desc->block.height == 1); 75 assert(desc->block.bits <= 32); 76 77 /* Do the intermediate integer computations with 32bit integers since it 78 * matches floating point size */ 79 if (desc->block.bits < 32) 80 packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); 81 82 /* Broadcast the packed value to all four channels */ 83 packed = LLVMBuildInsertElement(builder, 84 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), 85 packed, 86 LLVMConstNull(LLVMInt32Type()), 87 ""); 88 packed = LLVMBuildShuffleVector(builder, 89 packed, 90 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), 91 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)), 92 ""); 93 94 /* Initialize vector constants */ 95 normalized = FALSE; 96 needs_uitofp = FALSE; 97 empty_channel = -1; 98 shift = 0; 99 for (i = 0; i < 4; ++i) { 100 unsigned bits = desc->channel[i].size; 101 102 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 103 shifts[i] = LLVMGetUndef(LLVMInt32Type()); 104 masks[i] = LLVMConstNull(LLVMInt32Type()); 105 scales[i] = LLVMConstNull(LLVMFloatType()); 106 empty_channel = i; 107 } 108 else { 109 unsigned long long mask = (1ULL << bits) - 1; 110 111 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 112 113 if (bits == 32) { 114 needs_uitofp = TRUE; 115 } 116 117 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); 118 masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0); 119 120 if (desc->channel[i].normalized) { 121 
scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask); 122 normalized = TRUE; 123 } 124 else 125 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); 126 } 127 128 shift += bits; 129 } 130 131 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); 132 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); 133 if (!needs_uitofp) { 134 /* UIToFP can't be expressed in SSE2 */ 135 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); 136 } else { 137 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); 138 } 139 140 if (normalized) 141 scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); 142 else 143 scaled = casted; 144 145 for (i = 0; i < 4; ++i) 146 aux[i] = LLVMGetUndef(LLVMFloatType()); 147 148 for (i = 0; i < 4; ++i) { 149 enum util_format_swizzle swizzle; 150 151 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 152 /* 153 * For ZS formats do RGBA = ZZZ1 154 */ 155 if (i == 3) { 156 swizzle = UTIL_FORMAT_SWIZZLE_1; 157 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { 158 swizzle = UTIL_FORMAT_SWIZZLE_0; 159 } else { 160 swizzle = desc->swizzle[0]; 161 } 162 } else { 163 swizzle = desc->swizzle[i]; 164 } 165 166 switch (swizzle) { 167 case UTIL_FORMAT_SWIZZLE_X: 168 case UTIL_FORMAT_SWIZZLE_Y: 169 case UTIL_FORMAT_SWIZZLE_Z: 170 case UTIL_FORMAT_SWIZZLE_W: 171 swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); 172 break; 173 case UTIL_FORMAT_SWIZZLE_0: 174 assert(empty_channel >= 0); 175 swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0); 176 break; 177 case UTIL_FORMAT_SWIZZLE_1: 178 swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); 179 aux[0] = LLVMConstReal(LLVMFloatType(), 1.0); 180 break; 181 case UTIL_FORMAT_SWIZZLE_NONE: 182 swizzles[i] = LLVMGetUndef(LLVMFloatType()); 183 assert(0); 184 break; 185 } 186 } 187 188 return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), ""); 
189} 190 191 192/** 193 * Pack a single pixel. 194 * 195 * @param rgba 4 float vector with the unpacked components. 196 * 197 * XXX: This is mostly for reference and testing -- operating a single pixel at 198 * a time is rarely if ever needed. 199 */ 200LLVMValueRef 201lp_build_pack_rgba_aos(LLVMBuilderRef builder, 202 const struct util_format_description *desc, 203 LLVMValueRef rgba) 204{ 205 LLVMTypeRef type; 206 LLVMValueRef packed = NULL; 207 LLVMValueRef swizzles[4]; 208 LLVMValueRef shifted, casted, scaled, unswizzled; 209 LLVMValueRef shifts[4]; 210 LLVMValueRef scales[4]; 211 bool normalized; 212 unsigned shift; 213 unsigned i, j; 214 215 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 216 assert(desc->block.width == 1); 217 assert(desc->block.height == 1); 218 219 type = LLVMIntType(desc->block.bits); 220 221 /* Unswizzle the color components into the source vector. */ 222 for (i = 0; i < 4; ++i) { 223 for (j = 0; j < 4; ++j) { 224 if (desc->swizzle[j] == i) 225 break; 226 } 227 if (j < 4) 228 swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0); 229 else 230 swizzles[i] = LLVMGetUndef(LLVMInt32Type()); 231 } 232 233 unswizzled = LLVMBuildShuffleVector(builder, rgba, 234 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)), 235 LLVMConstVector(swizzles, 4), ""); 236 237 normalized = FALSE; 238 shift = 0; 239 for (i = 0; i < 4; ++i) { 240 unsigned bits = desc->channel[i].size; 241 242 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 243 shifts[i] = LLVMGetUndef(LLVMInt32Type()); 244 scales[i] = LLVMGetUndef(LLVMFloatType()); 245 } 246 else { 247 unsigned mask = (1 << bits) - 1; 248 249 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 250 assert(bits < 32); 251 252 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); 253 254 if (desc->channel[i].normalized) { 255 scales[i] = LLVMConstReal(LLVMFloatType(), mask); 256 normalized = TRUE; 257 } 258 else 259 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); 260 } 261 262 shift += bits; 263 } 264 265 if 
(normalized) 266 scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); 267 else 268 scaled = unswizzled; 269 270 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), ""); 271 272 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); 273 274 /* Bitwise or all components */ 275 for (i = 0; i < 4; ++i) { 276 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 277 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), ""); 278 if (packed) 279 packed = LLVMBuildOr(builder, packed, component, ""); 280 else 281 packed = component; 282 } 283 } 284 285 if (!packed) 286 packed = LLVMGetUndef(LLVMInt32Type()); 287 288 if (desc->block.bits < 32) 289 packed = LLVMBuildTrunc(builder, packed, type, ""); 290 291 return packed; 292} 293 294 295/** 296 * Fetch a pixel into a 4 float AoS. 297 * 298 * i and j are the sub-block pixel coordinates. 299 */ 300LLVMValueRef 301lp_build_fetch_rgba_aos(LLVMBuilderRef builder, 302 const struct util_format_description *format_desc, 303 LLVMValueRef ptr, 304 LLVMValueRef i, 305 LLVMValueRef j) 306{ 307 308 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 309 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 310 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 311 format_desc->block.width == 1 && 312 format_desc->block.height == 1 && 313 util_is_pot(format_desc->block.bits) && 314 format_desc->block.bits <= 32 && 315 format_desc->is_bitmask && 316 !format_desc->is_mixed && 317 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || 318 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) 319 { 320 LLVMValueRef packed; 321 322 ptr = LLVMBuildBitCast(builder, ptr, 323 LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) , 324 ""); 325 326 packed = LLVMBuildLoad(builder, ptr, "packed"); 327 328 return lp_build_unpack_rgba_aos(builder, format_desc, packed); 329 } 330 else if 
(format_desc->fetch_rgba_float) { 331 /* 332 * Fallback to calling util_format_description::fetch_rgba_float. 333 * 334 * This is definitely not the most efficient way of fetching pixels, as 335 * we miss the opportunity to do vectorization, but this it is a 336 * convenient for formats or scenarios for which there was no opportunity 337 * or incentive to optimize. 338 */ 339 340 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); 341 char name[256]; 342 LLVMValueRef function; 343 LLVMValueRef tmp; 344 LLVMValueRef args[4]; 345 346 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", 347 format_desc->short_name); 348 349 /* 350 * Declare and bind format_desc->fetch_rgba_float(). 351 */ 352 353 function = LLVMGetNamedFunction(module, name); 354 if (!function) { 355 LLVMTypeRef ret_type; 356 LLVMTypeRef arg_types[4]; 357 LLVMTypeRef function_type; 358 359 ret_type = LLVMVoidType(); 360 arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); 361 arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); 362 arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); 363 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); 364 function = LLVMAddFunction(module, name, function_type); 365 366 LLVMSetFunctionCallConv(function, LLVMCCallConv); 367 LLVMSetLinkage(function, LLVMExternalLinkage); 368 369 assert(LLVMIsDeclaration(function)); 370 371 LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); 372 } 373 374 tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); 375 376 /* 377 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result 378 * in the SoA vectors. 
379 */ 380 381 args[0] = LLVMBuildBitCast(builder, tmp, 382 LLVMPointerType(LLVMFloatType(), 0), ""); 383 args[1] = ptr; 384 args[2] = i; 385 args[3] = j; 386 387 LLVMBuildCall(builder, function, args, 4, ""); 388 389 return LLVMBuildLoad(builder, tmp, ""); 390 } 391 else { 392 assert(0); 393 return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); 394 } 395} 396