lp_bld_format_aos.c revision bd91f665a7c12f114619a4f6f1e00059e4f4cb5e
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * AoS pixel format manipulation. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_format.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include "util/u_string.h" 40 41#include "lp_bld_init.h" 42#include "lp_bld_type.h" 43#include "lp_bld_format.h" 44 45 46/** 47 * Unpack a single pixel into its RGBA components. 48 * 49 * @param packed integer. 50 * 51 * @return RGBA in a 4 floats vector. 52 */ 53LLVMValueRef 54lp_build_unpack_rgba_aos(LLVMBuilderRef builder, 55 const struct util_format_description *desc, 56 LLVMValueRef packed) 57{ 58 LLVMValueRef shifted, casted, scaled, masked; 59 LLVMValueRef shifts[4]; 60 LLVMValueRef masks[4]; 61 LLVMValueRef scales[4]; 62 LLVMValueRef swizzles[4]; 63 LLVMValueRef aux[4]; 64 bool normalized; 65 int empty_channel; 66 bool needs_uitofp; 67 unsigned shift; 68 unsigned i; 69 70 /* TODO: Support more formats */ 71 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 72 assert(desc->block.width == 1); 73 assert(desc->block.height == 1); 74 assert(desc->block.bits <= 32); 75 76 /* Do the intermediate integer computations with 32bit integers since it 77 * matches floating point size */ 78 if (desc->block.bits < 32) 79 packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); 80 81 /* Broadcast the packed value to all four channels */ 82 packed = LLVMBuildInsertElement(builder, 83 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), 84 packed, 85 LLVMConstNull(LLVMInt32Type()), 86 ""); 87 packed = LLVMBuildShuffleVector(builder, 88 packed, 89 LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), 90 LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)), 91 ""); 92 93 /* Initialize vector constants */ 94 normalized = FALSE; 95 needs_uitofp = FALSE; 96 empty_channel = -1; 97 shift = 0; 98 for (i = 0; i < 4; ++i) { 99 unsigned bits = desc->channel[i].size; 100 101 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 102 shifts[i] = LLVMGetUndef(LLVMInt32Type()); 103 masks[i] = LLVMConstNull(LLVMInt32Type()); 104 scales[i] = LLVMConstNull(LLVMFloatType()); 105 empty_channel = i; 106 } 107 else { 108 unsigned long long mask = (1ULL << bits) - 1; 109 110 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 111 112 if (bits == 32) { 113 needs_uitofp = TRUE; 114 } 115 116 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); 117 masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0); 118 119 if (desc->channel[i].normalized) { 120 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask); 121 normalized = TRUE; 122 } 123 else 124 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); 125 } 126 127 shift += bits; 128 } 129 130 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); 131 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); 132 if (!needs_uitofp) { 133 /* UIToFP can't be expressed in SSE2 */ 134 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); 135 } else { 136 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); 137 } 138 139 if (normalized) 140 scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); 141 else 142 scaled = casted; 143 144 for (i = 0; i < 4; ++i) 145 aux[i] = LLVMGetUndef(LLVMFloatType()); 146 147 for (i = 0; i < 4; ++i) { 148 enum util_format_swizzle swizzle; 149 150 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 151 /* 152 * For ZS formats do RGBA = ZZZ1 153 */ 154 if (i == 3) { 155 swizzle = UTIL_FORMAT_SWIZZLE_1; 156 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { 157 swizzle = UTIL_FORMAT_SWIZZLE_0; 158 } else { 159 swizzle = desc->swizzle[0]; 160 } 161 } else { 162 swizzle = desc->swizzle[i]; 163 } 164 165 switch (swizzle) { 166 case UTIL_FORMAT_SWIZZLE_X: 167 case UTIL_FORMAT_SWIZZLE_Y: 168 case UTIL_FORMAT_SWIZZLE_Z: 169 case UTIL_FORMAT_SWIZZLE_W: 170 swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); 171 break; 172 case UTIL_FORMAT_SWIZZLE_0: 173 assert(empty_channel >= 0); 174 swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0); 175 break; 176 case UTIL_FORMAT_SWIZZLE_1: 177 swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); 178 aux[0] = LLVMConstReal(LLVMFloatType(), 1.0); 179 break; 180 case UTIL_FORMAT_SWIZZLE_NONE: 181 swizzles[i] = LLVMGetUndef(LLVMFloatType()); 182 assert(0); 183 break; 184 } 185 } 186 187 return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), LLVMConstVector(swizzles, 4), ""); 188} 189 190 191/** 192 * Pack a single pixel. 193 * 194 * @param rgba 4 float vector with the unpacked components. 195 * 196 * XXX: This is mostly for reference and testing -- operating a single pixel at 197 * a time is rarely if ever needed. 198 */ 199LLVMValueRef 200lp_build_pack_rgba_aos(LLVMBuilderRef builder, 201 const struct util_format_description *desc, 202 LLVMValueRef rgba) 203{ 204 LLVMTypeRef type; 205 LLVMValueRef packed = NULL; 206 LLVMValueRef swizzles[4]; 207 LLVMValueRef shifted, casted, scaled, unswizzled; 208 LLVMValueRef shifts[4]; 209 LLVMValueRef scales[4]; 210 bool normalized; 211 unsigned shift; 212 unsigned i, j; 213 214 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 215 assert(desc->block.width == 1); 216 assert(desc->block.height == 1); 217 218 type = LLVMIntType(desc->block.bits); 219 220 /* Unswizzle the color components into the source vector. */ 221 for (i = 0; i < 4; ++i) { 222 for (j = 0; j < 4; ++j) { 223 if (desc->swizzle[j] == i) 224 break; 225 } 226 if (j < 4) 227 swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0); 228 else 229 swizzles[i] = LLVMGetUndef(LLVMInt32Type()); 230 } 231 232 unswizzled = LLVMBuildShuffleVector(builder, rgba, 233 LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)), 234 LLVMConstVector(swizzles, 4), ""); 235 236 normalized = FALSE; 237 shift = 0; 238 for (i = 0; i < 4; ++i) { 239 unsigned bits = desc->channel[i].size; 240 241 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 242 shifts[i] = LLVMGetUndef(LLVMInt32Type()); 243 scales[i] = LLVMGetUndef(LLVMFloatType()); 244 } 245 else { 246 unsigned mask = (1 << bits) - 1; 247 248 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 249 assert(bits < 32); 250 251 shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); 252 253 if (desc->channel[i].normalized) { 254 scales[i] = LLVMConstReal(LLVMFloatType(), mask); 255 normalized = TRUE; 256 } 257 else 258 scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); 259 } 260 261 shift += bits; 262 } 263 264 if (normalized) 265 scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); 266 else 267 scaled = unswizzled; 268 269 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), ""); 270 271 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); 272 273 /* Bitwise or all components */ 274 for (i = 0; i < 4; ++i) { 275 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 276 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), ""); 277 if (packed) 278 packed = LLVMBuildOr(builder, packed, component, ""); 279 else 280 packed = component; 281 } 282 } 283 284 if (!packed) 285 packed = LLVMGetUndef(LLVMInt32Type()); 286 287 if (desc->block.bits < 32) 288 packed = LLVMBuildTrunc(builder, packed, type, ""); 289 290 return packed; 291} 292 293 294/** 295 * Fetch a pixel into a 4 float AoS. 296 * 297 * i and j are the sub-block pixel coordinates. 298 */ 299LLVMValueRef 300lp_build_fetch_rgba_aos(LLVMBuilderRef builder, 301 const struct util_format_description *format_desc, 302 LLVMValueRef ptr, 303 LLVMValueRef i, 304 LLVMValueRef j) 305{ 306 307 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 308 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 309 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 310 format_desc->block.width == 1 && 311 format_desc->block.height == 1 && 312 util_is_pot(format_desc->block.bits) && 313 format_desc->block.bits <= 32 && 314 format_desc->is_bitmask && 315 !format_desc->is_mixed && 316 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || 317 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) 318 { 319 LLVMValueRef packed; 320 321 ptr = LLVMBuildBitCast(builder, ptr, 322 LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) , 323 ""); 324 325 packed = LLVMBuildLoad(builder, ptr, "packed"); 326 327 return lp_build_unpack_rgba_aos(builder, format_desc, packed); 328 } 329 else if (format_desc->fetch_rgba_float) { 330 /* 331 * Fallback to calling util_format_description::fetch_rgba_float. 332 * 333 * This is definitely not the most efficient way of fetching pixels, as 334 * we miss the opportunity to do vectorization, but this it is a 335 * convenient for formats or scenarios for which there was no opportunity 336 * or incentive to optimize. 337 */ 338 339 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); 340 char name[256]; 341 LLVMValueRef function; 342 LLVMValueRef tmp; 343 LLVMValueRef args[4]; 344 345 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", 346 format_desc->short_name); 347 348 /* 349 * Declare and bind format_desc->fetch_rgba_float(). 350 */ 351 352 function = LLVMGetNamedFunction(module, name); 353 if (!function) { 354 LLVMTypeRef ret_type; 355 LLVMTypeRef arg_types[4]; 356 LLVMTypeRef function_type; 357 358 ret_type = LLVMVoidType(); 359 arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); 360 arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); 361 arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); 362 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); 363 function = LLVMAddFunction(module, name, function_type); 364 365 LLVMSetFunctionCallConv(function, LLVMCCallConv); 366 LLVMSetLinkage(function, LLVMExternalLinkage); 367 368 assert(LLVMIsDeclaration(function)); 369 370 LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); 371 } 372 373 /* 374 * XXX: this should better go to the first block in the function 375 */ 376 377 tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); 378 379 /* 380 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result 381 * in the SoA vectors. 382 */ 383 384 args[0] = LLVMBuildBitCast(builder, tmp, 385 LLVMPointerType(LLVMFloatType(), 0), ""); 386 args[1] = ptr; 387 args[2] = i; 388 args[3] = j; 389 390 LLVMBuildCall(builder, function, args, 4, ""); 391 392 return LLVMBuildLoad(builder, tmp, ""); 393 } 394 else { 395 assert(0); 396 return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); 397 } 398} 399