lp_bld_format_aos.c revision efc82aef35a2aac5d2ed9774f6d28f2626796416
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * AoS pixel format manipulation. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_format.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include "util/u_string.h" 40 41#include "lp_bld_arit.h" 42#include "lp_bld_init.h" 43#include "lp_bld_type.h" 44#include "lp_bld_flow.h" 45#include "lp_bld_const.h" 46#include "lp_bld_conv.h" 47#include "lp_bld_swizzle.h" 48#include "lp_bld_gather.h" 49#include "lp_bld_debug.h" 50#include "lp_bld_format.h" 51 52 53/** 54 * Basic swizzling. Rearrange the order of the unswizzled array elements 55 * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported 56 * too. 57 * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. 58 */ 59LLVMValueRef 60lp_build_format_swizzle_aos(const struct util_format_description *desc, 61 struct lp_build_context *bld, 62 LLVMValueRef unswizzled) 63{ 64 unsigned char swizzles[4]; 65 unsigned chan; 66 67 assert(bld->type.length % 4 == 0); 68 69 for (chan = 0; chan < 4; ++chan) { 70 enum util_format_swizzle swizzle; 71 72 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 73 /* 74 * For ZS formats do RGBA = ZZZ1 75 */ 76 if (chan == 3) { 77 swizzle = UTIL_FORMAT_SWIZZLE_1; 78 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { 79 swizzle = UTIL_FORMAT_SWIZZLE_0; 80 } else { 81 swizzle = desc->swizzle[0]; 82 } 83 } else { 84 swizzle = desc->swizzle[chan]; 85 } 86 swizzles[chan] = swizzle; 87 } 88 89 return lp_build_swizzle_aos(bld, unswizzled, swizzles); 90} 91 92 93/** 94 * Whether the format matches the vector type, apart of swizzles. 95 */ 96static INLINE boolean 97format_matches_type(const struct util_format_description *desc, 98 struct lp_type type) 99{ 100 enum util_format_type chan_type; 101 unsigned chan; 102 103 assert(type.length % 4 == 0); 104 105 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || 106 desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || 107 desc->block.width != 1 || 108 desc->block.height != 1) { 109 return FALSE; 110 } 111 112 if (type.floating) { 113 chan_type = UTIL_FORMAT_TYPE_FLOAT; 114 } else if (type.fixed) { 115 chan_type = UTIL_FORMAT_TYPE_FIXED; 116 } else if (type.sign) { 117 chan_type = UTIL_FORMAT_TYPE_SIGNED; 118 } else { 119 chan_type = UTIL_FORMAT_TYPE_UNSIGNED; 120 } 121 122 for (chan = 0; chan < desc->nr_channels; ++chan) { 123 if (desc->channel[chan].size != type.width) { 124 return FALSE; 125 } 126 127 if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { 128 if (desc->channel[chan].type != chan_type || 129 desc->channel[chan].normalized != type.norm) { 130 return FALSE; 131 } 132 } 133 } 134 135 return TRUE; 136} 137 138 139/** 140 * Unpack a single pixel into its RGBA components. 141 * 142 * @param desc the pixel format for the packed pixel value 143 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM 144 * 145 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. 146 */ 147static INLINE LLVMValueRef 148lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, 149 const struct util_format_description *desc, 150 LLVMValueRef packed) 151{ 152 LLVMBuilderRef builder = gallivm->builder; 153 LLVMValueRef shifted, casted, scaled, masked; 154 LLVMValueRef shifts[4]; 155 LLVMValueRef masks[4]; 156 LLVMValueRef scales[4]; 157 158 boolean normalized; 159 boolean needs_uitofp; 160 unsigned shift; 161 unsigned i; 162 163 /* TODO: Support more formats */ 164 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 165 assert(desc->block.width == 1); 166 assert(desc->block.height == 1); 167 assert(desc->block.bits <= 32); 168 169 /* Do the intermediate integer computations with 32bit integers since it 170 * matches floating point size */ 171 assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context)); 172 173 /* Broadcast the packed value to all four channels 174 * before: packed = BGRA 175 * after: packed = {BGRA, BGRA, BGRA, BGRA} 176 */ 177 packed = LLVMBuildInsertElement(builder, 178 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 179 packed, 180 LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)), 181 ""); 182 packed = LLVMBuildShuffleVector(builder, 183 packed, 184 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 185 LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 186 ""); 187 188 /* Initialize vector constants */ 189 normalized = FALSE; 190 needs_uitofp = FALSE; 191 shift = 0; 192 193 /* Loop over 4 color components */ 194 for (i = 0; i < 4; ++i) { 195 unsigned bits = desc->channel[i].size; 196 197 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 198 shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 199 masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); 200 scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context)); 201 } 202 else { 203 unsigned long long mask = (1ULL << bits) - 1; 204 205 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 206 207 if (bits == 32) { 208 needs_uitofp = TRUE; 209 } 210 211 shifts[i] = lp_build_const_int32(gallivm, shift); 212 masks[i] = lp_build_const_int32(gallivm, mask); 213 214 if (desc->channel[i].normalized) { 215 scales[i] = lp_build_const_float(gallivm, 1.0 / mask); 216 normalized = TRUE; 217 } 218 else 219 scales[i] = lp_build_const_float(gallivm, 1.0); 220 } 221 222 shift += bits; 223 } 224 225 /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA} 226 * into masked = {B, G, R, A} 227 */ 228 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); 229 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); 230 231 232 if (!needs_uitofp) { 233 /* UIToFP can't be expressed in SSE2 */ 234 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); 235 } else { 236 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); 237 } 238 239 /* At this point 'casted' may be a vector of floats such as 240 * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized 241 * we'll scale this to {1.0, 1.0, 1.0, 1.0}. 242 */ 243 244 if (normalized) 245 scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); 246 else 247 scaled = casted; 248 249 return scaled; 250} 251 252 253/** 254 * Pack a single pixel. 255 * 256 * @param rgba 4 float vector with the unpacked components. 257 * 258 * XXX: This is mostly for reference and testing -- operating a single pixel at 259 * a time is rarely if ever needed. 260 */ 261LLVMValueRef 262lp_build_pack_rgba_aos(struct gallivm_state *gallivm, 263 const struct util_format_description *desc, 264 LLVMValueRef rgba) 265{ 266 LLVMBuilderRef builder = gallivm->builder; 267 LLVMTypeRef type; 268 LLVMValueRef packed = NULL; 269 LLVMValueRef swizzles[4]; 270 LLVMValueRef shifted, casted, scaled, unswizzled; 271 LLVMValueRef shifts[4]; 272 LLVMValueRef scales[4]; 273 boolean normalized; 274 unsigned shift; 275 unsigned i, j; 276 277 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 278 assert(desc->block.width == 1); 279 assert(desc->block.height == 1); 280 281 type = LLVMIntTypeInContext(gallivm->context, desc->block.bits); 282 283 /* Unswizzle the color components into the source vector. */ 284 for (i = 0; i < 4; ++i) { 285 for (j = 0; j < 4; ++j) { 286 if (desc->swizzle[j] == i) 287 break; 288 } 289 if (j < 4) 290 swizzles[i] = lp_build_const_int32(gallivm, j); 291 else 292 swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 293 } 294 295 unswizzled = LLVMBuildShuffleVector(builder, rgba, 296 LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)), 297 LLVMConstVector(swizzles, 4), ""); 298 299 normalized = FALSE; 300 shift = 0; 301 for (i = 0; i < 4; ++i) { 302 unsigned bits = desc->channel[i].size; 303 304 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 305 shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 306 scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context)); 307 } 308 else { 309 unsigned mask = (1 << bits) - 1; 310 311 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 312 assert(bits < 32); 313 314 shifts[i] = lp_build_const_int32(gallivm, shift); 315 316 if (desc->channel[i].normalized) { 317 scales[i] = lp_build_const_float(gallivm, mask); 318 normalized = TRUE; 319 } 320 else 321 scales[i] = lp_build_const_float(gallivm, 1.0); 322 } 323 324 shift += bits; 325 } 326 327 if (normalized) 328 scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); 329 else 330 scaled = unswizzled; 331 332 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), ""); 333 334 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); 335 336 /* Bitwise or all components */ 337 for (i = 0; i < 4; ++i) { 338 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 339 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, 340 lp_build_const_int32(gallivm, i), ""); 341 if (packed) 342 packed = LLVMBuildOr(builder, packed, component, ""); 343 else 344 packed = component; 345 } 346 } 347 348 if (!packed) 349 packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 350 351 if (desc->block.bits < 32) 352 packed = LLVMBuildTrunc(builder, packed, type, ""); 353 354 return packed; 355} 356 357 358 359 360/** 361 * Fetch a pixel into a 4 float AoS. 362 * 363 * \param format_desc describes format of the image we're fetching from 364 * \param ptr address of the pixel block (or the texel if uncompressed) 365 * \param i, j the sub-block pixel coordinates. For non-compressed formats 366 * these will always be (0, 0). 367 * \return a 4 element vector with the pixel's RGBA values. 368 */ 369LLVMValueRef 370lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, 371 const struct util_format_description *format_desc, 372 struct lp_type type, 373 LLVMValueRef base_ptr, 374 LLVMValueRef offset, 375 LLVMValueRef i, 376 LLVMValueRef j) 377{ 378 LLVMBuilderRef builder = gallivm->builder; 379 unsigned num_pixels = type.length / 4; 380 struct lp_build_context bld; 381 382 assert(type.length <= LP_MAX_VECTOR_LENGTH); 383 assert(type.length % 4 == 0); 384 385 lp_build_context_init(&bld, gallivm, type); 386 387 /* 388 * Trivial case 389 * 390 * The format matches the type (apart of a swizzle) so no need for 391 * scaling or converting. 392 */ 393 394 if (format_matches_type(format_desc, type) && 395 format_desc->block.bits <= type.width * 4 && 396 util_is_power_of_two(format_desc->block.bits)) { 397 LLVMValueRef packed; 398 399 /* 400 * The format matches the type (apart of a swizzle) so no need for 401 * scaling or converting. 402 */ 403 404 packed = lp_build_gather(gallivm, type.length/4, 405 format_desc->block.bits, type.width*4, 406 base_ptr, offset); 407 408 assert(format_desc->block.bits <= type.width * type.length); 409 410 packed = LLVMBuildBitCast(gallivm->builder, packed, 411 lp_build_vec_type(gallivm, type), ""); 412 413 return lp_build_format_swizzle_aos(format_desc, &bld, packed); 414 } 415 416 /* 417 * Bit arithmetic 418 */ 419 420 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 421 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 422 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 423 format_desc->block.width == 1 && 424 format_desc->block.height == 1 && 425 util_is_power_of_two(format_desc->block.bits) && 426 format_desc->block.bits <= 32 && 427 format_desc->is_bitmask && 428 !format_desc->is_mixed && 429 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || 430 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { 431 432 LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; 433 LLVMValueRef res; 434 unsigned k; 435 436 /* 437 * Unpack a pixel at a time into a <4 x float> RGBA vector 438 */ 439 440 for (k = 0; k < num_pixels; ++k) { 441 LLVMValueRef packed; 442 443 packed = lp_build_gather_elem(gallivm, num_pixels, 444 format_desc->block.bits, 32, 445 base_ptr, offset, k); 446 447 tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, 448 format_desc, 449 packed); 450 } 451 452 /* 453 * Type conversion. 454 * 455 * TODO: We could avoid floating conversion for integer to 456 * integer conversions. 457 */ 458 459 if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { 460 debug_printf("%s: unpacking %s with floating point\n", 461 __FUNCTION__, format_desc->short_name); 462 } 463 464 lp_build_conv(gallivm, 465 lp_float32_vec4_type(), 466 type, 467 tmps, num_pixels, &res, 1); 468 469 return lp_build_format_swizzle_aos(format_desc, &bld, res); 470 } 471 472 /* 473 * YUV / subsampled formats 474 */ 475 476 if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 477 struct lp_type tmp_type; 478 LLVMValueRef tmp; 479 480 memset(&tmp_type, 0, sizeof tmp_type); 481 tmp_type.width = 8; 482 tmp_type.length = num_pixels * 4; 483 tmp_type.norm = TRUE; 484 485 tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, 486 format_desc, 487 num_pixels, 488 base_ptr, 489 offset, 490 i, j); 491 492 lp_build_conv(gallivm, 493 tmp_type, type, 494 &tmp, 1, &tmp, 1); 495 496 return tmp; 497 } 498 499 /* 500 * Fallback to util_format_description::fetch_rgba_8unorm(). 501 */ 502 503 if (format_desc->fetch_rgba_8unorm && 504 !type.floating && type.width == 8 && !type.sign && type.norm) { 505 /* 506 * Fallback to calling util_format_description::fetch_rgba_8unorm. 507 * 508 * This is definitely not the most efficient way of fetching pixels, as 509 * we miss the opportunity to do vectorization, but this it is a 510 * convenient for formats or scenarios for which there was no opportunity 511 * or incentive to optimize. 512 */ 513 514 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 515 char name[256]; 516 LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); 517 LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); 518 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 519 LLVMValueRef function; 520 LLVMValueRef tmp_ptr; 521 LLVMValueRef tmp; 522 LLVMValueRef res; 523 unsigned k; 524 525 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", 526 format_desc->short_name); 527 528 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 529 debug_printf("%s: falling back to %s\n", __FUNCTION__, name); 530 } 531 532 /* 533 * Declare and bind format_desc->fetch_rgba_8unorm(). 534 */ 535 536 function = LLVMGetNamedFunction(module, name); 537 if (!function) { 538 LLVMTypeRef ret_type; 539 LLVMTypeRef arg_types[4]; 540 LLVMTypeRef function_type; 541 542 ret_type = LLVMVoidTypeInContext(gallivm->context); 543 arg_types[0] = pi8t; 544 arg_types[1] = pi8t; 545 arg_types[3] = arg_types[2] = LLVMIntTypeInContext(gallivm->context, sizeof(unsigned) * 8); 546 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); 547 function = LLVMAddFunction(module, name, function_type); 548 549 LLVMSetFunctionCallConv(function, LLVMCCallConv); 550 LLVMSetLinkage(function, LLVMExternalLinkage); 551 552 assert(LLVMIsDeclaration(function)); 553 554 LLVMAddGlobalMapping(gallivm->engine, function, 555 func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm)); 556 } 557 558 tmp_ptr = lp_build_alloca(gallivm, i32t, ""); 559 560 res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); 561 562 /* 563 * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result 564 * in the SoA vectors. 565 */ 566 567 for (k = 0; k < num_pixels; ++k) { 568 LLVMValueRef index = lp_build_const_int32(gallivm, k); 569 LLVMValueRef args[4]; 570 571 args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); 572 args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, 573 base_ptr, offset, k); 574 575 if (num_pixels == 1) { 576 args[2] = i; 577 args[3] = j; 578 } 579 else { 580 args[2] = LLVMBuildExtractElement(builder, i, index, ""); 581 args[3] = LLVMBuildExtractElement(builder, j, index, ""); 582 } 583 584 LLVMBuildCall(builder, function, args, Elements(args), ""); 585 586 tmp = LLVMBuildLoad(builder, tmp_ptr, ""); 587 588 if (num_pixels == 1) { 589 res = tmp; 590 } 591 else { 592 res = LLVMBuildInsertElement(builder, res, tmp, index, ""); 593 } 594 } 595 596 /* Bitcast from <n x i32> to <4n x i8> */ 597 res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); 598 599 return res; 600 } 601 602 603 /* 604 * Fallback to util_format_description::fetch_rgba_float(). 605 */ 606 607 if (format_desc->fetch_rgba_float) { 608 /* 609 * Fallback to calling util_format_description::fetch_rgba_float. 610 * 611 * This is definitely not the most efficient way of fetching pixels, as 612 * we miss the opportunity to do vectorization, but this it is a 613 * convenient for formats or scenarios for which there was no opportunity 614 * or incentive to optimize. 615 */ 616 617 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); 618 char name[256]; 619 LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); 620 LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); 621 LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); 622 LLVMValueRef function; 623 LLVMValueRef tmp_ptr; 624 LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; 625 LLVMValueRef res; 626 unsigned k; 627 628 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", 629 format_desc->short_name); 630 631 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 632 debug_printf("%s: falling back to %s\n", __FUNCTION__, name); 633 } 634 635 /* 636 * Declare and bind format_desc->fetch_rgba_float(). 637 */ 638 639 function = LLVMGetNamedFunction(module, name); 640 if (!function) { 641 LLVMTypeRef ret_type; 642 LLVMTypeRef arg_types[4]; 643 LLVMTypeRef function_type; 644 645 ret_type = LLVMVoidTypeInContext(gallivm->context); 646 arg_types[0] = pf32t; 647 arg_types[1] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); 648 arg_types[3] = arg_types[2] = LLVMIntTypeInContext(gallivm->context, sizeof(unsigned) * 8); 649 function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); 650 function = LLVMAddFunction(module, name, function_type); 651 652 LLVMSetFunctionCallConv(function, LLVMCCallConv); 653 LLVMSetLinkage(function, LLVMExternalLinkage); 654 655 assert(LLVMIsDeclaration(function)); 656 657 LLVMAddGlobalMapping(gallivm->engine, function, 658 func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); 659 } 660 661 tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); 662 663 /* 664 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result 665 * in the SoA vectors. 666 */ 667 668 for (k = 0; k < num_pixels; ++k) { 669 LLVMValueRef args[4]; 670 671 args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); 672 args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, 673 base_ptr, offset, k); 674 675 if (num_pixels == 1) { 676 args[2] = i; 677 args[3] = j; 678 } 679 else { 680 LLVMValueRef index = lp_build_const_int32(gallivm, k); 681 args[2] = LLVMBuildExtractElement(builder, i, index, ""); 682 args[3] = LLVMBuildExtractElement(builder, j, index, ""); 683 } 684 685 LLVMBuildCall(builder, function, args, Elements(args), ""); 686 687 tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); 688 } 689 690 lp_build_conv(gallivm, 691 lp_float32_vec4_type(), 692 type, 693 tmps, num_pixels, &res, 1); 694 695 return res; 696 } 697 698 assert(0); 699 return lp_build_undef(gallivm, type); 700} 701