lp_bld_format_aos.c revision 3ecf47af1252ad10f98d5ce488cc1b91fab64c25
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * AoS pixel format manipulation. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35 36#include "util/u_format.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include "util/u_pointer.h" 40#include "util/u_string.h" 41 42#include "lp_bld_arit.h" 43#include "lp_bld_init.h" 44#include "lp_bld_type.h" 45#include "lp_bld_flow.h" 46#include "lp_bld_const.h" 47#include "lp_bld_conv.h" 48#include "lp_bld_swizzle.h" 49#include "lp_bld_gather.h" 50#include "lp_bld_debug.h" 51#include "lp_bld_format.h" 52 53 54/** 55 * Basic swizzling. Rearrange the order of the unswizzled array elements 56 * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported 57 * too. 58 * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. 59 */ 60LLVMValueRef 61lp_build_format_swizzle_aos(const struct util_format_description *desc, 62 struct lp_build_context *bld, 63 LLVMValueRef unswizzled) 64{ 65 unsigned char swizzles[4]; 66 unsigned chan; 67 68 assert(bld->type.length % 4 == 0); 69 70 for (chan = 0; chan < 4; ++chan) { 71 enum util_format_swizzle swizzle; 72 73 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 74 /* 75 * For ZS formats do RGBA = ZZZ1 76 */ 77 if (chan == 3) { 78 swizzle = UTIL_FORMAT_SWIZZLE_1; 79 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { 80 swizzle = UTIL_FORMAT_SWIZZLE_0; 81 } else { 82 swizzle = desc->swizzle[0]; 83 } 84 } else { 85 swizzle = desc->swizzle[chan]; 86 } 87 swizzles[chan] = swizzle; 88 } 89 90 return lp_build_swizzle_aos(bld, unswizzled, swizzles); 91} 92 93 94/** 95 * Whether the format matches the vector type, apart of swizzles. 96 */ 97static INLINE boolean 98format_matches_type(const struct util_format_description *desc, 99 struct lp_type type) 100{ 101 enum util_format_type chan_type; 102 unsigned chan; 103 104 assert(type.length % 4 == 0); 105 106 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || 107 desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || 108 desc->block.width != 1 || 109 desc->block.height != 1) { 110 return FALSE; 111 } 112 113 if (type.floating) { 114 chan_type = UTIL_FORMAT_TYPE_FLOAT; 115 } else if (type.fixed) { 116 chan_type = UTIL_FORMAT_TYPE_FIXED; 117 } else if (type.sign) { 118 chan_type = UTIL_FORMAT_TYPE_SIGNED; 119 } else { 120 chan_type = UTIL_FORMAT_TYPE_UNSIGNED; 121 } 122 123 for (chan = 0; chan < desc->nr_channels; ++chan) { 124 if (desc->channel[chan].size != type.width) { 125 return FALSE; 126 } 127 128 if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { 129 if (desc->channel[chan].type != chan_type || 130 desc->channel[chan].normalized != type.norm) { 131 return FALSE; 132 } 133 } 134 } 135 136 return TRUE; 137} 138 139 140/** 141 * Unpack a single pixel into its RGBA components. 142 * 143 * @param desc the pixel format for the packed pixel value 144 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM 145 * 146 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. 147 */ 148static INLINE LLVMValueRef 149lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, 150 const struct util_format_description *desc, 151 LLVMValueRef packed) 152{ 153 LLVMBuilderRef builder = gallivm->builder; 154 LLVMValueRef shifted, casted, scaled, masked; 155 LLVMValueRef shifts[4]; 156 LLVMValueRef masks[4]; 157 LLVMValueRef scales[4]; 158 159 boolean normalized; 160 boolean needs_uitofp; 161 unsigned shift; 162 unsigned i; 163 164 /* TODO: Support more formats */ 165 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 166 assert(desc->block.width == 1); 167 assert(desc->block.height == 1); 168 assert(desc->block.bits <= 32); 169 170 /* Do the intermediate integer computations with 32bit integers since it 171 * matches floating point size */ 172 assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context)); 173 174 /* Broadcast the packed value to all four channels 175 * before: packed = BGRA 176 * after: packed = {BGRA, BGRA, BGRA, BGRA} 177 */ 178 packed = LLVMBuildInsertElement(builder, 179 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 180 packed, 181 LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)), 182 ""); 183 packed = LLVMBuildShuffleVector(builder, 184 packed, 185 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 186 LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 187 ""); 188 189 /* Initialize vector constants */ 190 normalized = FALSE; 191 needs_uitofp = FALSE; 192 shift = 0; 193 194 /* Loop over 4 color components */ 195 for (i = 0; i < 4; ++i) { 196 unsigned bits = desc->channel[i].size; 197 198 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 199 shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 200 masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); 201 scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context)); 202 } 203 else { 204 unsigned long long mask = (1ULL << bits) - 1; 205 206 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 207 208 if (bits == 32) { 209 needs_uitofp = TRUE; 210 } 211 212 shifts[i] = lp_build_const_int32(gallivm, shift); 213 masks[i] = lp_build_const_int32(gallivm, mask); 214 215 if (desc->channel[i].normalized) { 216 scales[i] = lp_build_const_float(gallivm, 1.0 / mask); 217 normalized = TRUE; 218 } 219 else 220 scales[i] = lp_build_const_float(gallivm, 1.0); 221 } 222 223 shift += bits; 224 } 225 226 /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA} 227 * into masked = {B, G, R, A} 228 */ 229 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); 230 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); 231 232 233 if (!needs_uitofp) { 234 /* UIToFP can't be expressed in SSE2 */ 235 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); 236 } else { 237 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); 238 } 239 240 /* At this point 'casted' may be a vector of floats such as 241 * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized 242 * we'll scale this to {1.0, 1.0, 1.0, 1.0}. 243 */ 244 245 if (normalized) 246 scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); 247 else 248 scaled = casted; 249 250 return scaled; 251} 252 253 254/** 255 * Pack a single pixel. 256 * 257 * @param rgba 4 float vector with the unpacked components. 258 * 259 * XXX: This is mostly for reference and testing -- operating a single pixel at 260 * a time is rarely if ever needed. 261 */ 262LLVMValueRef 263lp_build_pack_rgba_aos(struct gallivm_state *gallivm, 264 const struct util_format_description *desc, 265 LLVMValueRef rgba) 266{ 267 LLVMBuilderRef builder = gallivm->builder; 268 LLVMTypeRef type; 269 LLVMValueRef packed = NULL; 270 LLVMValueRef swizzles[4]; 271 LLVMValueRef shifted, casted, scaled, unswizzled; 272 LLVMValueRef shifts[4]; 273 LLVMValueRef scales[4]; 274 boolean normalized; 275 unsigned shift; 276 unsigned i, j; 277 278 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 279 assert(desc->block.width == 1); 280 assert(desc->block.height == 1); 281 282 type = LLVMIntTypeInContext(gallivm->context, desc->block.bits); 283 284 /* Unswizzle the color components into the source vector. */ 285 for (i = 0; i < 4; ++i) { 286 for (j = 0; j < 4; ++j) { 287 if (desc->swizzle[j] == i) 288 break; 289 } 290 if (j < 4) 291 swizzles[i] = lp_build_const_int32(gallivm, j); 292 else 293 swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 294 } 295 296 unswizzled = LLVMBuildShuffleVector(builder, rgba, 297 LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)), 298 LLVMConstVector(swizzles, 4), ""); 299 300 normalized = FALSE; 301 shift = 0; 302 for (i = 0; i < 4; ++i) { 303 unsigned bits = desc->channel[i].size; 304 305 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 306 shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 307 scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context)); 308 } 309 else { 310 unsigned mask = (1 << bits) - 1; 311 312 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 313 assert(bits < 32); 314 315 shifts[i] = lp_build_const_int32(gallivm, shift); 316 317 if (desc->channel[i].normalized) { 318 scales[i] = lp_build_const_float(gallivm, mask); 319 normalized = TRUE; 320 } 321 else 322 scales[i] = lp_build_const_float(gallivm, 1.0); 323 } 324 325 shift += bits; 326 } 327 328 if (normalized) 329 scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); 330 else 331 scaled = unswizzled; 332 333 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), ""); 334 335 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); 336 337 /* Bitwise or all components */ 338 for (i = 0; i < 4; ++i) { 339 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 340 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, 341 lp_build_const_int32(gallivm, i), ""); 342 if (packed) 343 packed = LLVMBuildOr(builder, packed, component, ""); 344 else 345 packed = component; 346 } 347 } 348 349 if (!packed) 350 packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 351 352 if (desc->block.bits < 32) 353 packed = LLVMBuildTrunc(builder, packed, type, ""); 354 355 return packed; 356} 357 358 359 360 361/** 362 * Fetch a pixel into a 4 float AoS. 363 * 364 * \param format_desc describes format of the image we're fetching from 365 * \param ptr address of the pixel block (or the texel if uncompressed) 366 * \param i, j the sub-block pixel coordinates. For non-compressed formats 367 * these will always be (0, 0). 368 * \return a 4 element vector with the pixel's RGBA values. 369 */ 370LLVMValueRef 371lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, 372 const struct util_format_description *format_desc, 373 struct lp_type type, 374 LLVMValueRef base_ptr, 375 LLVMValueRef offset, 376 LLVMValueRef i, 377 LLVMValueRef j) 378{ 379 LLVMBuilderRef builder = gallivm->builder; 380 unsigned num_pixels = type.length / 4; 381 struct lp_build_context bld; 382 383 assert(type.length <= LP_MAX_VECTOR_LENGTH); 384 assert(type.length % 4 == 0); 385 386 lp_build_context_init(&bld, gallivm, type); 387 388 /* 389 * Trivial case 390 * 391 * The format matches the type (apart of a swizzle) so no need for 392 * scaling or converting. 393 */ 394 395 if (format_matches_type(format_desc, type) && 396 format_desc->block.bits <= type.width * 4 && 397 util_is_power_of_two(format_desc->block.bits)) { 398 LLVMValueRef packed; 399 400 /* 401 * The format matches the type (apart of a swizzle) so no need for 402 * scaling or converting. 403 */ 404 405 packed = lp_build_gather(gallivm, type.length/4, 406 format_desc->block.bits, type.width*4, 407 base_ptr, offset); 408 409 assert(format_desc->block.bits <= type.width * type.length); 410 411 packed = LLVMBuildBitCast(gallivm->builder, packed, 412 lp_build_vec_type(gallivm, type), ""); 413 414 return lp_build_format_swizzle_aos(format_desc, &bld, packed); 415 } 416 417 /* 418 * Bit arithmetic 419 */ 420 421 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 422 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 423 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 424 format_desc->block.width == 1 && 425 format_desc->block.height == 1 && 426 util_is_power_of_two(format_desc->block.bits) && 427 format_desc->block.bits <= 32 && 428 format_desc->is_bitmask && 429 !format_desc->is_mixed && 430 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || 431 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { 432 433 LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; 434 LLVMValueRef res; 435 unsigned k; 436 437 /* 438 * Unpack a pixel at a time into a <4 x float> RGBA vector 439 */ 440 441 for (k = 0; k < num_pixels; ++k) { 442 LLVMValueRef packed; 443 444 packed = lp_build_gather_elem(gallivm, num_pixels, 445 format_desc->block.bits, 32, 446 base_ptr, offset, k); 447 448 tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, 449 format_desc, 450 packed); 451 } 452 453 /* 454 * Type conversion. 455 * 456 * TODO: We could avoid floating conversion for integer to 457 * integer conversions. 458 */ 459 460 if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { 461 debug_printf("%s: unpacking %s with floating point\n", 462 __FUNCTION__, format_desc->short_name); 463 } 464 465 lp_build_conv(gallivm, 466 lp_float32_vec4_type(), 467 type, 468 tmps, num_pixels, &res, 1); 469 470 return lp_build_format_swizzle_aos(format_desc, &bld, res); 471 } 472 473 /* 474 * YUV / subsampled formats 475 */ 476 477 if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 478 struct lp_type tmp_type; 479 LLVMValueRef tmp; 480 481 memset(&tmp_type, 0, sizeof tmp_type); 482 tmp_type.width = 8; 483 tmp_type.length = num_pixels * 4; 484 tmp_type.norm = TRUE; 485 486 tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, 487 format_desc, 488 num_pixels, 489 base_ptr, 490 offset, 491 i, j); 492 493 lp_build_conv(gallivm, 494 tmp_type, type, 495 &tmp, 1, &tmp, 1); 496 497 return tmp; 498 } 499 500 /* 501 * Fallback to util_format_description::fetch_rgba_8unorm(). 502 */ 503 504 if (format_desc->fetch_rgba_8unorm && 505 !type.floating && type.width == 8 && !type.sign && type.norm) { 506 /* 507 * Fallback to calling util_format_description::fetch_rgba_8unorm. 508 * 509 * This is definitely not the most efficient way of fetching pixels, as 510 * we miss the opportunity to do vectorization, but this it is a 511 * convenient for formats or scenarios for which there was no opportunity 512 * or incentive to optimize. 513 */ 514 515 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 516 char name[256]; 517 LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); 518 LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); 519 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 520 LLVMValueRef function; 521 LLVMValueRef tmp_ptr; 522 LLVMValueRef tmp; 523 LLVMValueRef res; 524 LLVMValueRef callee; 525 unsigned k; 526 527 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", 528 format_desc->short_name); 529 530 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 531 debug_printf("%s: falling back to %s\n", __FUNCTION__, name); 532 } 533 534 /* 535 * Declare and bind format_desc->fetch_rgba_8unorm(). 536 */ 537 538 function = LLVMGetNamedFunction(module, name); 539 if (!function) { 540 /* 541 * Function to call looks like: 542 * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) 543 */ 544 LLVMTypeRef ret_type; 545 LLVMTypeRef arg_types[4]; 546 LLVMTypeRef function_type; 547 548 ret_type = LLVMVoidTypeInContext(gallivm->context); 549 arg_types[0] = pi8t; 550 arg_types[1] = pi8t; 551 arg_types[2] = i32t; 552 arg_types[3] = i32t; 553 function_type = LLVMFunctionType(ret_type, arg_types, 554 Elements(arg_types), 0); 555 function = LLVMAddFunction(module, name, function_type); 556 557 LLVMSetFunctionCallConv(function, LLVMCCallConv); 558 LLVMSetLinkage(function, LLVMExternalLinkage); 559 560 assert(LLVMIsDeclaration(function)); 561 } 562 563 /* make const pointer for the C fetch_rgba_float function */ 564 callee = lp_build_const_int_pointer(gallivm, 565 func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); 566 567 /* cast the callee pointer to the function's type */ 568 function = LLVMBuildBitCast(builder, callee, 569 LLVMTypeOf(function), "cast callee"); 570 571 tmp_ptr = lp_build_alloca(gallivm, i32t, ""); 572 573 res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); 574 575 /* 576 * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result 577 * in the SoA vectors. 578 */ 579 580 for (k = 0; k < num_pixels; ++k) { 581 LLVMValueRef index = lp_build_const_int32(gallivm, k); 582 LLVMValueRef args[4]; 583 584 args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); 585 args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, 586 base_ptr, offset, k); 587 588 if (num_pixels == 1) { 589 args[2] = i; 590 args[3] = j; 591 } 592 else { 593 args[2] = LLVMBuildExtractElement(builder, i, index, ""); 594 args[3] = LLVMBuildExtractElement(builder, j, index, ""); 595 } 596 597 LLVMBuildCall(builder, function, args, Elements(args), ""); 598 599 tmp = LLVMBuildLoad(builder, tmp_ptr, ""); 600 601 if (num_pixels == 1) { 602 res = tmp; 603 } 604 else { 605 res = LLVMBuildInsertElement(builder, res, tmp, index, ""); 606 } 607 } 608 609 /* Bitcast from <n x i32> to <4n x i8> */ 610 res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); 611 612 return res; 613 } 614 615 616 /* 617 * Fallback to util_format_description::fetch_rgba_float(). 618 */ 619 620 if (format_desc->fetch_rgba_float) { 621 /* 622 * Fallback to calling util_format_description::fetch_rgba_float. 623 * 624 * This is definitely not the most efficient way of fetching pixels, as 625 * we miss the opportunity to do vectorization, but this it is a 626 * convenient for formats or scenarios for which there was no opportunity 627 * or incentive to optimize. 628 */ 629 630 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); 631 char name[256]; 632 LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); 633 LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); 634 LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); 635 LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); 636 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 637 LLVMValueRef function; 638 LLVMValueRef tmp_ptr; 639 LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; 640 LLVMValueRef res; 641 LLVMValueRef callee; 642 unsigned k; 643 644 util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", 645 format_desc->short_name); 646 647 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 648 debug_printf("%s: falling back to %s\n", __FUNCTION__, name); 649 } 650 651 /* 652 * Declare and bind format_desc->fetch_rgba_float(). 653 */ 654 655 function = LLVMGetNamedFunction(module, name); 656 if (!function) { 657 /* 658 * Function to call looks like: 659 * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) 660 */ 661 LLVMTypeRef ret_type; 662 LLVMTypeRef arg_types[4]; 663 LLVMTypeRef function_type; 664 665 ret_type = LLVMVoidTypeInContext(gallivm->context); 666 arg_types[0] = pf32t; 667 arg_types[1] = pi8t; 668 arg_types[2] = i32t; 669 arg_types[3] = i32t; 670 function_type = LLVMFunctionType(ret_type, arg_types, 671 Elements(arg_types), 0); 672 function = LLVMAddFunction(module, name, function_type); 673 674 LLVMSetFunctionCallConv(function, LLVMCCallConv); 675 LLVMSetLinkage(function, LLVMExternalLinkage); 676 677 assert(LLVMIsDeclaration(function)); 678 } 679 680 /* Note: we're using this casting here instead of LLVMAddGlobalMapping() 681 * to work around a bug in LLVM 2.6. 682 */ 683 684 /* make const pointer for the C fetch_rgba_float function */ 685 callee = lp_build_const_int_pointer(gallivm, 686 func_to_pointer((func_pointer) format_desc->fetch_rgba_float)); 687 688 /* cast the callee pointer to the function's type */ 689 function = LLVMBuildBitCast(builder, callee, 690 LLVMTypeOf(function), "cast callee"); 691 692 693 tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); 694 695 /* 696 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result 697 * in the SoA vectors. 698 */ 699 700 for (k = 0; k < num_pixels; ++k) { 701 LLVMValueRef args[4]; 702 703 args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); 704 args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, 705 base_ptr, offset, k); 706 707 if (num_pixels == 1) { 708 args[2] = i; 709 args[3] = j; 710 } 711 else { 712 LLVMValueRef index = lp_build_const_int32(gallivm, k); 713 args[2] = LLVMBuildExtractElement(builder, i, index, ""); 714 args[3] = LLVMBuildExtractElement(builder, j, index, ""); 715 } 716 717 LLVMBuildCall(builder, function, args, Elements(args), ""); 718 719 tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); 720 } 721 722 lp_build_conv(gallivm, 723 lp_float32_vec4_type(), 724 type, 725 tmps, num_pixels, &res, 1); 726 727 return res; 728 } 729 730 assert(0); 731 return lp_build_undef(gallivm, type); 732} 733