lp_bld_sample_soa.c revision 923256626931c057d1a7c20d8900768b0c1faea9
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Texture sampling -- SoA. 
 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 * @author Brian Paul <brianp@vmware.com> 34 */ 35 36#include "pipe/p_defines.h" 37#include "pipe/p_state.h" 38#include "util/u_debug.h" 39#include "util/u_dump.h" 40#include "util/u_memory.h" 41#include "util/u_math.h" 42#include "util/u_format.h" 43#include "util/u_cpu_detect.h" 44#include "lp_bld_debug.h" 45#include "lp_bld_type.h" 46#include "lp_bld_const.h" 47#include "lp_bld_conv.h" 48#include "lp_bld_arit.h" 49#include "lp_bld_logic.h" 50#include "lp_bld_swizzle.h" 51#include "lp_bld_pack.h" 52#include "lp_bld_flow.h" 53#include "lp_bld_gather.h" 54#include "lp_bld_format.h" 55#include "lp_bld_sample.h" 56#include "lp_bld_quad.h" 57 58 59/** 60 * Keep all information for sampling code generation in a single place. 61 */ 62struct lp_build_sample_context 63{ 64 LLVMBuilderRef builder; 65 66 const struct lp_sampler_static_state *static_state; 67 68 struct lp_sampler_dynamic_state *dynamic_state; 69 70 const struct util_format_description *format_desc; 71 72 /** regular scalar float type */ 73 struct lp_type float_type; 74 struct lp_build_context float_bld; 75 76 /** regular scalar int type */ 77 struct lp_type int_type; 78 struct lp_build_context int_bld; 79 80 /** Incoming coordinates type and build context */ 81 struct lp_type coord_type; 82 struct lp_build_context coord_bld; 83 84 /** Unsigned integer coordinates */ 85 struct lp_type uint_coord_type; 86 struct lp_build_context uint_coord_bld; 87 88 /** Signed integer coordinates */ 89 struct lp_type int_coord_type; 90 struct lp_build_context int_coord_bld; 91 92 /** Output texels type and build context */ 93 struct lp_type texel_type; 94 struct lp_build_context texel_bld; 95}; 96 97 98/** 99 * Does the given texture wrap mode allow sampling the texture border color? 100 * XXX maybe move this into gallium util code. 
101 */ 102static boolean 103wrap_mode_uses_border_color(unsigned mode) 104{ 105 switch (mode) { 106 case PIPE_TEX_WRAP_REPEAT: 107 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 108 case PIPE_TEX_WRAP_MIRROR_REPEAT: 109 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 110 return FALSE; 111 case PIPE_TEX_WRAP_CLAMP: 112 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 113 case PIPE_TEX_WRAP_MIRROR_CLAMP: 114 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 115 return TRUE; 116 default: 117 assert(0 && "unexpected wrap mode"); 118 return FALSE; 119 } 120} 121 122 123static LLVMValueRef 124lp_build_get_mipmap_level(struct lp_build_sample_context *bld, 125 LLVMValueRef data_array, LLVMValueRef level) 126{ 127 LLVMValueRef indexes[2], data_ptr; 128 indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 129 indexes[1] = level; 130 data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); 131 data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); 132 return data_ptr; 133} 134 135 136static LLVMValueRef 137lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, 138 LLVMValueRef data_array, int level) 139{ 140 LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); 141 return lp_build_get_mipmap_level(bld, data_array, lvl); 142} 143 144 145/** 146 * Dereference stride_array[mipmap_level] array to get a stride. 147 * Return stride as a vector. 148 */ 149static LLVMValueRef 150lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, 151 LLVMValueRef stride_array, LLVMValueRef level) 152{ 153 LLVMValueRef indexes[2], stride; 154 indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 155 indexes[1] = level; 156 stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); 157 stride = LLVMBuildLoad(bld->builder, stride, ""); 158 stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); 159 return stride; 160} 161 162 163/** Dereference stride_array[0] array to get a stride (as vector). 
*/ 164static LLVMValueRef 165lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld, 166 LLVMValueRef stride_array, int level) 167{ 168 LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); 169 return lp_build_get_level_stride_vec(bld, stride_array, lvl); 170} 171 172 173static int 174texture_dims(enum pipe_texture_target tex) 175{ 176 switch (tex) { 177 case PIPE_TEXTURE_1D: 178 return 1; 179 case PIPE_TEXTURE_2D: 180 case PIPE_TEXTURE_CUBE: 181 return 2; 182 case PIPE_TEXTURE_3D: 183 return 3; 184 default: 185 assert(0 && "bad texture target in texture_dims()"); 186 return 2; 187 } 188} 189 190 191static void 192apply_sampler_swizzle(struct lp_build_sample_context *bld, 193 LLVMValueRef *texel) 194{ 195 unsigned char swizzles[4]; 196 197 swizzles[0] = bld->static_state->swizzle_r; 198 swizzles[1] = bld->static_state->swizzle_g; 199 swizzles[2] = bld->static_state->swizzle_b; 200 swizzles[3] = bld->static_state->swizzle_a; 201 202 lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles); 203} 204 205 206 207/** 208 * Generate code to fetch a texel from a texture at int coords (x, y, z). 209 * The computation depends on whether the texture is 1D, 2D or 3D. 
210 * The result, texel, will be: 211 * texel[0] = red values 212 * texel[1] = green values 213 * texel[2] = blue values 214 * texel[3] = alpha values 215 */ 216static void 217lp_build_sample_texel_soa(struct lp_build_sample_context *bld, 218 LLVMValueRef width, 219 LLVMValueRef height, 220 LLVMValueRef depth, 221 LLVMValueRef x, 222 LLVMValueRef y, 223 LLVMValueRef z, 224 LLVMValueRef y_stride, 225 LLVMValueRef z_stride, 226 LLVMValueRef data_ptr, 227 LLVMValueRef texel_out[4]) 228{ 229 const int dims = texture_dims(bld->static_state->target); 230 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 231 LLVMValueRef offset; 232 LLVMValueRef i, j; 233 LLVMValueRef use_border = NULL; 234 235 /* use_border = x < 0 || x >= width || y < 0 || y >= height */ 236 if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) { 237 LLVMValueRef b1, b2; 238 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); 239 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); 240 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); 241 } 242 243 if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) { 244 LLVMValueRef b1, b2; 245 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); 246 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); 247 if (use_border) { 248 use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); 249 use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); 250 } 251 else { 252 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); 253 } 254 } 255 256 if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) { 257 LLVMValueRef b1, b2; 258 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); 259 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); 260 if (use_border) { 261 use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); 262 use_border = LLVMBuildOr(bld->builder, use_border, b2, 
"ub_or_b2"); 263 } 264 else { 265 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); 266 } 267 } 268 269 /* convert x,y,z coords to linear offset from start of texture, in bytes */ 270 lp_build_sample_offset(&bld->uint_coord_bld, 271 bld->format_desc, 272 x, y, z, y_stride, z_stride, 273 &offset, &i, &j); 274 275 if (use_border) { 276 /* If we can sample the border color, it means that texcoords may 277 * lie outside the bounds of the texture image. We need to do 278 * something to prevent reading out of bounds and causing a segfault. 279 * 280 * Simply AND the texture coords with !use_border. This will cause 281 * coords which are out of bounds to become zero. Zero's guaranteed 282 * to be inside the texture image. 283 */ 284 offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border); 285 } 286 287 lp_build_fetch_rgba_soa(bld->builder, 288 bld->format_desc, 289 bld->texel_type, 290 data_ptr, offset, 291 i, j, 292 texel_out); 293 294 apply_sampler_swizzle(bld, texel_out); 295 296 /* 297 * Note: if we find an app which frequently samples the texture border 298 * we might want to implement a true conditional here to avoid sampling 299 * the texture whenever possible (since that's quite a bit of code). 300 * Ex: 301 * if (use_border) { 302 * texel = border_color; 303 * } 304 * else { 305 * texel = sample_texture(coord); 306 * } 307 * As it is now, we always sample the texture, then selectively replace 308 * the texel color results with the border color. 309 */ 310 311 if (use_border) { 312 /* select texel color or border color depending on use_border */ 313 int chan; 314 for (chan = 0; chan < 4; chan++) { 315 LLVMValueRef border_chan = 316 lp_build_const_vec(bld->texel_type, 317 bld->static_state->border_color[chan]); 318 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border, 319 border_chan, texel_out[chan]); 320 } 321 } 322} 323 324 325/** 326 * Fetch the texels as <4n x i8> in AoS form. 
327 */ 328static LLVMValueRef 329lp_build_sample_packed(struct lp_build_sample_context *bld, 330 LLVMValueRef x, 331 LLVMValueRef y, 332 LLVMValueRef y_stride, 333 LLVMValueRef data_array) 334{ 335 LLVMValueRef offset, i, j; 336 LLVMValueRef data_ptr; 337 LLVMValueRef res; 338 339 /* convert x,y,z coords to linear offset from start of texture, in bytes */ 340 lp_build_sample_offset(&bld->uint_coord_bld, 341 bld->format_desc, 342 x, y, NULL, y_stride, NULL, 343 &offset, &i, &j); 344 345 /* get pointer to mipmap level 0 data */ 346 data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); 347 348 if (util_format_is_rgba8_variant(bld->format_desc)) { 349 /* Just fetch the data directly without swizzling */ 350 assert(bld->format_desc->block.width == 1); 351 assert(bld->format_desc->block.height == 1); 352 assert(bld->format_desc->block.bits <= bld->texel_type.width); 353 354 res = lp_build_gather(bld->builder, 355 bld->texel_type.length, 356 bld->format_desc->block.bits, 357 bld->texel_type.width, 358 data_ptr, offset); 359 } 360 else { 361 struct lp_type type; 362 363 assert(bld->texel_type.width == 32); 364 365 memset(&type, 0, sizeof type); 366 type.width = 8; 367 type.length = bld->texel_type.length*4; 368 type.norm = TRUE; 369 370 res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type, 371 data_ptr, offset, i, j); 372 } 373 374 return res; 375} 376 377 378/** 379 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. 
380 */ 381static LLVMValueRef 382lp_build_coord_mirror(struct lp_build_sample_context *bld, 383 LLVMValueRef coord) 384{ 385 struct lp_build_context *coord_bld = &bld->coord_bld; 386 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 387 LLVMValueRef fract, flr, isOdd; 388 389 /* fract = coord - floor(coord) */ 390 fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord)); 391 392 /* flr = ifloor(coord); */ 393 flr = lp_build_ifloor(coord_bld, coord); 394 395 /* isOdd = flr & 1 */ 396 isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, ""); 397 398 /* make coord positive or negative depending on isOdd */ 399 coord = lp_build_set_sign(coord_bld, fract, isOdd); 400 401 /* convert isOdd to float */ 402 isOdd = lp_build_int_to_float(coord_bld, isOdd); 403 404 /* add isOdd to coord */ 405 coord = lp_build_add(coord_bld, coord, isOdd); 406 407 return coord; 408} 409 410 411/** 412 * We only support a few wrap modes in lp_build_sample_wrap_int() at this time. 413 * Return whether the given mode is supported by that function. 414 */ 415static boolean 416is_simple_wrap_mode(unsigned mode) 417{ 418 switch (mode) { 419 case PIPE_TEX_WRAP_REPEAT: 420 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 421 return TRUE; 422 default: 423 return FALSE; 424 } 425} 426 427 428/** 429 * Build LLVM code for texture wrap mode, for scaled integer texcoords. 
430 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size 431 * \param length the texture size along one dimension 432 * \param is_pot if TRUE, length is a power of two 433 * \param wrap_mode one of PIPE_TEX_WRAP_x 434 */ 435static LLVMValueRef 436lp_build_sample_wrap_int(struct lp_build_sample_context *bld, 437 LLVMValueRef coord, 438 LLVMValueRef length, 439 boolean is_pot, 440 unsigned wrap_mode) 441{ 442 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; 443 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 444 LLVMValueRef length_minus_one; 445 446 length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); 447 448 switch(wrap_mode) { 449 case PIPE_TEX_WRAP_REPEAT: 450 if(is_pot) 451 coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); 452 else 453 /* Signed remainder won't give the right results for negative 454 * dividends but unsigned remainder does.*/ 455 coord = LLVMBuildURem(bld->builder, coord, length, ""); 456 break; 457 458 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 459 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); 460 coord = lp_build_min(int_coord_bld, coord, length_minus_one); 461 break; 462 463 case PIPE_TEX_WRAP_CLAMP: 464 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 465 case PIPE_TEX_WRAP_MIRROR_REPEAT: 466 case PIPE_TEX_WRAP_MIRROR_CLAMP: 467 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 468 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 469 default: 470 assert(0); 471 } 472 473 return coord; 474} 475 476 477/** 478 * Build LLVM code for texture wrap mode for linear filtering. 
479 * \param x0_out returns first integer texcoord 480 * \param x1_out returns second integer texcoord 481 * \param weight_out returns linear interpolation weight 482 */ 483static void 484lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, 485 LLVMValueRef coord, 486 LLVMValueRef length, 487 boolean is_pot, 488 unsigned wrap_mode, 489 LLVMValueRef *x0_out, 490 LLVMValueRef *x1_out, 491 LLVMValueRef *weight_out) 492{ 493 struct lp_build_context *coord_bld = &bld->coord_bld; 494 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 495 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; 496 LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); 497 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); 498 LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); 499 LLVMValueRef coord0, coord1, weight; 500 501 switch(wrap_mode) { 502 case PIPE_TEX_WRAP_REPEAT: 503 /* mul by size and subtract 0.5 */ 504 coord = lp_build_mul(coord_bld, coord, length_f); 505 coord = lp_build_sub(coord_bld, coord, half); 506 /* convert to int */ 507 coord0 = lp_build_ifloor(coord_bld, coord); 508 coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one); 509 /* compute lerp weight */ 510 weight = lp_build_fract(coord_bld, coord); 511 /* repeat wrap */ 512 if (is_pot) { 513 coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); 514 coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, ""); 515 } 516 else { 517 /* Signed remainder won't give the right results for negative 518 * dividends but unsigned remainder does.*/ 519 coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); 520 coord1 = LLVMBuildURem(bld->builder, coord1, length, ""); 521 } 522 break; 523 524 case PIPE_TEX_WRAP_CLAMP: 525 if (bld->static_state->normalized_coords) { 526 /* scale coord to length */ 527 coord = lp_build_mul(coord_bld, coord, length_f); 528 } 529 530 /* clamp to [0, length] */ 531 coord = 
lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f); 532 533 coord = lp_build_sub(coord_bld, coord, half); 534 535 weight = lp_build_fract(coord_bld, coord); 536 coord0 = lp_build_ifloor(coord_bld, coord); 537 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 538 break; 539 540 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 541 if (bld->static_state->normalized_coords) { 542 /* clamp to [0,1] */ 543 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one); 544 /* mul by tex size and subtract 0.5 */ 545 coord = lp_build_mul(coord_bld, coord, length_f); 546 coord = lp_build_sub(coord_bld, coord, half); 547 } 548 else { 549 LLVMValueRef min, max; 550 /* clamp to [0.5, length - 0.5] */ 551 min = half; 552 max = lp_build_sub(coord_bld, length_f, min); 553 coord = lp_build_clamp(coord_bld, coord, min, max); 554 } 555 /* compute lerp weight */ 556 weight = lp_build_fract(coord_bld, coord); 557 /* coord0 = floor(coord); */ 558 coord0 = lp_build_ifloor(coord_bld, coord); 559 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 560 /* coord0 = max(coord0, 0) */ 561 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); 562 /* coord1 = min(coord1, length-1) */ 563 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 564 break; 565 566 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 567 { 568 LLVMValueRef min, max; 569 if (bld->static_state->normalized_coords) { 570 /* scale coord to length */ 571 coord = lp_build_mul(coord_bld, coord, length_f); 572 } 573 /* clamp to [-0.5, length + 0.5] */ 574 min = lp_build_const_vec(coord_bld->type, -0.5F); 575 max = lp_build_sub(coord_bld, length_f, min); 576 coord = lp_build_clamp(coord_bld, coord, min, max); 577 coord = lp_build_sub(coord_bld, coord, half); 578 /* compute lerp weight */ 579 weight = lp_build_fract(coord_bld, coord); 580 /* convert to int */ 581 coord0 = lp_build_ifloor(coord_bld, coord); 582 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 583 } 584 
break; 585 586 case PIPE_TEX_WRAP_MIRROR_REPEAT: 587 /* compute mirror function */ 588 coord = lp_build_coord_mirror(bld, coord); 589 590 /* scale coord to length */ 591 coord = lp_build_mul(coord_bld, coord, length_f); 592 coord = lp_build_sub(coord_bld, coord, half); 593 594 /* compute lerp weight */ 595 weight = lp_build_fract(coord_bld, coord); 596 597 /* convert to int coords */ 598 coord0 = lp_build_ifloor(coord_bld, coord); 599 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 600 601 /* coord0 = max(coord0, 0) */ 602 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); 603 /* coord1 = min(coord1, length-1) */ 604 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 605 break; 606 607 case PIPE_TEX_WRAP_MIRROR_CLAMP: 608 coord = lp_build_abs(coord_bld, coord); 609 610 if (bld->static_state->normalized_coords) { 611 /* scale coord to length */ 612 coord = lp_build_mul(coord_bld, coord, length_f); 613 } 614 615 /* clamp to [0, length] */ 616 coord = lp_build_min(coord_bld, coord, length_f); 617 618 coord = lp_build_sub(coord_bld, coord, half); 619 620 weight = lp_build_fract(coord_bld, coord); 621 coord0 = lp_build_ifloor(coord_bld, coord); 622 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 623 break; 624 625 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 626 { 627 LLVMValueRef min, max; 628 629 coord = lp_build_abs(coord_bld, coord); 630 631 if (bld->static_state->normalized_coords) { 632 /* scale coord to length */ 633 coord = lp_build_mul(coord_bld, coord, length_f); 634 } 635 636 /* clamp to [0.5, length - 0.5] */ 637 min = half; 638 max = lp_build_sub(coord_bld, length_f, min); 639 coord = lp_build_clamp(coord_bld, coord, min, max); 640 641 coord = lp_build_sub(coord_bld, coord, half); 642 643 weight = lp_build_fract(coord_bld, coord); 644 coord0 = lp_build_ifloor(coord_bld, coord); 645 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 646 } 647 break; 648 649 case 
PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 650 { 651 LLVMValueRef min, max; 652 653 coord = lp_build_abs(coord_bld, coord); 654 655 if (bld->static_state->normalized_coords) { 656 /* scale coord to length */ 657 coord = lp_build_mul(coord_bld, coord, length_f); 658 } 659 660 /* clamp to [-0.5, length + 0.5] */ 661 min = lp_build_negate(coord_bld, half); 662 max = lp_build_sub(coord_bld, length_f, min); 663 coord = lp_build_clamp(coord_bld, coord, min, max); 664 665 coord = lp_build_sub(coord_bld, coord, half); 666 667 weight = lp_build_fract(coord_bld, coord); 668 coord0 = lp_build_ifloor(coord_bld, coord); 669 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 670 } 671 break; 672 673 default: 674 assert(0); 675 coord0 = NULL; 676 coord1 = NULL; 677 weight = NULL; 678 } 679 680 *x0_out = coord0; 681 *x1_out = coord1; 682 *weight_out = weight; 683} 684 685 686/** 687 * Build LLVM code for texture wrap mode for nearest filtering. 688 * \param coord the incoming texcoord (nominally in [0,1]) 689 * \param length the texture size along one dimension, as int 690 * \param is_pot if TRUE, length is a power of two 691 * \param wrap_mode one of PIPE_TEX_WRAP_x 692 */ 693static LLVMValueRef 694lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, 695 LLVMValueRef coord, 696 LLVMValueRef length, 697 boolean is_pot, 698 unsigned wrap_mode) 699{ 700 struct lp_build_context *coord_bld = &bld->coord_bld; 701 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 702 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; 703 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); 704 LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); 705 LLVMValueRef icoord; 706 707 switch(wrap_mode) { 708 case PIPE_TEX_WRAP_REPEAT: 709 coord = lp_build_mul(coord_bld, coord, length_f); 710 icoord = lp_build_ifloor(coord_bld, coord); 711 if (is_pot) 712 icoord = LLVMBuildAnd(bld->builder, icoord, 
length_minus_one, ""); 713 else 714 /* Signed remainder won't give the right results for negative 715 * dividends but unsigned remainder does.*/ 716 icoord = LLVMBuildURem(bld->builder, icoord, length, ""); 717 break; 718 719 case PIPE_TEX_WRAP_CLAMP: 720 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 721 if (bld->static_state->normalized_coords) { 722 /* scale coord to length */ 723 coord = lp_build_mul(coord_bld, coord, length_f); 724 } 725 726 /* floor */ 727 icoord = lp_build_ifloor(coord_bld, coord); 728 729 /* clamp to [0, length - 1]. */ 730 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, 731 length_minus_one); 732 break; 733 734 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 735 /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */ 736 { 737 LLVMValueRef min, max; 738 739 if (bld->static_state->normalized_coords) { 740 /* scale coord to length */ 741 coord = lp_build_mul(coord_bld, coord, length_f); 742 } 743 744 icoord = lp_build_ifloor(coord_bld, coord); 745 746 /* clamp to [-1, length] */ 747 min = lp_build_negate(int_coord_bld, int_coord_bld->one); 748 max = length; 749 icoord = lp_build_clamp(int_coord_bld, icoord, min, max); 750 } 751 break; 752 753 case PIPE_TEX_WRAP_MIRROR_REPEAT: 754 /* compute mirror function */ 755 coord = lp_build_coord_mirror(bld, coord); 756 757 /* scale coord to length */ 758 assert(bld->static_state->normalized_coords); 759 coord = lp_build_mul(coord_bld, coord, length_f); 760 761 icoord = lp_build_ifloor(coord_bld, coord); 762 763 /* clamp to [0, length - 1] */ 764 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); 765 break; 766 767 case PIPE_TEX_WRAP_MIRROR_CLAMP: 768 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 769 coord = lp_build_abs(coord_bld, coord); 770 771 if (bld->static_state->normalized_coords) { 772 /* scale coord to length */ 773 coord = lp_build_mul(coord_bld, coord, length_f); 774 } 775 776 icoord = lp_build_ifloor(coord_bld, coord); 777 778 /* clamp to [0, length - 1] */ 779 icoord = 
lp_build_min(int_coord_bld, icoord, length_minus_one); 780 break; 781 782 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 783 coord = lp_build_abs(coord_bld, coord); 784 785 if (bld->static_state->normalized_coords) { 786 /* scale coord to length */ 787 coord = lp_build_mul(coord_bld, coord, length_f); 788 } 789 790 icoord = lp_build_ifloor(coord_bld, coord); 791 792 /* clamp to [0, length] */ 793 icoord = lp_build_min(int_coord_bld, icoord, length); 794 break; 795 796 default: 797 assert(0); 798 icoord = NULL; 799 } 800 801 return icoord; 802} 803 804 805/** 806 * Codegen equivalent for u_minify(). 807 * Return max(1, base_size >> level); 808 */ 809static LLVMValueRef 810lp_build_minify(struct lp_build_sample_context *bld, 811 LLVMValueRef base_size, 812 LLVMValueRef level) 813{ 814 LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); 815 size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); 816 return size; 817} 818 819 820/** 821 * Generate code to compute texture level of detail (lambda). 822 * \param ddx partial derivatives of (s, t, r, q) with respect to X 823 * \param ddy partial derivatives of (s, t, r, q) with respect to Y 824 * \param lod_bias optional float vector with the shader lod bias 825 * \param explicit_lod optional float vector with the explicit lod 826 * \param width scalar int texture width 827 * \param height scalar int texture height 828 * \param depth scalar int texture depth 829 * 830 * XXX: The resulting lod is scalar, so ignore all but the first element of 831 * derivatives, lod_bias, etc that are passed by the shader. 
832 */ 833static LLVMValueRef 834lp_build_lod_selector(struct lp_build_sample_context *bld, 835 const LLVMValueRef ddx[4], 836 const LLVMValueRef ddy[4], 837 LLVMValueRef lod_bias, /* optional */ 838 LLVMValueRef explicit_lod, /* optional */ 839 LLVMValueRef width, 840 LLVMValueRef height, 841 LLVMValueRef depth) 842 843{ 844 if (bld->static_state->min_lod == bld->static_state->max_lod) { 845 /* User is forcing sampling from a particular mipmap level. 846 * This is hit during mipmap generation. 847 */ 848 return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); 849 } 850 else { 851 struct lp_build_context *float_bld = &bld->float_bld; 852 LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), 853 bld->static_state->lod_bias); 854 LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), 855 bld->static_state->min_lod); 856 LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), 857 bld->static_state->max_lod); 858 LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 859 LLVMValueRef lod; 860 861 if (explicit_lod) { 862 lod = LLVMBuildExtractElement(bld->builder, explicit_lod, 863 index0, ""); 864 } 865 else { 866 const int dims = texture_dims(bld->static_state->target); 867 LLVMValueRef dsdx, dsdy; 868 LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; 869 LLVMValueRef rho; 870 871 dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); 872 dsdx = lp_build_abs(float_bld, dsdx); 873 dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); 874 dsdy = lp_build_abs(float_bld, dsdy); 875 if (dims > 1) { 876 dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); 877 dtdx = lp_build_abs(float_bld, dtdx); 878 dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); 879 dtdy = lp_build_abs(float_bld, dtdy); 880 if (dims > 2) { 881 drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); 882 drdx = lp_build_abs(float_bld, drdx); 883 drdy = LLVMBuildExtractElement(bld->builder, 
ddy[2], index0, "drdy"); 884 drdy = lp_build_abs(float_bld, drdy); 885 } 886 } 887 888 /* Compute rho = max of all partial derivatives scaled by texture size. 889 * XXX this could be vectorized somewhat 890 */ 891 rho = LLVMBuildFMul(bld->builder, 892 lp_build_max(float_bld, dsdx, dsdy), 893 lp_build_int_to_float(float_bld, width), ""); 894 if (dims > 1) { 895 LLVMValueRef max; 896 max = LLVMBuildFMul(bld->builder, 897 lp_build_max(float_bld, dtdx, dtdy), 898 lp_build_int_to_float(float_bld, height), ""); 899 rho = lp_build_max(float_bld, rho, max); 900 if (dims > 2) { 901 max = LLVMBuildFMul(bld->builder, 902 lp_build_max(float_bld, drdx, drdy), 903 lp_build_int_to_float(float_bld, depth), ""); 904 rho = lp_build_max(float_bld, rho, max); 905 } 906 } 907 908 /* compute lod = log2(rho) */ 909 lod = lp_build_log2(float_bld, rho); 910 911 /* add shader lod bias */ 912 if (lod_bias) { 913 lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, 914 index0, ""); 915 lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); 916 } 917 } 918 919 /* add sampler lod bias */ 920 lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); 921 922 /* clamp lod */ 923 lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); 924 925 return lod; 926 } 927} 928 929 930/** 931 * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer 932 * mipmap level index. 933 * Note: this is all scalar code. 
934 * \param lod scalar float texture level of detail 935 * \param level_out returns integer 936 */ 937static void 938lp_build_nearest_mip_level(struct lp_build_sample_context *bld, 939 unsigned unit, 940 LLVMValueRef lod, 941 LLVMValueRef *level_out) 942{ 943 struct lp_build_context *float_bld = &bld->float_bld; 944 struct lp_build_context *int_bld = &bld->int_bld; 945 LLVMValueRef last_level, level; 946 947 LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); 948 949 last_level = bld->dynamic_state->last_level(bld->dynamic_state, 950 bld->builder, unit); 951 952 /* convert float lod to integer */ 953 level = lp_build_iround(float_bld, lod); 954 955 /* clamp level to legal range of levels */ 956 *level_out = lp_build_clamp(int_bld, level, zero, last_level); 957} 958 959 960/** 961 * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to 962 * two (adjacent) mipmap level indexes. Later, we'll sample from those 963 * two mipmap levels and interpolate between them. 964 */ 965static void 966lp_build_linear_mip_levels(struct lp_build_sample_context *bld, 967 unsigned unit, 968 LLVMValueRef lod, 969 LLVMValueRef *level0_out, 970 LLVMValueRef *level1_out, 971 LLVMValueRef *weight_out) 972{ 973 struct lp_build_context *float_bld = &bld->float_bld; 974 struct lp_build_context *int_bld = &bld->int_bld; 975 LLVMValueRef last_level, level; 976 977 last_level = bld->dynamic_state->last_level(bld->dynamic_state, 978 bld->builder, unit); 979 980 /* convert float lod to integer */ 981 level = lp_build_ifloor(float_bld, lod); 982 983 /* compute level 0 and clamp to legal range of levels */ 984 *level0_out = lp_build_clamp(int_bld, level, 985 int_bld->zero, 986 last_level); 987 /* compute level 1 and clamp to legal range of levels */ 988 level = lp_build_add(int_bld, level, int_bld->one); 989 *level1_out = lp_build_clamp(int_bld, level, 990 int_bld->zero, 991 last_level); 992 993 *weight_out = lp_build_fract(float_bld, lod); 994} 995 996 997/** 998 * Generate code to 
sample a mipmap level with nearest filtering. 999 * If sampling a cube texture, r = cube face in [0,5]. 1000 */ 1001static void 1002lp_build_sample_image_nearest(struct lp_build_sample_context *bld, 1003 LLVMValueRef width_vec, 1004 LLVMValueRef height_vec, 1005 LLVMValueRef depth_vec, 1006 LLVMValueRef row_stride_vec, 1007 LLVMValueRef img_stride_vec, 1008 LLVMValueRef data_ptr, 1009 LLVMValueRef s, 1010 LLVMValueRef t, 1011 LLVMValueRef r, 1012 LLVMValueRef colors_out[4]) 1013{ 1014 const int dims = texture_dims(bld->static_state->target); 1015 LLVMValueRef x, y, z; 1016 1017 /* 1018 * Compute integer texcoords. 1019 */ 1020 x = lp_build_sample_wrap_nearest(bld, s, width_vec, 1021 bld->static_state->pot_width, 1022 bld->static_state->wrap_s); 1023 lp_build_name(x, "tex.x.wrapped"); 1024 1025 if (dims >= 2) { 1026 y = lp_build_sample_wrap_nearest(bld, t, height_vec, 1027 bld->static_state->pot_height, 1028 bld->static_state->wrap_t); 1029 lp_build_name(y, "tex.y.wrapped"); 1030 1031 if (dims == 3) { 1032 z = lp_build_sample_wrap_nearest(bld, r, depth_vec, 1033 bld->static_state->pot_height, 1034 bld->static_state->wrap_r); 1035 lp_build_name(z, "tex.z.wrapped"); 1036 } 1037 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 1038 z = r; 1039 } 1040 else { 1041 z = NULL; 1042 } 1043 } 1044 else { 1045 y = z = NULL; 1046 } 1047 1048 /* 1049 * Get texture colors. 1050 */ 1051 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1052 x, y, z, 1053 row_stride_vec, img_stride_vec, 1054 data_ptr, colors_out); 1055} 1056 1057 1058/** 1059 * Generate code to sample a mipmap level with linear filtering. 1060 * If sampling a cube texture, r = cube face in [0,5]. 
1061 */ 1062static void 1063lp_build_sample_image_linear(struct lp_build_sample_context *bld, 1064 LLVMValueRef width_vec, 1065 LLVMValueRef height_vec, 1066 LLVMValueRef depth_vec, 1067 LLVMValueRef row_stride_vec, 1068 LLVMValueRef img_stride_vec, 1069 LLVMValueRef data_ptr, 1070 LLVMValueRef s, 1071 LLVMValueRef t, 1072 LLVMValueRef r, 1073 LLVMValueRef colors_out[4]) 1074{ 1075 const int dims = texture_dims(bld->static_state->target); 1076 LLVMValueRef x0, y0, z0, x1, y1, z1; 1077 LLVMValueRef s_fpart, t_fpart, r_fpart; 1078 LLVMValueRef neighbors[2][2][4]; 1079 int chan; 1080 1081 /* 1082 * Compute integer texcoords. 1083 */ 1084 lp_build_sample_wrap_linear(bld, s, width_vec, 1085 bld->static_state->pot_width, 1086 bld->static_state->wrap_s, 1087 &x0, &x1, &s_fpart); 1088 lp_build_name(x0, "tex.x0.wrapped"); 1089 lp_build_name(x1, "tex.x1.wrapped"); 1090 1091 if (dims >= 2) { 1092 lp_build_sample_wrap_linear(bld, t, height_vec, 1093 bld->static_state->pot_height, 1094 bld->static_state->wrap_t, 1095 &y0, &y1, &t_fpart); 1096 lp_build_name(y0, "tex.y0.wrapped"); 1097 lp_build_name(y1, "tex.y1.wrapped"); 1098 1099 if (dims == 3) { 1100 lp_build_sample_wrap_linear(bld, r, depth_vec, 1101 bld->static_state->pot_depth, 1102 bld->static_state->wrap_r, 1103 &z0, &z1, &r_fpart); 1104 lp_build_name(z0, "tex.z0.wrapped"); 1105 lp_build_name(z1, "tex.z1.wrapped"); 1106 } 1107 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 1108 z0 = z1 = r; /* cube face */ 1109 r_fpart = NULL; 1110 } 1111 else { 1112 z0 = z1 = NULL; 1113 r_fpart = NULL; 1114 } 1115 } 1116 else { 1117 y0 = y1 = t_fpart = NULL; 1118 z0 = z1 = r_fpart = NULL; 1119 } 1120 1121 /* 1122 * Get texture colors. 
1123 */ 1124 /* get x0/x1 texels */ 1125 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1126 x0, y0, z0, 1127 row_stride_vec, img_stride_vec, 1128 data_ptr, neighbors[0][0]); 1129 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1130 x1, y0, z0, 1131 row_stride_vec, img_stride_vec, 1132 data_ptr, neighbors[0][1]); 1133 1134 if (dims == 1) { 1135 /* Interpolate two samples from 1D image to produce one color */ 1136 for (chan = 0; chan < 4; chan++) { 1137 colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart, 1138 neighbors[0][0][chan], 1139 neighbors[0][1][chan]); 1140 } 1141 } 1142 else { 1143 /* 2D/3D texture */ 1144 LLVMValueRef colors0[4]; 1145 1146 /* get x0/x1 texels at y1 */ 1147 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1148 x0, y1, z0, 1149 row_stride_vec, img_stride_vec, 1150 data_ptr, neighbors[1][0]); 1151 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1152 x1, y1, z0, 1153 row_stride_vec, img_stride_vec, 1154 data_ptr, neighbors[1][1]); 1155 1156 /* Bilinear interpolate the four samples from the 2D image / 3D slice */ 1157 for (chan = 0; chan < 4; chan++) { 1158 colors0[chan] = lp_build_lerp_2d(&bld->texel_bld, 1159 s_fpart, t_fpart, 1160 neighbors[0][0][chan], 1161 neighbors[0][1][chan], 1162 neighbors[1][0][chan], 1163 neighbors[1][1][chan]); 1164 } 1165 1166 if (dims == 3) { 1167 LLVMValueRef neighbors1[2][2][4]; 1168 LLVMValueRef colors1[4]; 1169 1170 /* get x0/x1/y0/y1 texels at z1 */ 1171 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1172 x0, y0, z1, 1173 row_stride_vec, img_stride_vec, 1174 data_ptr, neighbors1[0][0]); 1175 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1176 x1, y0, z1, 1177 row_stride_vec, img_stride_vec, 1178 data_ptr, neighbors1[0][1]); 1179 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1180 x0, y1, z1, 1181 row_stride_vec, img_stride_vec, 1182 data_ptr, neighbors1[1][0]); 1183 
lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec, 1184 x1, y1, z1, 1185 row_stride_vec, img_stride_vec, 1186 data_ptr, neighbors1[1][1]); 1187 1188 /* Bilinear interpolate the four samples from the second Z slice */ 1189 for (chan = 0; chan < 4; chan++) { 1190 colors1[chan] = lp_build_lerp_2d(&bld->texel_bld, 1191 s_fpart, t_fpart, 1192 neighbors1[0][0][chan], 1193 neighbors1[0][1][chan], 1194 neighbors1[1][0][chan], 1195 neighbors1[1][1][chan]); 1196 } 1197 1198 /* Linearly interpolate the two samples from the two 3D slices */ 1199 for (chan = 0; chan < 4; chan++) { 1200 colors_out[chan] = lp_build_lerp(&bld->texel_bld, 1201 r_fpart, 1202 colors0[chan], colors1[chan]); 1203 } 1204 } 1205 else { 1206 /* 2D tex */ 1207 for (chan = 0; chan < 4; chan++) { 1208 colors_out[chan] = colors0[chan]; 1209 } 1210 } 1211 } 1212} 1213 1214 1215/** Helper used by lp_build_cube_lookup() */ 1216static LLVMValueRef 1217lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) 1218{ 1219 /* ima = -0.5 / abs(coord); */ 1220 LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); 1221 LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); 1222 LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); 1223 return ima; 1224} 1225 1226 1227/** 1228 * Helper used by lp_build_cube_lookup() 1229 * \param sign scalar +1 or -1 1230 * \param coord float vector 1231 * \param ima float vector 1232 */ 1233static LLVMValueRef 1234lp_build_cube_coord(struct lp_build_context *coord_bld, 1235 LLVMValueRef sign, int negate_coord, 1236 LLVMValueRef coord, LLVMValueRef ima) 1237{ 1238 /* return negate(coord) * ima * sign + 0.5; */ 1239 LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); 1240 LLVMValueRef res; 1241 1242 assert(negate_coord == +1 || negate_coord == -1); 1243 1244 if (negate_coord == -1) { 1245 coord = lp_build_negate(coord_bld, coord); 1246 } 1247 1248 res = lp_build_mul(coord_bld, coord, ima); 1249 if (sign) { 1250 sign = 
lp_build_broadcast_scalar(coord_bld, sign); 1251 res = lp_build_mul(coord_bld, res, sign); 1252 } 1253 res = lp_build_add(coord_bld, res, half); 1254 1255 return res; 1256} 1257 1258 1259/** Helper used by lp_build_cube_lookup() 1260 * Return (major_coord >= 0) ? pos_face : neg_face; 1261 */ 1262static LLVMValueRef 1263lp_build_cube_face(struct lp_build_sample_context *bld, 1264 LLVMValueRef major_coord, 1265 unsigned pos_face, unsigned neg_face) 1266{ 1267 LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, 1268 major_coord, 1269 bld->float_bld.zero, ""); 1270 LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); 1271 LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0); 1272 LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); 1273 return res; 1274} 1275 1276 1277 1278/** 1279 * Generate code to do cube face selection and compute per-face texcoords. 1280 */ 1281static void 1282lp_build_cube_lookup(struct lp_build_sample_context *bld, 1283 LLVMValueRef s, 1284 LLVMValueRef t, 1285 LLVMValueRef r, 1286 LLVMValueRef *face, 1287 LLVMValueRef *face_s, 1288 LLVMValueRef *face_t) 1289{ 1290 struct lp_build_context *float_bld = &bld->float_bld; 1291 struct lp_build_context *coord_bld = &bld->coord_bld; 1292 LLVMValueRef rx, ry, rz; 1293 LLVMValueRef arx, ary, arz; 1294 LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); 1295 LLVMValueRef arx_ge_ary, arx_ge_arz; 1296 LLVMValueRef ary_ge_arx, ary_ge_arz; 1297 LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; 1298 LLVMValueRef rx_pos, ry_pos, rz_pos; 1299 1300 assert(bld->coord_bld.type.length == 4); 1301 1302 /* 1303 * Use the average of the four pixel's texcoords to choose the face. 
1304 */ 1305 rx = lp_build_mul(float_bld, c25, 1306 lp_build_sum_vector(&bld->coord_bld, s)); 1307 ry = lp_build_mul(float_bld, c25, 1308 lp_build_sum_vector(&bld->coord_bld, t)); 1309 rz = lp_build_mul(float_bld, c25, 1310 lp_build_sum_vector(&bld->coord_bld, r)); 1311 1312 arx = lp_build_abs(float_bld, rx); 1313 ary = lp_build_abs(float_bld, ry); 1314 arz = lp_build_abs(float_bld, rz); 1315 1316 /* 1317 * Compare sign/magnitude of rx,ry,rz to determine face 1318 */ 1319 arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); 1320 arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); 1321 ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); 1322 ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); 1323 1324 arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); 1325 ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); 1326 1327 rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); 1328 ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); 1329 rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); 1330 1331 { 1332 struct lp_build_flow_context *flow_ctx; 1333 struct lp_build_if_state if_ctx; 1334 1335 flow_ctx = lp_build_flow_create(bld->builder); 1336 lp_build_flow_scope_begin(flow_ctx); 1337 1338 *face_s = bld->coord_bld.undef; 1339 *face_t = bld->coord_bld.undef; 1340 *face = bld->int_bld.undef; 1341 1342 lp_build_name(*face_s, "face_s"); 1343 lp_build_name(*face_t, "face_t"); 1344 lp_build_name(*face, "face"); 1345 1346 lp_build_flow_scope_declare(flow_ctx, face_s); 1347 lp_build_flow_scope_declare(flow_ctx, face_t); 1348 lp_build_flow_scope_declare(flow_ctx, face); 1349 1350 lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); 1351 { 1352 /* +/- X face */ 1353 LLVMValueRef sign = lp_build_sgn(float_bld, rx); 1354 LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); 1355 *face_s = 
lp_build_cube_coord(coord_bld, sign, +1, r, ima); 1356 *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); 1357 *face = lp_build_cube_face(bld, rx, 1358 PIPE_TEX_FACE_POS_X, 1359 PIPE_TEX_FACE_NEG_X); 1360 } 1361 lp_build_else(&if_ctx); 1362 { 1363 struct lp_build_flow_context *flow_ctx2; 1364 struct lp_build_if_state if_ctx2; 1365 1366 LLVMValueRef face_s2 = bld->coord_bld.undef; 1367 LLVMValueRef face_t2 = bld->coord_bld.undef; 1368 LLVMValueRef face2 = bld->int_bld.undef; 1369 1370 flow_ctx2 = lp_build_flow_create(bld->builder); 1371 lp_build_flow_scope_begin(flow_ctx2); 1372 lp_build_flow_scope_declare(flow_ctx2, &face_s2); 1373 lp_build_flow_scope_declare(flow_ctx2, &face_t2); 1374 lp_build_flow_scope_declare(flow_ctx2, &face2); 1375 1376 ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); 1377 1378 lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); 1379 { 1380 /* +/- Y face */ 1381 LLVMValueRef sign = lp_build_sgn(float_bld, ry); 1382 LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); 1383 face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); 1384 face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); 1385 face2 = lp_build_cube_face(bld, ry, 1386 PIPE_TEX_FACE_POS_Y, 1387 PIPE_TEX_FACE_NEG_Y); 1388 } 1389 lp_build_else(&if_ctx2); 1390 { 1391 /* +/- Z face */ 1392 LLVMValueRef sign = lp_build_sgn(float_bld, rz); 1393 LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); 1394 face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); 1395 face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); 1396 face2 = lp_build_cube_face(bld, rz, 1397 PIPE_TEX_FACE_POS_Z, 1398 PIPE_TEX_FACE_NEG_Z); 1399 } 1400 lp_build_endif(&if_ctx2); 1401 lp_build_flow_scope_end(flow_ctx2); 1402 lp_build_flow_destroy(flow_ctx2); 1403 *face_s = face_s2; 1404 *face_t = face_t2; 1405 *face = face2; 1406 } 1407 1408 lp_build_endif(&if_ctx); 1409 lp_build_flow_scope_end(flow_ctx); 1410 lp_build_flow_destroy(flow_ctx); 1411 } 1412} 1413 
1414 1415 1416/** 1417 * Sample the texture/mipmap using given image filter and mip filter. 1418 * data0_ptr and data1_ptr point to the two mipmap levels to sample 1419 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. 1420 * If we're using nearest miplevel sampling the '1' values will be null/unused. 1421 */ 1422static void 1423lp_build_sample_mipmap(struct lp_build_sample_context *bld, 1424 unsigned img_filter, 1425 unsigned mip_filter, 1426 LLVMValueRef s, 1427 LLVMValueRef t, 1428 LLVMValueRef r, 1429 LLVMValueRef lod_fpart, 1430 LLVMValueRef width0_vec, 1431 LLVMValueRef width1_vec, 1432 LLVMValueRef height0_vec, 1433 LLVMValueRef height1_vec, 1434 LLVMValueRef depth0_vec, 1435 LLVMValueRef depth1_vec, 1436 LLVMValueRef row_stride0_vec, 1437 LLVMValueRef row_stride1_vec, 1438 LLVMValueRef img_stride0_vec, 1439 LLVMValueRef img_stride1_vec, 1440 LLVMValueRef data_ptr0, 1441 LLVMValueRef data_ptr1, 1442 LLVMValueRef *colors_out) 1443{ 1444 LLVMValueRef colors0[4], colors1[4]; 1445 int chan; 1446 1447 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 1448 /* sample the first mipmap level */ 1449 lp_build_sample_image_nearest(bld, 1450 width0_vec, height0_vec, depth0_vec, 1451 row_stride0_vec, img_stride0_vec, 1452 data_ptr0, s, t, r, colors0); 1453 1454 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1455 /* sample the second mipmap level */ 1456 lp_build_sample_image_nearest(bld, 1457 width1_vec, height1_vec, depth1_vec, 1458 row_stride1_vec, img_stride1_vec, 1459 data_ptr1, s, t, r, colors1); 1460 } 1461 } 1462 else { 1463 assert(img_filter == PIPE_TEX_FILTER_LINEAR); 1464 1465 /* sample the first mipmap level */ 1466 lp_build_sample_image_linear(bld, 1467 width0_vec, height0_vec, depth0_vec, 1468 row_stride0_vec, img_stride0_vec, 1469 data_ptr0, s, t, r, colors0); 1470 1471 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1472 /* sample the second mipmap level */ 1473 lp_build_sample_image_linear(bld, 1474 width1_vec, height1_vec, depth1_vec, 
1475 row_stride1_vec, img_stride1_vec, 1476 data_ptr1, s, t, r, colors1); 1477 } 1478 } 1479 1480 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1481 /* interpolate samples from the two mipmap levels */ 1482 for (chan = 0; chan < 4; chan++) { 1483 colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, 1484 colors0[chan], colors1[chan]); 1485 } 1486 } 1487 else { 1488 /* use first/only level's colors */ 1489 for (chan = 0; chan < 4; chan++) { 1490 colors_out[chan] = colors0[chan]; 1491 } 1492 } 1493} 1494 1495 1496 1497/** 1498 * General texture sampling codegen. 1499 * This function handles texture sampling for all texture targets (1D, 1500 * 2D, 3D, cube) and all filtering modes. 1501 */ 1502static void 1503lp_build_sample_general(struct lp_build_sample_context *bld, 1504 unsigned unit, 1505 LLVMValueRef s, 1506 LLVMValueRef t, 1507 LLVMValueRef r, 1508 const LLVMValueRef *ddx, 1509 const LLVMValueRef *ddy, 1510 LLVMValueRef lod_bias, /* optional */ 1511 LLVMValueRef explicit_lod, /* optional */ 1512 LLVMValueRef width, 1513 LLVMValueRef height, 1514 LLVMValueRef depth, 1515 LLVMValueRef width_vec, 1516 LLVMValueRef height_vec, 1517 LLVMValueRef depth_vec, 1518 LLVMValueRef row_stride_array, 1519 LLVMValueRef img_stride_array, 1520 LLVMValueRef data_array, 1521 LLVMValueRef *colors_out) 1522{ 1523 struct lp_build_context *float_bld = &bld->float_bld; 1524 const unsigned mip_filter = bld->static_state->min_mip_filter; 1525 const unsigned min_filter = bld->static_state->min_img_filter; 1526 const unsigned mag_filter = bld->static_state->mag_img_filter; 1527 const int dims = texture_dims(bld->static_state->target); 1528 LLVMValueRef lod = NULL, lod_fpart = NULL; 1529 LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL; 1530 LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; 1531 LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; 1532 LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; 1533 
LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; 1534 LLVMValueRef data_ptr0, data_ptr1 = NULL; 1535 LLVMValueRef face_ddx[4], face_ddy[4]; 1536 1537 /* 1538 printf("%s mip %d min %d mag %d\n", __FUNCTION__, 1539 mip_filter, min_filter, mag_filter); 1540 */ 1541 1542 /* 1543 * Choose cube face, recompute texcoords and derivatives for the chosen face. 1544 */ 1545 if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 1546 LLVMValueRef face, face_s, face_t; 1547 lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t); 1548 s = face_s; /* vec */ 1549 t = face_t; /* vec */ 1550 /* use 'r' to indicate cube face */ 1551 r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ 1552 1553 /* recompute ddx, ddy using the new (s,t) face texcoords */ 1554 face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); 1555 face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); 1556 face_ddx[2] = NULL; 1557 face_ddx[3] = NULL; 1558 face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); 1559 face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); 1560 face_ddy[2] = NULL; 1561 face_ddy[3] = NULL; 1562 ddx = face_ddx; 1563 ddy = face_ddy; 1564 } 1565 1566 /* 1567 * Compute the level of detail (float). 1568 */ 1569 if (min_filter != mag_filter || 1570 mip_filter != PIPE_TEX_MIPFILTER_NONE) { 1571 /* Need to compute lod either to choose mipmap levels or to 1572 * distinguish between minification/magnification with one mipmap level. 1573 */ 1574 lod = lp_build_lod_selector(bld, ddx, ddy, 1575 lod_bias, explicit_lod, 1576 width, height, depth); 1577 } 1578 1579 /* 1580 * Compute integer mipmap level(s) to fetch texels from. 1581 */ 1582 if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { 1583 /* always use mip level 0 */ 1584 if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 1585 /* XXX this is a work-around for an apparent bug in LLVM 2.7. 1586 * We should be able to set ilevel0 = const(0) but that causes 1587 * bad x86 code to be emitted. 
1588 */ 1589 lod = lp_build_const_elem(bld->coord_bld.type, 0.0); 1590 lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); 1591 } 1592 else { 1593 ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 1594 } 1595 } 1596 else { 1597 assert(lod); 1598 if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { 1599 lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); 1600 } 1601 else { 1602 assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR); 1603 lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, 1604 &lod_fpart); 1605 lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart); 1606 } 1607 } 1608 1609 /* 1610 * Convert scalar integer mipmap levels into vectors. 1611 */ 1612 ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); 1613 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) 1614 ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); 1615 1616 /* 1617 * Compute width, height at mipmap level 'ilevel0' 1618 */ 1619 width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); 1620 if (dims >= 2) { 1621 height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); 1622 row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array, 1623 ilevel0); 1624 if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { 1625 img_stride0_vec = lp_build_get_level_stride_vec(bld, 1626 img_stride_array, 1627 ilevel0); 1628 if (dims == 3) { 1629 depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); 1630 } 1631 } 1632 } 1633 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1634 /* compute width, height, depth for second mipmap level at 'ilevel1' */ 1635 width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); 1636 if (dims >= 2) { 1637 height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); 1638 row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array, 1639 ilevel1); 1640 if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { 1641 img_stride1_vec = lp_build_get_level_stride_vec(bld, 1642 img_stride_array, 
1643 ilevel1); 1644 if (dims ==3) { 1645 depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); 1646 } 1647 } 1648 } 1649 } 1650 1651 /* 1652 * Get pointer(s) to image data for mipmap level(s). 1653 */ 1654 data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); 1655 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1656 data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); 1657 } 1658 1659 /* 1660 * Get/interpolate texture colors. 1661 */ 1662 if (min_filter == mag_filter) { 1663 /* no need to distinquish between minification and magnification */ 1664 lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart, 1665 width0_vec, width1_vec, 1666 height0_vec, height1_vec, 1667 depth0_vec, depth1_vec, 1668 row_stride0_vec, row_stride1_vec, 1669 img_stride0_vec, img_stride1_vec, 1670 data_ptr0, data_ptr1, 1671 colors_out); 1672 } 1673 else { 1674 /* Emit conditional to choose min image filter or mag image filter 1675 * depending on the lod being >0 or <= 0, respectively. 
1676 */ 1677 struct lp_build_flow_context *flow_ctx; 1678 struct lp_build_if_state if_ctx; 1679 LLVMValueRef minify; 1680 1681 flow_ctx = lp_build_flow_create(bld->builder); 1682 lp_build_flow_scope_begin(flow_ctx); 1683 1684 lp_build_flow_scope_declare(flow_ctx, &colors_out[0]); 1685 lp_build_flow_scope_declare(flow_ctx, &colors_out[1]); 1686 lp_build_flow_scope_declare(flow_ctx, &colors_out[2]); 1687 lp_build_flow_scope_declare(flow_ctx, &colors_out[3]); 1688 1689 /* minify = lod > 0.0 */ 1690 minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE, 1691 lod, float_bld->zero, ""); 1692 1693 lp_build_if(&if_ctx, flow_ctx, bld->builder, minify); 1694 { 1695 /* Use the minification filter */ 1696 lp_build_sample_mipmap(bld, min_filter, mip_filter, 1697 s, t, r, lod_fpart, 1698 width0_vec, width1_vec, 1699 height0_vec, height1_vec, 1700 depth0_vec, depth1_vec, 1701 row_stride0_vec, row_stride1_vec, 1702 img_stride0_vec, img_stride1_vec, 1703 data_ptr0, data_ptr1, 1704 colors_out); 1705 } 1706 lp_build_else(&if_ctx); 1707 { 1708 /* Use the magnification filter */ 1709 lp_build_sample_mipmap(bld, mag_filter, mip_filter, 1710 s, t, r, lod_fpart, 1711 width0_vec, width1_vec, 1712 height0_vec, height1_vec, 1713 depth0_vec, depth1_vec, 1714 row_stride0_vec, row_stride1_vec, 1715 img_stride0_vec, img_stride1_vec, 1716 data_ptr0, data_ptr1, 1717 colors_out); 1718 } 1719 lp_build_endif(&if_ctx); 1720 1721 lp_build_flow_scope_end(flow_ctx); 1722 lp_build_flow_destroy(flow_ctx); 1723 } 1724} 1725 1726 1727 1728static void 1729lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, 1730 LLVMValueRef s, 1731 LLVMValueRef t, 1732 LLVMValueRef width, 1733 LLVMValueRef height, 1734 LLVMValueRef stride_array, 1735 LLVMValueRef data_array, 1736 LLVMValueRef texel_out[4]) 1737{ 1738 LLVMBuilderRef builder = bld->builder; 1739 struct lp_build_context i32, h16, u8n; 1740 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; 1741 LLVMValueRef i32_c8, i32_c128, i32_c255; 1742 
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; 1743 LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; 1744 LLVMValueRef x0, x1; 1745 LLVMValueRef y0, y1; 1746 LLVMValueRef neighbors[2][2]; 1747 LLVMValueRef neighbors_lo[2][2]; 1748 LLVMValueRef neighbors_hi[2][2]; 1749 LLVMValueRef packed, packed_lo, packed_hi; 1750 LLVMValueRef unswizzled[4]; 1751 LLVMValueRef stride; 1752 1753 assert(bld->static_state->target == PIPE_TEXTURE_2D); 1754 assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); 1755 assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); 1756 assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); 1757 1758 lp_build_context_init(&i32, builder, lp_type_int_vec(32)); 1759 lp_build_context_init(&h16, builder, lp_type_ufixed(16)); 1760 lp_build_context_init(&u8n, builder, lp_type_unorm(8)); 1761 1762 i32_vec_type = lp_build_vec_type(i32.type); 1763 h16_vec_type = lp_build_vec_type(h16.type); 1764 u8n_vec_type = lp_build_vec_type(u8n.type); 1765 1766 if (bld->static_state->normalized_coords) { 1767 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); 1768 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, ""); 1769 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, ""); 1770 s = lp_build_mul(&bld->coord_bld, s, fp_width); 1771 t = lp_build_mul(&bld->coord_bld, t, fp_height); 1772 } 1773 1774 /* scale coords by 256 (8 fractional bits) */ 1775 s = lp_build_mul_imm(&bld->coord_bld, s, 256); 1776 t = lp_build_mul_imm(&bld->coord_bld, t, 256); 1777 1778 /* convert float to int */ 1779 s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); 1780 t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); 1781 1782 /* subtract 0.5 (add -128) */ 1783 i32_c128 = lp_build_const_int_vec(i32.type, -128); 1784 s = LLVMBuildAdd(builder, s, i32_c128, ""); 1785 t = LLVMBuildAdd(builder, t, i32_c128, ""); 1786 1787 /* compute floor (shift right 8) */ 1788 i32_c8 = 
lp_build_const_int_vec(i32.type, 8); 1789 s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); 1790 t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); 1791 1792 /* compute fractional part (AND with 0xff) */ 1793 i32_c255 = lp_build_const_int_vec(i32.type, 255); 1794 s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); 1795 t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); 1796 1797 x0 = s_ipart; 1798 y0 = t_ipart; 1799 1800 x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); 1801 y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); 1802 1803 x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width, 1804 bld->static_state->wrap_s); 1805 y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height, 1806 bld->static_state->wrap_t); 1807 1808 x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width, 1809 bld->static_state->wrap_s); 1810 y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height, 1811 bld->static_state->wrap_t); 1812 1813 /* 1814 * Transform 4 x i32 in 1815 * 1816 * s_fpart = {s0, s1, s2, s3} 1817 * 1818 * into 8 x i16 1819 * 1820 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} 1821 * 1822 * into two 8 x i16 1823 * 1824 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} 1825 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} 1826 * 1827 * and likewise for t_fpart. There is no risk of loosing precision here 1828 * since the fractional parts only use the lower 8bits. 
1829 */ 1830 1831 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); 1832 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); 1833 1834 { 1835 LLVMTypeRef elem_type = LLVMInt32Type(); 1836 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; 1837 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; 1838 LLVMValueRef shuffle_lo; 1839 LLVMValueRef shuffle_hi; 1840 unsigned i, j; 1841 1842 for(j = 0; j < h16.type.length; j += 4) { 1843#ifdef PIPE_ARCH_LITTLE_ENDIAN 1844 unsigned subindex = 0; 1845#else 1846 unsigned subindex = 1; 1847#endif 1848 LLVMValueRef index; 1849 1850 index = LLVMConstInt(elem_type, j/2 + subindex, 0); 1851 for(i = 0; i < 4; ++i) 1852 shuffles_lo[j + i] = index; 1853 1854 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); 1855 for(i = 0; i < 4; ++i) 1856 shuffles_hi[j + i] = index; 1857 } 1858 1859 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); 1860 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); 1861 1862 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); 1863 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); 1864 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); 1865 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); 1866 } 1867 1868 stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0); 1869 1870 /* 1871 * Fetch the pixels as 4 x 32bit (rgba order might differ): 1872 * 1873 * rgba0 rgba1 rgba2 rgba3 1874 * 1875 * bit cast them into 16 x u8 1876 * 1877 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 1878 * 1879 * unpack them into two 8 x i16: 1880 * 1881 * r0 g0 b0 a0 r1 g1 b1 a1 1882 * r2 g2 b2 a2 r3 g3 b3 a3 1883 * 1884 * The higher 8 bits of the resulting elements will be zero. 
1885 */ 1886 1887 neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array); 1888 neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array); 1889 neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array); 1890 neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array); 1891 1892 neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); 1893 neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); 1894 neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); 1895 neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); 1896 1897 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); 1898 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); 1899 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); 1900 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); 1901 1902 /* 1903 * Linear interpolate with 8.8 fixed point. 1904 */ 1905 1906 packed_lo = lp_build_lerp_2d(&h16, 1907 s_fpart_lo, t_fpart_lo, 1908 neighbors_lo[0][0], 1909 neighbors_lo[0][1], 1910 neighbors_lo[1][0], 1911 neighbors_lo[1][1]); 1912 1913 packed_hi = lp_build_lerp_2d(&h16, 1914 s_fpart_hi, t_fpart_hi, 1915 neighbors_hi[0][0], 1916 neighbors_hi[0][1], 1917 neighbors_hi[1][0], 1918 neighbors_hi[1][1]); 1919 1920 packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); 1921 1922 /* 1923 * Convert to SoA and swizzle. 
1924 */ 1925 1926 lp_build_rgba8_to_f32_soa(bld->builder, 1927 bld->texel_type, 1928 packed, unswizzled); 1929 1930 if (util_format_is_rgba8_variant(bld->format_desc)) { 1931 lp_build_format_swizzle_soa(bld->format_desc, 1932 &bld->texel_bld, 1933 unswizzled, texel_out); 1934 } else { 1935 texel_out[0] = unswizzled[0]; 1936 texel_out[1] = unswizzled[1]; 1937 texel_out[2] = unswizzled[2]; 1938 texel_out[3] = unswizzled[3]; 1939 } 1940 1941 apply_sampler_swizzle(bld, texel_out); 1942} 1943 1944 1945static void 1946lp_build_sample_compare(struct lp_build_sample_context *bld, 1947 LLVMValueRef p, 1948 LLVMValueRef texel[4]) 1949{ 1950 struct lp_build_context *texel_bld = &bld->texel_bld; 1951 LLVMValueRef res; 1952 unsigned chan; 1953 1954 if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) 1955 return; 1956 1957 /* TODO: Compare before swizzling, to avoid redundant computations */ 1958 res = NULL; 1959 for(chan = 0; chan < 4; ++chan) { 1960 LLVMValueRef cmp; 1961 cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]); 1962 cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero); 1963 1964 if(res) 1965 res = lp_build_add(texel_bld, res, cmp); 1966 else 1967 res = cmp; 1968 } 1969 1970 assert(res); 1971 res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25)); 1972 1973 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ 1974 for(chan = 0; chan < 3; ++chan) 1975 texel[chan] = res; 1976 texel[3] = texel_bld->one; 1977} 1978 1979 1980/** 1981 * Just set texels to white instead of actually sampling the texture. 1982 * For debugging. 
1983 */ 1984static void 1985lp_build_sample_nop(struct lp_build_sample_context *bld, 1986 LLVMValueRef texel_out[4]) 1987{ 1988 struct lp_build_context *texel_bld = &bld->texel_bld; 1989 unsigned chan; 1990 1991 for (chan = 0; chan < 4; chan++) { 1992 /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/ 1993 texel_out[chan] = texel_bld->one; 1994 } 1995} 1996 1997 1998/** 1999 * Build texture sampling code. 2000 * 'texel' will return a vector of four LLVMValueRefs corresponding to 2001 * R, G, B, A. 2002 * \param type vector float type to use for coords, etc. 2003 * \param ddx partial derivatives of (s,t,r,q) with respect to x 2004 * \param ddy partial derivatives of (s,t,r,q) with respect to y 2005 */ 2006void 2007lp_build_sample_soa(LLVMBuilderRef builder, 2008 const struct lp_sampler_static_state *static_state, 2009 struct lp_sampler_dynamic_state *dynamic_state, 2010 struct lp_type type, 2011 unsigned unit, 2012 unsigned num_coords, 2013 const LLVMValueRef *coords, 2014 const LLVMValueRef ddx[4], 2015 const LLVMValueRef ddy[4], 2016 LLVMValueRef lod_bias, /* optional */ 2017 LLVMValueRef explicit_lod, /* optional */ 2018 LLVMValueRef texel_out[4]) 2019{ 2020 struct lp_build_sample_context bld; 2021 LLVMValueRef width, width_vec; 2022 LLVMValueRef height, height_vec; 2023 LLVMValueRef depth, depth_vec; 2024 LLVMValueRef row_stride_array, img_stride_array; 2025 LLVMValueRef data_array; 2026 LLVMValueRef s; 2027 LLVMValueRef t; 2028 LLVMValueRef r; 2029 2030 if (0) { 2031 enum pipe_format fmt = static_state->format; 2032 debug_printf("Sample from %s\n", util_format_name(fmt)); 2033 } 2034 2035 assert(type.floating); 2036 2037 /* Setup our build context */ 2038 memset(&bld, 0, sizeof bld); 2039 bld.builder = builder; 2040 bld.static_state = static_state; 2041 bld.dynamic_state = dynamic_state; 2042 bld.format_desc = util_format_description(static_state->format); 2043 2044 bld.float_type = lp_type_float(32); 2045 bld.int_type = lp_type_int(32); 2046 bld.coord_type = 
type; 2047 bld.uint_coord_type = lp_uint_type(type); 2048 bld.int_coord_type = lp_int_type(type); 2049 bld.texel_type = type; 2050 2051 lp_build_context_init(&bld.float_bld, builder, bld.float_type); 2052 lp_build_context_init(&bld.int_bld, builder, bld.int_type); 2053 lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); 2054 lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type); 2055 lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); 2056 lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); 2057 2058 /* Get the dynamic state */ 2059 width = dynamic_state->width(dynamic_state, builder, unit); 2060 height = dynamic_state->height(dynamic_state, builder, unit); 2061 depth = dynamic_state->depth(dynamic_state, builder, unit); 2062 row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit); 2063 img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit); 2064 data_array = dynamic_state->data_ptr(dynamic_state, builder, unit); 2065 /* Note that data_array is an array[level] of pointers to texture images */ 2066 2067 s = coords[0]; 2068 t = coords[1]; 2069 r = coords[2]; 2070 2071 width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); 2072 height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); 2073 depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); 2074 2075 if (0) { 2076 /* For debug: no-op texture sampling */ 2077 lp_build_sample_nop(&bld, texel_out); 2078 } 2079 else if (util_format_fits_8unorm(bld.format_desc) && 2080 bld.format_desc->nr_channels > 1 && 2081 static_state->target == PIPE_TEXTURE_2D && 2082 static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && 2083 static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && 2084 static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && 2085 is_simple_wrap_mode(static_state->wrap_s) && 2086 is_simple_wrap_mode(static_state->wrap_t)) { 2087 /* special case */ 2088 
lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, 2089 row_stride_array, data_array, texel_out); 2090 } 2091 else { 2092 lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, 2093 lod_bias, explicit_lod, 2094 width, height, depth, 2095 width_vec, height_vec, depth_vec, 2096 row_stride_array, img_stride_array, 2097 data_array, 2098 texel_out); 2099 } 2100 2101 lp_build_sample_compare(&bld, r, texel_out); 2102} 2103