lp_bld_sample_soa.c revision 2ccae040a458ad0f95ee46916e2ea467d5cf9d02
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Texture sampling -- SoA. 
31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35#include "pipe/p_defines.h" 36#include "pipe/p_state.h" 37#include "util/u_debug.h" 38#include "util/u_dump.h" 39#include "util/u_memory.h" 40#include "util/u_math.h" 41#include "util/u_format.h" 42#include "util/u_cpu_detect.h" 43#include "lp_bld_debug.h" 44#include "lp_bld_type.h" 45#include "lp_bld_const.h" 46#include "lp_bld_conv.h" 47#include "lp_bld_arit.h" 48#include "lp_bld_logic.h" 49#include "lp_bld_swizzle.h" 50#include "lp_bld_pack.h" 51#include "lp_bld_format.h" 52#include "lp_bld_sample.h" 53 54 55/** 56 * Keep all information for sampling code generation in a single place. 57 */ 58struct lp_build_sample_context 59{ 60 LLVMBuilderRef builder; 61 62 const struct lp_sampler_static_state *static_state; 63 64 struct lp_sampler_dynamic_state *dynamic_state; 65 66 const struct util_format_description *format_desc; 67 68 /** regular scalar float type */ 69 struct lp_type float_type; 70 struct lp_build_context float_bld; 71 72 /** regular scalar float type */ 73 struct lp_type int_type; 74 struct lp_build_context int_bld; 75 76 /** Incoming coordinates type and build context */ 77 struct lp_type coord_type; 78 struct lp_build_context coord_bld; 79 80 /** Unsigned integer coordinates */ 81 struct lp_type uint_coord_type; 82 struct lp_build_context uint_coord_bld; 83 84 /** Signed integer coordinates */ 85 struct lp_type int_coord_type; 86 struct lp_build_context int_coord_bld; 87 88 /** Output texels type and build context */ 89 struct lp_type texel_type; 90 struct lp_build_context texel_bld; 91}; 92 93 94/** 95 * Does the given texture wrap mode allow sampling the texture border color? 96 * XXX maybe move this into gallium util code. 
97 */ 98static boolean 99wrap_mode_uses_border_color(unsigned mode) 100{ 101 switch (mode) { 102 case PIPE_TEX_WRAP_REPEAT: 103 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 104 case PIPE_TEX_WRAP_MIRROR_REPEAT: 105 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 106 return FALSE; 107 case PIPE_TEX_WRAP_CLAMP: 108 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 109 case PIPE_TEX_WRAP_MIRROR_CLAMP: 110 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 111 return TRUE; 112 default: 113 assert(0 && "unexpected wrap mode"); 114 return FALSE; 115 } 116} 117 118 119static LLVMValueRef 120lp_build_get_mipmap_level(struct lp_build_sample_context *bld, 121 LLVMValueRef data_array, LLVMValueRef level) 122{ 123 LLVMValueRef indexes[2], data_ptr; 124 indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); 125 indexes[1] = level; 126 data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); 127 data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); 128 return data_ptr; 129} 130 131 132static LLVMValueRef 133lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, 134 LLVMValueRef data_array, int level) 135{ 136 LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); 137 return lp_build_get_mipmap_level(bld, data_array, lvl); 138} 139 140 141/** 142 * Gen code to fetch a texel from a texture at int coords (x, y). 
143 * The result, texel, will be: 144 * texel[0] = red values 145 * texel[1] = green values 146 * texel[2] = blue values 147 * texel[3] = alpha values 148 */ 149static void 150lp_build_sample_texel_soa(struct lp_build_sample_context *bld, 151 LLVMValueRef width, 152 LLVMValueRef height, 153 LLVMValueRef x, 154 LLVMValueRef y, 155 LLVMValueRef y_stride, 156 LLVMValueRef data_ptr, 157 LLVMValueRef *texel) 158{ 159 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 160 LLVMValueRef offset; 161 LLVMValueRef packed; 162 LLVMValueRef use_border = NULL; 163 164 /* use_border = x < 0 || x >= width || y < 0 || y >= height */ 165 if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) { 166 LLVMValueRef b1, b2; 167 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); 168 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); 169 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); 170 } 171 172 if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) { 173 LLVMValueRef b1, b2; 174 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); 175 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); 176 if (use_border) { 177 use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1"); 178 use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2"); 179 } 180 else { 181 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2"); 182 } 183 } 184 185 /* 186 * Note: if we find an app which frequently samples the texture border 187 * we might want to implement a true conditional here to avoid sampling 188 * the texture whenever possible (since that's quite a bit of code). 189 * Ex: 190 * if (use_border) { 191 * texel = border_color; 192 * } 193 * else { 194 * texel = sample_texture(coord); 195 * } 196 * As it is now, we always sample the texture, then selectively replace 197 * the texel color results with the border color. 
198 */ 199 200 /* convert x,y coords to linear offset from start of texture, in bytes */ 201 offset = lp_build_sample_offset(&bld->uint_coord_bld, 202 bld->format_desc, 203 x, y, y_stride); 204 205 assert(bld->format_desc->block.width == 1); 206 assert(bld->format_desc->block.height == 1); 207 assert(bld->format_desc->block.bits <= bld->texel_type.width); 208 209 /* gather the texels from the texture */ 210 packed = lp_build_gather(bld->builder, 211 bld->texel_type.length, 212 bld->format_desc->block.bits, 213 bld->texel_type.width, 214 data_ptr, offset); 215 216 /* convert texels to float rgba */ 217 lp_build_unpack_rgba_soa(bld->builder, 218 bld->format_desc, 219 bld->texel_type, 220 packed, texel); 221 222 if (use_border) { 223 /* select texel color or border color depending on use_border */ 224 int chan; 225 for (chan = 0; chan < 4; chan++) { 226 LLVMValueRef border_chan = 227 lp_build_const_scalar(bld->texel_type, 228 bld->static_state->border_color[chan]); 229 texel[chan] = lp_build_select(&bld->texel_bld, use_border, 230 border_chan, texel[chan]); 231 } 232 } 233} 234 235 236static LLVMValueRef 237lp_build_sample_packed(struct lp_build_sample_context *bld, 238 LLVMValueRef x, 239 LLVMValueRef y, 240 LLVMValueRef y_stride, 241 LLVMValueRef data_array) 242{ 243 LLVMValueRef offset; 244 LLVMValueRef data_ptr; 245 246 offset = lp_build_sample_offset(&bld->uint_coord_bld, 247 bld->format_desc, 248 x, y, y_stride); 249 250 assert(bld->format_desc->block.width == 1); 251 assert(bld->format_desc->block.height == 1); 252 assert(bld->format_desc->block.bits <= bld->texel_type.width); 253 254 /* get pointer to mipmap level 0 data */ 255 data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); 256 257 return lp_build_gather(bld->builder, 258 bld->texel_type.length, 259 bld->format_desc->block.bits, 260 bld->texel_type.width, 261 data_ptr, offset); 262} 263 264 265/** 266 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. 
267 */ 268static LLVMValueRef 269lp_build_coord_mirror(struct lp_build_sample_context *bld, 270 LLVMValueRef coord) 271{ 272 struct lp_build_context *coord_bld = &bld->coord_bld; 273 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 274 LLVMValueRef fract, flr, isOdd; 275 276 /* fract = coord - floor(coord) */ 277 fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord)); 278 279 /* flr = ifloor(coord); */ 280 flr = lp_build_ifloor(coord_bld, coord); 281 282 /* isOdd = flr & 1 */ 283 isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, ""); 284 285 /* make coord positive or negative depending on isOdd */ 286 coord = lp_build_set_sign(coord_bld, fract, isOdd); 287 288 /* convert isOdd to float */ 289 isOdd = lp_build_int_to_float(coord_bld, isOdd); 290 291 /* add isOdd to coord */ 292 coord = lp_build_add(coord_bld, coord, isOdd); 293 294 return coord; 295} 296 297 298/** 299 * We only support a few wrap modes in lp_build_sample_wrap_int() at this time. 300 * Return whether the given mode is supported by that function. 301 */ 302static boolean 303is_simple_wrap_mode(unsigned mode) 304{ 305 switch (mode) { 306 case PIPE_TEX_WRAP_REPEAT: 307 case PIPE_TEX_WRAP_CLAMP: 308 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 309 return TRUE; 310 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 311 default: 312 return FALSE; 313 } 314} 315 316 317/** 318 * Build LLVM code for texture wrap mode, for scaled integer texcoords. 
319 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size 320 * \param length the texture size along one dimension 321 * \param is_pot if TRUE, length is a power of two 322 * \param wrap_mode one of PIPE_TEX_WRAP_x 323 */ 324static LLVMValueRef 325lp_build_sample_wrap_int(struct lp_build_sample_context *bld, 326 LLVMValueRef coord, 327 LLVMValueRef length, 328 boolean is_pot, 329 unsigned wrap_mode) 330{ 331 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; 332 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 333 LLVMValueRef length_minus_one; 334 335 length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); 336 337 switch(wrap_mode) { 338 case PIPE_TEX_WRAP_REPEAT: 339 if(is_pot) 340 coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); 341 else 342 /* Signed remainder won't give the right results for negative 343 * dividends but unsigned remainder does.*/ 344 coord = LLVMBuildURem(bld->builder, coord, length, ""); 345 break; 346 347 case PIPE_TEX_WRAP_CLAMP: 348 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 349 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 350 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); 351 coord = lp_build_min(int_coord_bld, coord, length_minus_one); 352 break; 353 354 case PIPE_TEX_WRAP_MIRROR_REPEAT: 355 case PIPE_TEX_WRAP_MIRROR_CLAMP: 356 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 357 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 358 /* FIXME */ 359 _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n", 360 util_dump_tex_wrap(wrap_mode, TRUE)); 361 coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero); 362 coord = lp_build_min(uint_coord_bld, coord, length_minus_one); 363 break; 364 365 default: 366 assert(0); 367 } 368 369 return coord; 370} 371 372 373/** 374 * Build LLVM code for texture wrap mode for linear filtering. 
375 * \param x0_out returns first integer texcoord 376 * \param x1_out returns second integer texcoord 377 * \param weight_out returns linear interpolation weight 378 */ 379static void 380lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, 381 LLVMValueRef coord, 382 LLVMValueRef length, 383 boolean is_pot, 384 unsigned wrap_mode, 385 LLVMValueRef *x0_out, 386 LLVMValueRef *x1_out, 387 LLVMValueRef *weight_out) 388{ 389 struct lp_build_context *coord_bld = &bld->coord_bld; 390 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 391 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; 392 LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); 393 LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5); 394 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); 395 LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); 396 LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); 397 LLVMValueRef coord0, coord1, weight; 398 399 switch(wrap_mode) { 400 case PIPE_TEX_WRAP_REPEAT: 401 /* mul by size and subtract 0.5 */ 402 coord = lp_build_mul(coord_bld, coord, length_f); 403 coord = lp_build_sub(coord_bld, coord, half); 404 /* convert to int */ 405 coord0 = lp_build_ifloor(coord_bld, coord); 406 coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one); 407 /* compute lerp weight */ 408 weight = lp_build_fract(coord_bld, coord); 409 /* repeat wrap */ 410 if (is_pot) { 411 coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); 412 coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, ""); 413 } 414 else { 415 /* Signed remainder won't give the right results for negative 416 * dividends but unsigned remainder does.*/ 417 coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); 418 coord1 = LLVMBuildURem(bld->builder, coord1, length, ""); 419 } 420 break; 421 422 case PIPE_TEX_WRAP_CLAMP: 423 if 
(bld->static_state->normalized_coords) { 424 coord = lp_build_mul(coord_bld, coord, length_f); 425 } 426 weight = lp_build_fract(coord_bld, coord); 427 coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero, 428 length_f_minus_one); 429 coord1 = lp_build_add(coord_bld, coord, coord_bld->one); 430 coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero, 431 length_f_minus_one); 432 coord0 = lp_build_ifloor(coord_bld, coord0); 433 coord1 = lp_build_ifloor(coord_bld, coord1); 434 break; 435 436 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 437 if (bld->static_state->normalized_coords) { 438 /* clamp to [0,1] */ 439 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one); 440 /* mul by tex size and subtract 0.5 */ 441 coord = lp_build_mul(coord_bld, coord, length_f); 442 coord = lp_build_sub(coord_bld, coord, half); 443 } 444 else { 445 LLVMValueRef min, max; 446 /* clamp to [0.5, length - 0.5] */ 447 min = lp_build_const_scalar(coord_bld->type, 0.5F); 448 max = lp_build_sub(coord_bld, length_f, min); 449 coord = lp_build_clamp(coord_bld, coord, min, max); 450 } 451 /* compute lerp weight */ 452 weight = lp_build_fract(coord_bld, coord); 453 /* coord0 = floor(coord); */ 454 coord0 = lp_build_ifloor(coord_bld, coord); 455 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 456 /* coord0 = max(coord0, 0) */ 457 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); 458 /* coord1 = min(coord1, length-1) */ 459 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 460 break; 461 462 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 463 { 464 LLVMValueRef min, max; 465 if (bld->static_state->normalized_coords) { 466 /* min = -1.0 / (2 * length) = -0.5 / length */ 467 min = lp_build_mul(coord_bld, 468 lp_build_const_scalar(coord_bld->type, -0.5F), 469 lp_build_rcp(coord_bld, length_f)); 470 /* max = 1.0 - min */ 471 max = lp_build_sub(coord_bld, coord_bld->one, min); 472 /* coord = clamp(coord, min, max) */ 473 coord = 
lp_build_clamp(coord_bld, coord, min, max); 474 /* scale coord to length (and sub 0.5?) */ 475 coord = lp_build_mul(coord_bld, coord, length_f); 476 coord = lp_build_sub(coord_bld, coord, half); 477 } 478 else { 479 /* clamp to [-0.5, length + 0.5] */ 480 min = lp_build_const_scalar(coord_bld->type, -0.5F); 481 max = lp_build_sub(coord_bld, length_f, min); 482 coord = lp_build_clamp(coord_bld, coord, min, max); 483 coord = lp_build_sub(coord_bld, coord, half); 484 } 485 /* compute lerp weight */ 486 weight = lp_build_fract(coord_bld, coord); 487 /* convert to int */ 488 coord0 = lp_build_ifloor(coord_bld, coord); 489 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 490 } 491 break; 492 493 case PIPE_TEX_WRAP_MIRROR_REPEAT: 494 /* compute mirror function */ 495 coord = lp_build_coord_mirror(bld, coord); 496 497 /* scale coord to length */ 498 coord = lp_build_mul(coord_bld, coord, length_f); 499 coord = lp_build_sub(coord_bld, coord, half); 500 501 /* compute lerp weight */ 502 weight = lp_build_fract(coord_bld, coord); 503 504 /* convert to int coords */ 505 coord0 = lp_build_ifloor(coord_bld, coord); 506 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 507 508 /* coord0 = max(coord0, 0) */ 509 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); 510 /* coord1 = min(coord1, length-1) */ 511 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 512 break; 513 514 case PIPE_TEX_WRAP_MIRROR_CLAMP: 515 { 516 LLVMValueRef min, max; 517 /* min = 1.0 / (2 * length) */ 518 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); 519 /* max = 1.0 - min */ 520 max = lp_build_sub(coord_bld, coord_bld->one, min); 521 522 coord = lp_build_abs(coord_bld, coord); 523 coord = lp_build_clamp(coord_bld, coord, min, max); 524 coord = lp_build_mul(coord_bld, coord, length_f); 525 if(0)coord = lp_build_sub(coord_bld, coord, half); 526 weight = lp_build_fract(coord_bld, coord); 527 coord0 = 
lp_build_ifloor(coord_bld, coord); 528 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 529 } 530 break; 531 532 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 533 { 534 LLVMValueRef min, max; 535 /* min = 1.0 / (2 * length) */ 536 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); 537 /* max = 1.0 - min */ 538 max = lp_build_sub(coord_bld, coord_bld->one, min); 539 540 coord = lp_build_abs(coord_bld, coord); 541 coord = lp_build_clamp(coord_bld, coord, min, max); 542 coord = lp_build_mul(coord_bld, coord, length_f); 543 coord = lp_build_sub(coord_bld, coord, half); 544 weight = lp_build_fract(coord_bld, coord); 545 coord0 = lp_build_ifloor(coord_bld, coord); 546 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 547 } 548 break; 549 550 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 551 { 552 LLVMValueRef min, max; 553 /* min = -1.0 / (2 * length) = -0.5 / length */ 554 min = lp_build_mul(coord_bld, 555 lp_build_const_scalar(coord_bld->type, -0.5F), 556 lp_build_rcp(coord_bld, length_f)); 557 /* max = 1.0 - min */ 558 max = lp_build_sub(coord_bld, coord_bld->one, min); 559 560 coord = lp_build_abs(coord_bld, coord); 561 coord = lp_build_clamp(coord_bld, coord, min, max); 562 coord = lp_build_mul(coord_bld, coord, length_f); 563 coord = lp_build_sub(coord_bld, coord, half); 564 weight = lp_build_fract(coord_bld, coord); 565 coord0 = lp_build_ifloor(coord_bld, coord); 566 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 567 } 568 break; 569 570 default: 571 assert(0); 572 coord0 = NULL; 573 coord1 = NULL; 574 weight = NULL; 575 } 576 577 *x0_out = coord0; 578 *x1_out = coord1; 579 *weight_out = weight; 580} 581 582 583/** 584 * Build LLVM code for texture wrap mode for nearest filtering. 
585 * \param coord the incoming texcoord (nominally in [0,1]) 586 * \param length the texture size along one dimension, as int 587 * \param is_pot if TRUE, length is a power of two 588 * \param wrap_mode one of PIPE_TEX_WRAP_x 589 */ 590static LLVMValueRef 591lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, 592 LLVMValueRef coord, 593 LLVMValueRef length, 594 boolean is_pot, 595 unsigned wrap_mode) 596{ 597 struct lp_build_context *coord_bld = &bld->coord_bld; 598 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 599 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; 600 LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0); 601 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); 602 LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); 603 LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one); 604 LLVMValueRef icoord; 605 606 switch(wrap_mode) { 607 case PIPE_TEX_WRAP_REPEAT: 608 coord = lp_build_mul(coord_bld, coord, length_f); 609 icoord = lp_build_ifloor(coord_bld, coord); 610 if (is_pot) 611 icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, ""); 612 else 613 /* Signed remainder won't give the right results for negative 614 * dividends but unsigned remainder does.*/ 615 icoord = LLVMBuildURem(bld->builder, icoord, length, ""); 616 break; 617 618 case PIPE_TEX_WRAP_CLAMP: 619 /* mul by size */ 620 if (bld->static_state->normalized_coords) { 621 coord = lp_build_mul(coord_bld, coord, length_f); 622 } 623 /* floor */ 624 icoord = lp_build_ifloor(coord_bld, coord); 625 /* clamp to [0, size-1]. 
Note: int coord builder type */ 626 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, 627 length_minus_one); 628 break; 629 630 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 631 { 632 LLVMValueRef min, max; 633 if (bld->static_state->normalized_coords) { 634 /* min = 1.0 / (2 * length) */ 635 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); 636 /* max = length - min */ 637 max = lp_build_sub(coord_bld, length_f, min); 638 /* scale coord to length */ 639 coord = lp_build_mul(coord_bld, coord, length_f); 640 } 641 else { 642 /* clamp to [0.5, length - 0.5] */ 643 min = lp_build_const_scalar(coord_bld->type, 0.5F); 644 max = lp_build_sub(coord_bld, length_f, min); 645 } 646 /* coord = clamp(coord, min, max) */ 647 coord = lp_build_clamp(coord_bld, coord, min, max); 648 icoord = lp_build_ifloor(coord_bld, coord); 649 } 650 break; 651 652 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 653 /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */ 654 { 655 LLVMValueRef min, max; 656 if (bld->static_state->normalized_coords) { 657 /* min = -1.0 / (2 * length) = -0.5 / length */ 658 min = lp_build_mul(coord_bld, 659 lp_build_const_scalar(coord_bld->type, -0.5F), 660 lp_build_rcp(coord_bld, length_f)); 661 /* max = length - min */ 662 max = lp_build_sub(coord_bld, length_f, min); 663 /* scale coord to length */ 664 coord = lp_build_mul(coord_bld, coord, length_f); 665 } 666 else { 667 /* clamp to [-0.5, length + 0.5] */ 668 min = lp_build_const_scalar(coord_bld->type, -0.5F); 669 max = lp_build_sub(coord_bld, length_f, min); 670 } 671 /* coord = clamp(coord, min, max) */ 672 coord = lp_build_clamp(coord_bld, coord, min, max); 673 icoord = lp_build_ifloor(coord_bld, coord); 674 } 675 break; 676 677 case PIPE_TEX_WRAP_MIRROR_REPEAT: 678 { 679 LLVMValueRef min, max; 680 /* min = 1.0 / (2 * length) */ 681 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); 682 /* max = length - min */ 683 max = lp_build_sub(coord_bld, length_f, min); 
684 685 /* compute mirror function */ 686 coord = lp_build_coord_mirror(bld, coord); 687 688 /* scale coord to length */ 689 coord = lp_build_mul(coord_bld, coord, length_f); 690 691 /* coord = clamp(coord, min, max) */ 692 coord = lp_build_clamp(coord_bld, coord, min, max); 693 icoord = lp_build_ifloor(coord_bld, coord); 694 } 695 break; 696 697 case PIPE_TEX_WRAP_MIRROR_CLAMP: 698 coord = lp_build_abs(coord_bld, coord); 699 coord = lp_build_mul(coord_bld, coord, length_f); 700 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one); 701 icoord = lp_build_ifloor(coord_bld, coord); 702 break; 703 704 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 705 { 706 LLVMValueRef min, max; 707 /* min = 1.0 / (2 * length) */ 708 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); 709 /* max = length - min */ 710 max = lp_build_sub(coord_bld, length_f, min); 711 712 coord = lp_build_abs(coord_bld, coord); 713 coord = lp_build_mul(coord_bld, coord, length_f); 714 coord = lp_build_clamp(coord_bld, coord, min, max); 715 icoord = lp_build_ifloor(coord_bld, coord); 716 } 717 break; 718 719 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 720 { 721 LLVMValueRef min, max; 722 /* min = 1.0 / (2 * length) */ 723 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f)); 724 min = lp_build_negate(coord_bld, min); 725 /* max = length - min */ 726 max = lp_build_sub(coord_bld, length_f, min); 727 728 coord = lp_build_abs(coord_bld, coord); 729 coord = lp_build_mul(coord_bld, coord, length_f); 730 coord = lp_build_clamp(coord_bld, coord, min, max); 731 icoord = lp_build_ifloor(coord_bld, coord); 732 } 733 break; 734 735 default: 736 assert(0); 737 icoord = NULL; 738 } 739 740 return icoord; 741} 742 743 744/** 745 * Codegen equivalent for u_minify(). 
746 * Return max(1, base_size >> level); 747 */ 748static LLVMValueRef 749lp_build_minify(struct lp_build_sample_context *bld, 750 LLVMValueRef base_size, 751 LLVMValueRef level) 752{ 753 LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify"); 754 size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); 755 return size; 756} 757 758 759static int 760texture_dims(enum pipe_texture_target tex) 761{ 762 switch (tex) { 763 case PIPE_TEXTURE_1D: 764 return 1; 765 case PIPE_TEXTURE_2D: 766 case PIPE_TEXTURE_CUBE: 767 return 2; 768 case PIPE_TEXTURE_3D: 769 return 3; 770 default: 771 assert(0 && "bad texture target in texture_dims()"); 772 return 2; 773 } 774} 775 776 777/** 778 * Generate code to compute texture level of detail (lambda). 779 * \param s vector of texcoord s values 780 * \param t vector of texcoord t values 781 * \param r vector of texcoord r values 782 * \param width scalar int texture width 783 * \param height scalar int texture height 784 * \param depth scalar int texture depth 785 */ 786static LLVMValueRef 787lp_build_lod_selector(struct lp_build_sample_context *bld, 788 LLVMValueRef s, 789 LLVMValueRef t, 790 LLVMValueRef r, 791 LLVMValueRef width, 792 LLVMValueRef height, 793 LLVMValueRef depth) 794 795{ 796 const int dims = texture_dims(bld->static_state->target); 797 struct lp_build_context *coord_bld = &bld->coord_bld; 798 struct lp_build_context *float_bld = &bld->float_bld; 799 LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias); 800 LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); 801 LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod); 802 803 LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); 804 LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); 805 LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); 806 807 LLVMValueRef s0, s1, s2; 808 LLVMValueRef t0, t1, t2; 809 LLVMValueRef r0, r1, r2; 
810 LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; 811 LLVMValueRef rho, lod; 812 813 /* 814 * dsdx = abs(s[1] - s[0]); 815 * dsdy = abs(s[2] - s[0]); 816 * dtdx = abs(t[1] - t[0]); 817 * dtdy = abs(t[2] - t[0]); 818 * drdx = abs(r[1] - r[0]); 819 * drdy = abs(r[2] - r[0]); 820 * XXX we're assuming a four-element quad in 2x2 layout here. 821 */ 822 s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); 823 s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); 824 s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); 825 dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); 826 dsdx = lp_build_abs(float_bld, dsdx); 827 dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); 828 dsdy = lp_build_abs(float_bld, dsdy); 829 if (dims > 1) { 830 t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); 831 t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); 832 t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); 833 dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); 834 dtdx = lp_build_abs(float_bld, dtdx); 835 dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); 836 dtdy = lp_build_abs(float_bld, dtdy); 837 if (dims > 2) { 838 r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); 839 r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); 840 r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); 841 drdx = LLVMBuildSub(bld->builder, r1, r0, ""); 842 drdx = lp_build_abs(float_bld, drdx); 843 drdy = LLVMBuildSub(bld->builder, r2, r0, ""); 844 drdy = lp_build_abs(float_bld, drdy); 845 } 846 } 847 848 /* Compute rho = max of all partial derivatives scaled by texture size. 
849 * XXX this could be vectorized somewhat 850 */ 851 rho = LLVMBuildMul(bld->builder, 852 lp_build_max(float_bld, dsdx, dsdy), 853 lp_build_int_to_float(float_bld, width), ""); 854 if (dims > 1) { 855 LLVMValueRef max; 856 max = LLVMBuildMul(bld->builder, 857 lp_build_max(float_bld, dtdx, dtdy), 858 lp_build_int_to_float(float_bld, height), ""); 859 rho = lp_build_max(float_bld, rho, max); 860 if (dims > 2) { 861 max = LLVMBuildMul(bld->builder, 862 lp_build_max(float_bld, drdx, drdy), 863 lp_build_int_to_float(float_bld, depth), ""); 864 rho = lp_build_max(float_bld, rho, max); 865 } 866 } 867 868 /* compute lod = log2(rho) */ 869 lod = lp_build_log2(float_bld, rho); 870 871 /* add lod bias */ 872 lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias"); 873 874 /* clamp lod */ 875 lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); 876 877 return lod; 878} 879 880 881/** 882 * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer 883 * mipmap level index. 884 * Note: this is all scalar code. 885 * \param lod scalar float texture level of detail 886 * \param level_out returns integer 887 */ 888static void 889lp_build_nearest_mip_level(struct lp_build_sample_context *bld, 890 unsigned unit, 891 LLVMValueRef lod, 892 LLVMValueRef *level_out) 893{ 894 struct lp_build_context *float_bld = &bld->float_bld; 895 struct lp_build_context *int_bld = &bld->int_bld; 896 LLVMValueRef last_level, level; 897 898 LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); 899 900 last_level = bld->dynamic_state->last_level(bld->dynamic_state, 901 bld->builder, unit); 902 903 /* convert float lod to integer */ 904 level = lp_build_iround(float_bld, lod); 905 906 /* clamp level to legal range of levels */ 907 *level_out = lp_build_clamp(int_bld, level, zero, last_level); 908} 909 910 911/** 912 * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to 913 * two (adjacent) mipmap level indexes. 
Later, we'll sample from those 914 * two mipmap levels and interpolate between them. 915 */ 916static void 917lp_build_linear_mip_levels(struct lp_build_sample_context *bld, 918 unsigned unit, 919 LLVMValueRef lod, 920 LLVMValueRef *level0_out, 921 LLVMValueRef *level1_out, 922 LLVMValueRef *weight_out) 923{ 924 struct lp_build_context *coord_bld = &bld->coord_bld; 925 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 926 LLVMValueRef last_level, level; 927 928 last_level = bld->dynamic_state->last_level(bld->dynamic_state, 929 bld->builder, unit); 930 931 /* convert float lod to integer */ 932 level = lp_build_ifloor(coord_bld, lod); 933 934 /* compute level 0 and clamp to legal range of levels */ 935 *level0_out = lp_build_clamp(int_coord_bld, level, 936 int_coord_bld->zero, 937 last_level); 938 /* compute level 1 and clamp to legal range of levels */ 939 *level1_out = lp_build_add(int_coord_bld, *level0_out, int_coord_bld->one); 940 *level1_out = lp_build_min(int_coord_bld, *level1_out, int_coord_bld->zero); 941 942 *weight_out = lp_build_fract(coord_bld, lod); 943} 944 945 946 947/** 948 * Sample 2D texture with nearest filtering, no mipmapping. 
 */
static void
lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
                               LLVMValueRef s,
                               LLVMValueRef t,
                               LLVMValueRef width,
                               LLVMValueRef height,
                               LLVMValueRef stride,
                               LLVMValueRef data_array,
                               LLVMValueRef *texel)
{
   LLVMValueRef x, y;
   LLVMValueRef data_ptr;

   /* Apply the S/T wrap modes to turn the coordinates into texel x/y. */
   x = lp_build_sample_wrap_nearest(bld, s, width,
                                    bld->static_state->pot_width,
                                    bld->static_state->wrap_s);
   y = lp_build_sample_wrap_nearest(bld, t, height,
                                    bld->static_state->pot_height,
                                    bld->static_state->wrap_t);

   lp_build_name(x, "tex.x.wrapped");
   lp_build_name(y, "tex.y.wrapped");

   /* get pointer to mipmap level 0 data */
   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);

   /* Fetch the single texel at (x, y) into texel[]. */
   lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
}


/**
 * Sample 2D texture with nearest filtering, nearest mipmap.
 *
 * 'width' and 'height' are the values handed to the LOD selector, while
 * 'width_vec', 'height_vec' and 'stride' are minified by the selected
 * level before wrapping and fetching.
 */
static void
lp_build_sample_2d_nearest_mip_nearest_soa(struct lp_build_sample_context *bld,
                                           unsigned unit,
                                           LLVMValueRef s,
                                           LLVMValueRef t,
                                           LLVMValueRef width,
                                           LLVMValueRef height,
                                           LLVMValueRef width_vec,
                                           LLVMValueRef height_vec,
                                           LLVMValueRef stride,
                                           LLVMValueRef data_array,
                                           LLVMValueRef *texel)
{
   LLVMValueRef x, y;
   LLVMValueRef lod, ilevel, ilevel_vec;
   LLVMValueRef data_ptr;

   /* compute float LOD */
   lod = lp_build_lod_selector(bld, s, t, NULL, width, height, NULL);

   /* convert LOD to int */
   lp_build_nearest_mip_level(bld, unit, lod, &ilevel);

   /* replicate the level index across the int coord vector */
   ilevel_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel);

   /* compute width_vec, height at mipmap level 'ilevel' */
   width_vec = lp_build_minify(bld, width_vec, ilevel_vec);
   height_vec = lp_build_minify(bld, height_vec, ilevel_vec);
   stride = lp_build_minify(bld, stride, ilevel_vec);

   x = lp_build_sample_wrap_nearest(bld, s, width_vec,
                                    bld->static_state->pot_width,
                                    bld->static_state->wrap_s);
   y = lp_build_sample_wrap_nearest(bld, t, height_vec,
                                    bld->static_state->pot_height,
                                    bld->static_state->wrap_t);

   lp_build_name(x, "tex.x.wrapped");
   lp_build_name(y, "tex.y.wrapped");

   /* get pointer to mipmap level [ilevel] data */
   /*
    * NOTE(review): the dynamic lookup is compiled out by 'if (0)', so the
    * level 0 image is always fetched even though width/height/stride above
    * were minified for 'ilevel'.  This looks like work in progress --
    * confirm before relying on mipmapped results.
    */
   if (0)
      data_ptr = lp_build_get_mipmap_level(bld, data_array, ilevel);
   else
      data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);

   lp_build_sample_texel_soa(bld, width_vec, height_vec, x, y, stride, data_ptr, texel);
}


/**
 * Sample 2D texture with bilinear filtering.
 *
 * Fetches the four neighboring texels from mipmap level 0 and blends
 * every one of the four channels with a 2D lerp.
 */
static void
lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef width,
                              LLVMValueRef height,
                              LLVMValueRef stride,
                              LLVMValueRef data_array,
                              LLVMValueRef *texel)
{
   LLVMValueRef s_fpart;
   LLVMValueRef t_fpart;
   LLVMValueRef x0, x1;
   LLVMValueRef y0, y1;
   LLVMValueRef neighbors[2][2][4];   /* indexed [y][x][channel] */
   LLVMValueRef data_ptr;
   unsigned chan;

   /* Wrap each coordinate into a pair of texel indices plus a lerp weight. */
   lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
                               bld->static_state->wrap_s, &x0, &x1, &s_fpart);
   lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
                               bld->static_state->wrap_t, &y0, &y1, &t_fpart);

   /* get pointer to mipmap level 0 data */
   data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);

   /* Fetch the 2x2 footprint: first index selects y0/y1, second x0/x1. */
   lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
   lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
   lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
   lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);

   /* TODO: Don't interpolate missing channels */
   for(chan = 0; chan < 4; ++chan) {
      texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
                                     s_fpart, t_fpart,
                                     neighbors[0][0][chan],
                                     neighbors[0][1][chan],
                                     neighbors[1][0][chan],
                                     neighbors[1][1][chan]);
   }
}


/**
 * Split packed 4 x 8-bit values into four separate float channel vectors.
 *
 * Channel 'chan' is taken from bits [chan*8, chan*8 + 8) of each element
 * of 'packed' and converted with lp_build_unsigned_norm_to_float().
 *
 * \param dst_type  type used for the integer constants and for the
 *                  float conversion
 * \param packed    packed input (presumably a vector of 32-bit integers,
 *                  given the 'stop < 32' test -- TODO confirm)
 * \param rgba      receives the four channel values, in bit order
 */
static void
lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
                          struct lp_type dst_type,
                          LLVMValueRef packed,
                          LLVMValueRef *rgba)
{
   LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
   unsigned chan;

   /* Decode the input vector components */
   for (chan = 0; chan < 4; ++chan) {
      unsigned start = chan*8;
      unsigned stop = start + 8;
      LLVMValueRef input;

      input = packed;

      /* move the channel down to the low bits (nothing to do for chan 0) */
      if(start)
         input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");

      /* the topmost channel needs no mask: the logical shift cleared the rest */
      if(stop < 32)
         input = LLVMBuildAnd(builder, input, mask, "");

      input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);

      rgba[chan] = input;
   }
}


/**
 * Sample 2D texture with bilinear filtering, doing the interpolation in
 * 8.8 fixed point on packed texels (AoS) and converting to SoA float
 * channels only at the end.
 *
 * NOTE(review): unlike the SoA paths, 'data_array' is passed straight to
 * lp_build_sample_packed() without an explicit mipmap level lookup here;
 * whether that helper selects level 0 itself is not visible from this
 * function -- confirm.
 */
static void
lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef width,
                              LLVMValueRef height,
                              LLVMValueRef stride,
                              LLVMValueRef data_array,
                              LLVMValueRef *texel)
{
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
   LLVMValueRef x0, x1;
   LLVMValueRef y0, y1;
   LLVMValueRef neighbors[2][2];
   LLVMValueRef neighbors_lo[2][2];
   LLVMValueRef neighbors_hi[2][2];
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];

   /* Local build contexts: 32-bit ints, 16-bit fixed point, 8-bit unorm. */
   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   lp_build_context_init(&u8n, builder, lp_type_unorm(8));

   i32_vec_type = lp_build_vec_type(i32.type);
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

   /* Normalized coords are first scaled by the texture size. */
   if (bld->static_state->normalized_coords) {
      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
      LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
      s = lp_build_mul(&bld->coord_bld, s, fp_width);
      t = lp_build_mul(&bld->coord_bld, t, fp_height);
   }

   /* scale coords by 256 (8 fractional bits) */
   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   t = lp_build_mul_imm(&bld->coord_bld, t, 256);

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");

   /* subtract 0.5 (add -128) */
   /* note: despite its name, i32_c128 holds the constant -128 */
   i32_c128 = lp_build_int_const_scalar(i32.type, -128);
   s = LLVMBuildAdd(builder, s, i32_c128, "");
   t = LLVMBuildAdd(builder, t, i32_c128, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_int_const_scalar(i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_int_const_scalar(i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");

   x0 = s_ipart;
   y0 = t_ipart;

   /* the second sample of each pair is the next texel, before wrapping */
   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);

   x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
                                 bld->static_state->wrap_s);
   y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
                                 bld->static_state->wrap_t);

   x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
                                 bld->static_state->wrap_s);
   y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
                                 bld->static_state->wrap_t);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * into 8 x i16
    *
    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
    *
    * into two 8 x i16
    *
    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8bits.
    */

   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
   t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");

   {
      LLVMTypeRef elem_type = LLVMInt32Type();
      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffle_lo;
      LLVMValueRef shuffle_hi;
      unsigned i, j;

      /*
       * Build the two shuffle masks four entries at a time: every group
       * of four output elements replicates one 16-bit source element.
       */
      for(j = 0; j < h16.type.length; j += 4) {
         /*
          * Each 32-bit fraction spans two 16-bit elements; subindex picks
          * the first of the pair on little-endian CPUs and the second
          * otherwise.
          */
         unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
         LLVMValueRef index;

         /* sources taken from the first half of the vector */
         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
         for(i = 0; i < 4; ++i)
            shuffles_lo[j + i] = index;

         /* sources taken from the second half of the vector */
         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
         for(i = 0; i < 4; ++i)
            shuffles_hi[j + i] = index;
      }

      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);

      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */

   /* 2x2 footprint: first index selects y0/y1, second x0/x1 */
   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);

   neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
   neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
   neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
   neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");

   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);

   /*
    * Linear interpolate with 8.8 fixed point.
    */

   packed_lo = lp_build_lerp_2d(&h16,
                                s_fpart_lo, t_fpart_lo,
                                neighbors_lo[0][0],
                                neighbors_lo[0][1],
                                neighbors_lo[1][0],
                                neighbors_lo[1][1]);

   packed_hi = lp_build_lerp_2d(&h16,
                                s_fpart_hi, t_fpart_hi,
                                neighbors_hi[0][0],
                                neighbors_hi[0][1],
                                neighbors_hi[1][0],
                                neighbors_hi[1][1]);

   /* narrow the two 16-bit halves back into one vector of 8-bit values */
   packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);

   /*
    * Convert to SoA and swizzle.
    */

   packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");

   lp_build_rgba8_to_f32_soa(bld->builder,
                             bld->texel_type,
                             packed, unswizzled);

   lp_build_format_swizzle_soa(bld->format_desc,
                               bld->texel_type, unswizzled,
                               texel);
}


/**
 * Apply the sampler's compare function to the fetched texel, in place.
 *
 * Does nothing when compare_mode is PIPE_TEX_COMPARE_NONE.  Otherwise
 * each of the four channels is compared against 'p', the four 0/1
 * results are averaged, and the average is written to texel[0..2] with
 * texel[3] set to one.
 *
 * \param p      reference value the channels are compared against
 * \param texel  four channel values; overwritten with the result
 */
static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
                        LLVMValueRef p,
                        LLVMValueRef *texel)
{
   struct lp_build_context *texel_bld = &bld->texel_bld;
   LLVMValueRef res;
   unsigned chan;

   if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
      return;

   /* TODO: Compare before swizzling, to avoid redundant computations */
   res = NULL;
   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef cmp;
      /* turn the comparison mask into 1.0 (pass) or 0.0 (fail) */
      cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
      cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);

      /* accumulate the per-channel results */
      if(res)
         res = lp_build_add(texel_bld, res, cmp);
      else
         res = cmp;
   }

   assert(res);
   /* average of the four comparison results */
   res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));

   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
   for(chan = 0; chan < 3; ++chan)
      texel[chan] = res;
   texel[3] = texel_bld->one;
}


/**
 * Build texture sampling code.
 * 'texel' will return a vector of four LLVMValueRefs corresponding to
 * R, G, B, A.
 * \param type  vector float type to use for coords, etc.
1347 */ 1348void 1349lp_build_sample_soa(LLVMBuilderRef builder, 1350 const struct lp_sampler_static_state *static_state, 1351 struct lp_sampler_dynamic_state *dynamic_state, 1352 struct lp_type type, 1353 unsigned unit, 1354 unsigned num_coords, 1355 const LLVMValueRef *coords, 1356 LLVMValueRef lodbias, 1357 LLVMValueRef *texel) 1358{ 1359 struct lp_build_sample_context bld; 1360 LLVMValueRef width, width_vec; 1361 LLVMValueRef height, height_vec; 1362 LLVMValueRef stride, stride_vec; 1363 LLVMValueRef data_array; 1364 LLVMValueRef s; 1365 LLVMValueRef t; 1366 LLVMValueRef r; 1367 boolean done = FALSE; 1368 1369 (void) lp_build_lod_selector; /* temporary to silence warning */ 1370 (void) lp_build_nearest_mip_level; 1371 (void) lp_build_linear_mip_levels; 1372 (void) lp_build_minify; 1373 1374 /* Setup our build context */ 1375 memset(&bld, 0, sizeof bld); 1376 bld.builder = builder; 1377 bld.static_state = static_state; 1378 bld.dynamic_state = dynamic_state; 1379 bld.format_desc = util_format_description(static_state->format); 1380 1381 bld.float_type = lp_type_float(32); 1382 bld.int_type = lp_type_int(32); 1383 bld.coord_type = type; 1384 bld.uint_coord_type = lp_uint_type(type); 1385 bld.int_coord_type = lp_int_type(type); 1386 bld.texel_type = type; 1387 1388 lp_build_context_init(&bld.float_bld, builder, bld.float_type); 1389 lp_build_context_init(&bld.int_bld, builder, bld.int_type); 1390 lp_build_context_init(&bld.coord_bld, builder, bld.coord_type); 1391 lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type); 1392 lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type); 1393 lp_build_context_init(&bld.texel_bld, builder, bld.texel_type); 1394 1395 /* Get the dynamic state */ 1396 width = dynamic_state->width(dynamic_state, builder, unit); 1397 height = dynamic_state->height(dynamic_state, builder, unit); 1398 stride = dynamic_state->stride(dynamic_state, builder, unit); 1399 data_array = 
dynamic_state->data_ptr(dynamic_state, builder, unit); 1400 /* Note that data_array is an array[level] of pointers to texture images */ 1401 1402 s = coords[0]; 1403 t = coords[1]; 1404 r = coords[2]; 1405 1406 width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); 1407 height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); 1408 stride_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride); 1409 1410 if(static_state->target == PIPE_TEXTURE_1D) 1411 t = bld.coord_bld.zero; 1412 1413 switch (static_state->min_mip_filter) { 1414 case PIPE_TEX_MIPFILTER_NONE: 1415 break; 1416 case PIPE_TEX_MIPFILTER_NEAREST: 1417 1418 switch (static_state->min_img_filter) { 1419 case PIPE_TEX_FILTER_NEAREST: 1420 lp_build_sample_2d_nearest_mip_nearest_soa(&bld, unit, 1421 s, t, 1422 width, height, 1423 width_vec, height_vec, 1424 stride_vec, 1425 data_array, texel); 1426 done = TRUE; 1427 break; 1428 } 1429 1430 break; 1431 case PIPE_TEX_MIPFILTER_LINEAR: 1432 break; 1433 default: 1434 assert(0 && "invalid mip filter"); 1435 } 1436 1437 if (!done) { 1438 switch (static_state->min_img_filter) { 1439 case PIPE_TEX_FILTER_NEAREST: 1440 lp_build_sample_2d_nearest_soa(&bld, s, t, width_vec, height_vec, 1441 stride_vec, data_array, texel); 1442 break; 1443 case PIPE_TEX_FILTER_LINEAR: 1444 if(lp_format_is_rgba8(bld.format_desc) && 1445 is_simple_wrap_mode(static_state->wrap_s) && 1446 is_simple_wrap_mode(static_state->wrap_t)) 1447 lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, 1448 stride_vec, data_array, texel); 1449 else 1450 lp_build_sample_2d_linear_soa(&bld, s, t, width_vec, height_vec, 1451 stride_vec, data_array, texel); 1452 break; 1453 default: 1454 assert(0); 1455 } 1456 } 1457 1458 /* FIXME: respect static_state->min_mip_filter */; 1459 /* FIXME: respect static_state->mag_img_filter */; 1460 1461 lp_build_sample_compare(&bld, r, texel); 1462} 1463