/* lp_bld_sample.c revision e039fd079b8089d2cc0204fab732cd3b5e96cca5 */
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- common code.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_math.h"
#include "lp_bld_arit.h"
#include "lp_bld_const.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_flow.h"
#include "lp_bld_sample.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_type.h"


/*
 * Bri-linear factor. Should be greater than one.
51 */ 52#define BRILINEAR_FACTOR 2 53 54static LLVMValueRef 55lp_build_minify(struct lp_build_context *bld, 56 LLVMValueRef base_size, 57 LLVMValueRef level); 58 59/** 60 * Does the given texture wrap mode allow sampling the texture border color? 61 * XXX maybe move this into gallium util code. 62 */ 63boolean 64lp_sampler_wrap_mode_uses_border_color(unsigned mode, 65 unsigned min_img_filter, 66 unsigned mag_img_filter) 67{ 68 switch (mode) { 69 case PIPE_TEX_WRAP_REPEAT: 70 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 71 case PIPE_TEX_WRAP_MIRROR_REPEAT: 72 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 73 return FALSE; 74 case PIPE_TEX_WRAP_CLAMP: 75 case PIPE_TEX_WRAP_MIRROR_CLAMP: 76 if (min_img_filter == PIPE_TEX_FILTER_NEAREST && 77 mag_img_filter == PIPE_TEX_FILTER_NEAREST) { 78 return FALSE; 79 } else { 80 return TRUE; 81 } 82 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 83 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 84 return TRUE; 85 default: 86 assert(0 && "unexpected wrap mode"); 87 return FALSE; 88 } 89} 90 91 92/** 93 * Initialize lp_sampler_static_state object with the gallium sampler 94 * and texture state. 95 * The former is considered to be static and the later dynamic. 96 */ 97void 98lp_sampler_static_state(struct lp_sampler_static_state *state, 99 const struct pipe_sampler_view *view, 100 const struct pipe_sampler_state *sampler) 101{ 102 const struct pipe_resource *texture = view->texture; 103 104 memset(state, 0, sizeof *state); 105 106 if(!texture) 107 return; 108 109 if(!sampler) 110 return; 111 112 /* 113 * We don't copy sampler state over unless it is actually enabled, to avoid 114 * spurious recompiles, as the sampler static state is part of the shader 115 * key. 116 * 117 * Ideally the state tracker or cso_cache module would make all state 118 * canonical, but until that happens it's better to be safe than sorry here. 119 * 120 * XXX: Actually there's much more than can be done here, especially 121 * regarding 1D/2D/3D/CUBE textures, wrap modes, etc. 
122 */ 123 124 state->format = view->format; 125 state->swizzle_r = view->swizzle_r; 126 state->swizzle_g = view->swizzle_g; 127 state->swizzle_b = view->swizzle_b; 128 state->swizzle_a = view->swizzle_a; 129 130 state->target = texture->target; 131 state->pot_width = util_is_power_of_two(texture->width0); 132 state->pot_height = util_is_power_of_two(texture->height0); 133 state->pot_depth = util_is_power_of_two(texture->depth0); 134 135 state->wrap_s = sampler->wrap_s; 136 state->wrap_t = sampler->wrap_t; 137 state->wrap_r = sampler->wrap_r; 138 state->min_img_filter = sampler->min_img_filter; 139 state->mag_img_filter = sampler->mag_img_filter; 140 141 if (view->u.tex.last_level && sampler->max_lod > 0.0f) { 142 state->min_mip_filter = sampler->min_mip_filter; 143 } else { 144 state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 145 } 146 147 if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { 148 if (sampler->lod_bias != 0.0f) { 149 state->lod_bias_non_zero = 1; 150 } 151 152 /* If min_lod == max_lod we can greatly simplify mipmap selection. 153 * This is a case that occurs during automatic mipmap generation. 154 */ 155 if (sampler->min_lod == sampler->max_lod) { 156 state->min_max_lod_equal = 1; 157 } else { 158 if (sampler->min_lod > 0.0f) { 159 state->apply_min_lod = 1; 160 } 161 162 if (sampler->max_lod < (float)view->u.tex.last_level) { 163 state->apply_max_lod = 1; 164 } 165 } 166 } 167 168 state->compare_mode = sampler->compare_mode; 169 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) { 170 state->compare_func = sampler->compare_func; 171 } 172 173 state->normalized_coords = sampler->normalized_coords; 174 175 /* 176 * FIXME: Handle the remainder of pipe_sampler_view. 177 */ 178} 179 180 181/** 182 * Generate code to compute coordinate gradient (rho). 
183 * \param ddx partial derivatives of (s, t, r, q) with respect to X 184 * \param ddy partial derivatives of (s, t, r, q) with respect to Y 185 * 186 * XXX: The resulting rho is scalar, so we ignore all but the first element of 187 * derivatives that are passed by the shader. 188 */ 189static LLVMValueRef 190lp_build_rho(struct lp_build_sample_context *bld, 191 unsigned unit, 192 const LLVMValueRef ddx[4], 193 const LLVMValueRef ddy[4]) 194{ 195 struct lp_build_context *int_size_bld = &bld->int_size_bld; 196 struct lp_build_context *float_size_bld = &bld->float_size_bld; 197 struct lp_build_context *float_bld = &bld->float_bld; 198 const unsigned dims = bld->dims; 199 LLVMBuilderRef builder = bld->gallivm->builder; 200 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); 201 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); 202 LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0); 203 LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0); 204 LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; 205 LLVMValueRef rho_x, rho_y; 206 LLVMValueRef rho_vec; 207 LLVMValueRef int_size, float_size; 208 LLVMValueRef rho; 209 LLVMValueRef first_level, first_level_vec; 210 211 dsdx = ddx[0]; 212 dsdy = ddy[0]; 213 214 if (dims <= 1) { 215 rho_x = dsdx; 216 rho_y = dsdy; 217 } 218 else { 219 rho_x = float_size_bld->undef; 220 rho_y = float_size_bld->undef; 221 222 rho_x = LLVMBuildInsertElement(builder, rho_x, dsdx, index0, ""); 223 rho_y = LLVMBuildInsertElement(builder, rho_y, dsdy, index0, ""); 224 225 dtdx = ddx[1]; 226 dtdy = ddy[1]; 227 228 rho_x = LLVMBuildInsertElement(builder, rho_x, dtdx, index1, ""); 229 rho_y = LLVMBuildInsertElement(builder, rho_y, dtdy, index1, ""); 230 231 if (dims >= 3) { 232 drdx = ddx[2]; 233 drdy = ddy[2]; 234 235 rho_x = LLVMBuildInsertElement(builder, rho_x, drdx, index2, ""); 236 rho_y = LLVMBuildInsertElement(builder, rho_y, drdy, index2, ""); 237 } 238 } 239 240 rho_x = lp_build_abs(float_size_bld, rho_x); 241 rho_y = 
lp_build_abs(float_size_bld, rho_y); 242 243 rho_vec = lp_build_max(float_size_bld, rho_x, rho_y); 244 245 first_level = bld->dynamic_state->first_level(bld->dynamic_state, 246 bld->gallivm, unit); 247 first_level_vec = lp_build_broadcast_scalar(&bld->int_size_bld, first_level); 248 int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec); 249 float_size = lp_build_int_to_float(float_size_bld, int_size); 250 251 rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size); 252 253 if (dims <= 1) { 254 rho = rho_vec; 255 } 256 else { 257 if (dims >= 2) { 258 LLVMValueRef rho_s, rho_t, rho_r; 259 260 rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, ""); 261 rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, ""); 262 263 rho = lp_build_max(float_bld, rho_s, rho_t); 264 265 if (dims >= 3) { 266 rho_r = LLVMBuildExtractElement(builder, rho_vec, index0, ""); 267 rho = lp_build_max(float_bld, rho, rho_r); 268 } 269 } 270 } 271 272 return rho; 273} 274 275 276/* 277 * Bri-linear lod computation 278 * 279 * Use a piece-wise linear approximation of log2 such that: 280 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc. 281 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc, 282 * with the steepness specified in 'factor' 283 * - exact result for 0.5, 1.5, etc. 284 * 285 * 286 * 1.0 - /----* 287 * / 288 * / 289 * / 290 * 0.5 - * 291 * / 292 * / 293 * / 294 * 0.0 - *----/ 295 * 296 * | | 297 * 2^0 2^1 298 * 299 * This is a technique also commonly used in hardware: 300 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html 301 * 302 * TODO: For correctness, this should only be applied when texture is known to 303 * have regular mipmaps, i.e., mipmaps derived from the base level. 304 * 305 * TODO: This could be done in fixed point, where applicable. 
306 */ 307static void 308lp_build_brilinear_lod(struct lp_build_context *bld, 309 LLVMValueRef lod, 310 double factor, 311 LLVMValueRef *out_lod_ipart, 312 LLVMValueRef *out_lod_fpart) 313{ 314 LLVMValueRef lod_fpart; 315 double pre_offset = (factor - 0.5)/factor - 0.5; 316 double post_offset = 1 - factor; 317 318 if (0) { 319 lp_build_printf(bld->gallivm, "lod = %f\n", lod); 320 } 321 322 lod = lp_build_add(bld, lod, 323 lp_build_const_vec(bld->gallivm, bld->type, pre_offset)); 324 325 lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart); 326 327 lod_fpart = lp_build_mul(bld, lod_fpart, 328 lp_build_const_vec(bld->gallivm, bld->type, factor)); 329 330 lod_fpart = lp_build_add(bld, lod_fpart, 331 lp_build_const_vec(bld->gallivm, bld->type, post_offset)); 332 333 /* 334 * It's not necessary to clamp lod_fpart since: 335 * - the above expression will never produce numbers greater than one. 336 * - the mip filtering branch is only taken if lod_fpart is positive 337 */ 338 339 *out_lod_fpart = lod_fpart; 340 341 if (0) { 342 lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart); 343 lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart); 344 } 345} 346 347 348/* 349 * Combined log2 and brilinear lod computation. 350 * 351 * It's in all identical to calling lp_build_fast_log2() and 352 * lp_build_brilinear_lod() above, but by combining we can compute the integer 353 * and fractional part independently. 
354 */ 355static void 356lp_build_brilinear_rho(struct lp_build_context *bld, 357 LLVMValueRef rho, 358 double factor, 359 LLVMValueRef *out_lod_ipart, 360 LLVMValueRef *out_lod_fpart) 361{ 362 LLVMValueRef lod_ipart; 363 LLVMValueRef lod_fpart; 364 365 const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor); 366 const double post_offset = 1 - 2*factor; 367 368 assert(bld->type.floating); 369 370 assert(lp_check_value(bld->type, rho)); 371 372 /* 373 * The pre factor will make the intersections with the exact powers of two 374 * happen precisely where we want then to be, which means that the integer 375 * part will not need any post adjustments. 376 */ 377 rho = lp_build_mul(bld, rho, 378 lp_build_const_vec(bld->gallivm, bld->type, pre_factor)); 379 380 /* ipart = ifloor(log2(rho)) */ 381 lod_ipart = lp_build_extract_exponent(bld, rho, 0); 382 383 /* fpart = rho / 2**ipart */ 384 lod_fpart = lp_build_extract_mantissa(bld, rho); 385 386 lod_fpart = lp_build_mul(bld, lod_fpart, 387 lp_build_const_vec(bld->gallivm, bld->type, factor)); 388 389 lod_fpart = lp_build_add(bld, lod_fpart, 390 lp_build_const_vec(bld->gallivm, bld->type, post_offset)); 391 392 /* 393 * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since: 394 * - the above expression will never produce numbers greater than one. 395 * - the mip filtering branch is only taken if lod_fpart is positive 396 */ 397 398 *out_lod_ipart = lod_ipart; 399 *out_lod_fpart = lod_fpart; 400} 401 402 403/** 404 * Generate code to compute texture level of detail (lambda). 
405 * \param ddx partial derivatives of (s, t, r, q) with respect to X 406 * \param ddy partial derivatives of (s, t, r, q) with respect to Y 407 * \param lod_bias optional float vector with the shader lod bias 408 * \param explicit_lod optional float vector with the explicit lod 409 * \param width scalar int texture width 410 * \param height scalar int texture height 411 * \param depth scalar int texture depth 412 * 413 * XXX: The resulting lod is scalar, so ignore all but the first element of 414 * derivatives, lod_bias, etc that are passed by the shader. 415 */ 416void 417lp_build_lod_selector(struct lp_build_sample_context *bld, 418 unsigned unit, 419 const LLVMValueRef ddx[4], 420 const LLVMValueRef ddy[4], 421 LLVMValueRef lod_bias, /* optional */ 422 LLVMValueRef explicit_lod, /* optional */ 423 unsigned mip_filter, 424 LLVMValueRef *out_lod_ipart, 425 LLVMValueRef *out_lod_fpart) 426 427{ 428 LLVMBuilderRef builder = bld->gallivm->builder; 429 struct lp_build_context *float_bld = &bld->float_bld; 430 LLVMValueRef lod; 431 432 *out_lod_ipart = bld->int_bld.zero; 433 *out_lod_fpart = bld->float_bld.zero; 434 435 if (bld->static_state->min_max_lod_equal) { 436 /* User is forcing sampling from a particular mipmap level. 437 * This is hit during mipmap generation. 
438 */ 439 LLVMValueRef min_lod = 440 bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit); 441 442 lod = min_lod; 443 } 444 else { 445 LLVMValueRef sampler_lod_bias = 446 bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit); 447 LLVMValueRef index0 = lp_build_const_int32(bld->gallivm, 0); 448 449 if (explicit_lod) { 450 lod = LLVMBuildExtractElement(builder, explicit_lod, 451 index0, ""); 452 } 453 else { 454 LLVMValueRef rho; 455 456 rho = lp_build_rho(bld, unit, ddx, ddy); 457 458 /* 459 * Compute lod = log2(rho) 460 */ 461 462 if (!lod_bias && 463 !bld->static_state->lod_bias_non_zero && 464 !bld->static_state->apply_max_lod && 465 !bld->static_state->apply_min_lod) { 466 /* 467 * Special case when there are no post-log2 adjustments, which 468 * saves instructions but keeping the integer and fractional lod 469 * computations separate from the start. 470 */ 471 472 if (mip_filter == PIPE_TEX_MIPFILTER_NONE || 473 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { 474 *out_lod_ipart = lp_build_ilog2(float_bld, rho); 475 *out_lod_fpart = bld->float_bld.zero; 476 return; 477 } 478 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR && 479 !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { 480 lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR, 481 out_lod_ipart, out_lod_fpart); 482 return; 483 } 484 } 485 486 if (0) { 487 lod = lp_build_log2(float_bld, rho); 488 } 489 else { 490 lod = lp_build_fast_log2(float_bld, rho); 491 } 492 493 /* add shader lod bias */ 494 if (lod_bias) { 495 lod_bias = LLVMBuildExtractElement(builder, lod_bias, 496 index0, ""); 497 lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias"); 498 } 499 } 500 501 /* add sampler lod bias */ 502 if (bld->static_state->lod_bias_non_zero) 503 lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias"); 504 505 506 /* clamp lod */ 507 if (bld->static_state->apply_max_lod) { 508 LLVMValueRef max_lod = 509 bld->dynamic_state->max_lod(bld->dynamic_state, 
bld->gallivm, unit); 510 511 lod = lp_build_min(float_bld, lod, max_lod); 512 } 513 if (bld->static_state->apply_min_lod) { 514 LLVMValueRef min_lod = 515 bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit); 516 517 lod = lp_build_max(float_bld, lod, min_lod); 518 } 519 } 520 521 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 522 if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) { 523 lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR, 524 out_lod_ipart, out_lod_fpart); 525 } 526 else { 527 lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart); 528 } 529 530 lp_build_name(*out_lod_fpart, "lod_fpart"); 531 } 532 else { 533 *out_lod_ipart = lp_build_iround(float_bld, lod); 534 } 535 536 lp_build_name(*out_lod_ipart, "lod_ipart"); 537 538 return; 539} 540 541 542/** 543 * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer 544 * mipmap level index. 545 * Note: this is all scalar code. 546 * \param lod scalar float texture level of detail 547 * \param level_out returns integer 548 */ 549void 550lp_build_nearest_mip_level(struct lp_build_sample_context *bld, 551 unsigned unit, 552 LLVMValueRef lod_ipart, 553 LLVMValueRef *level_out) 554{ 555 struct lp_build_context *int_bld = &bld->int_bld; 556 LLVMValueRef first_level, last_level, level; 557 558 first_level = bld->dynamic_state->first_level(bld->dynamic_state, 559 bld->gallivm, unit); 560 last_level = bld->dynamic_state->last_level(bld->dynamic_state, 561 bld->gallivm, unit); 562 563 /* convert float lod to integer */ 564 level = lp_build_add(int_bld, lod_ipart, first_level); 565 566 /* clamp level to legal range of levels */ 567 *level_out = lp_build_clamp(int_bld, level, first_level, last_level); 568} 569 570 571/** 572 * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to 573 * two (adjacent) mipmap level indexes. Later, we'll sample from those 574 * two mipmap levels and interpolate between them. 
575 */ 576void 577lp_build_linear_mip_levels(struct lp_build_sample_context *bld, 578 unsigned unit, 579 LLVMValueRef lod_ipart, 580 LLVMValueRef *lod_fpart_inout, 581 LLVMValueRef *level0_out, 582 LLVMValueRef *level1_out) 583{ 584 LLVMBuilderRef builder = bld->gallivm->builder; 585 struct lp_build_context *int_bld = &bld->int_bld; 586 struct lp_build_context *float_bld = &bld->float_bld; 587 LLVMValueRef first_level, last_level; 588 LLVMValueRef clamp_min; 589 LLVMValueRef clamp_max; 590 591 first_level = bld->dynamic_state->first_level(bld->dynamic_state, 592 bld->gallivm, unit); 593 594 *level0_out = lp_build_add(int_bld, lod_ipart, first_level); 595 *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one); 596 597 last_level = bld->dynamic_state->last_level(bld->dynamic_state, 598 bld->gallivm, unit); 599 600 /* 601 * Clamp both *level0_out and *level1_out to [first_level, last_level], with 602 * the minimum number of comparisons, and zeroing lod_fpart in the extreme 603 * ends in the process. 
604 */ 605 606 /* *level0_out < first_level */ 607 clamp_min = LLVMBuildICmp(builder, LLVMIntSLT, 608 *level0_out, first_level, 609 "clamp_lod_to_first"); 610 611 *level0_out = LLVMBuildSelect(builder, clamp_min, 612 first_level, *level0_out, ""); 613 614 *level1_out = LLVMBuildSelect(builder, clamp_min, 615 first_level, *level1_out, ""); 616 617 *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min, 618 float_bld->zero, *lod_fpart_inout, ""); 619 620 /* *level0_out >= last_level */ 621 clamp_max = LLVMBuildICmp(builder, LLVMIntSGE, 622 *level0_out, last_level, 623 "clamp_lod_to_last"); 624 625 *level0_out = LLVMBuildSelect(builder, clamp_max, 626 last_level, *level0_out, ""); 627 628 *level1_out = LLVMBuildSelect(builder, clamp_max, 629 last_level, *level1_out, ""); 630 631 *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max, 632 float_bld->zero, *lod_fpart_inout, ""); 633 634 lp_build_name(*level0_out, "sampler%u_miplevel0", unit); 635 lp_build_name(*level1_out, "sampler%u_miplevel1", unit); 636 lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit); 637} 638 639 640/** 641 * Return pointer to a single mipmap level. 642 * \param data_array array of pointers to mipmap levels 643 * \param level integer mipmap level 644 */ 645LLVMValueRef 646lp_build_get_mipmap_level(struct lp_build_sample_context *bld, 647 LLVMValueRef level) 648{ 649 LLVMBuilderRef builder = bld->gallivm->builder; 650 LLVMValueRef indexes[2], data_ptr; 651 652 indexes[0] = lp_build_const_int32(bld->gallivm, 0); 653 indexes[1] = level; 654 data_ptr = LLVMBuildGEP(builder, bld->data_array, indexes, 2, ""); 655 data_ptr = LLVMBuildLoad(builder, data_ptr, ""); 656 return data_ptr; 657} 658 659 660LLVMValueRef 661lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, 662 int level) 663{ 664 LLVMValueRef lvl = lp_build_const_int32(bld->gallivm, level); 665 return lp_build_get_mipmap_level(bld, lvl); 666} 667 668 669/** 670 * Codegen equivalent for u_minify(). 
671 * Return max(1, base_size >> level); 672 */ 673static LLVMValueRef 674lp_build_minify(struct lp_build_context *bld, 675 LLVMValueRef base_size, 676 LLVMValueRef level) 677{ 678 LLVMBuilderRef builder = bld->gallivm->builder; 679 assert(lp_check_value(bld->type, base_size)); 680 assert(lp_check_value(bld->type, level)); 681 682 if (level == bld->zero) { 683 /* if we're using mipmap level zero, no minification is needed */ 684 return base_size; 685 } 686 else { 687 LLVMValueRef size = 688 LLVMBuildLShr(builder, base_size, level, "minify"); 689 assert(bld->type.sign); 690 size = lp_build_max(bld, size, bld->one); 691 return size; 692 } 693} 694 695 696/** 697 * Dereference stride_array[mipmap_level] array to get a stride. 698 * Return stride as a vector. 699 */ 700static LLVMValueRef 701lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, 702 LLVMValueRef stride_array, LLVMValueRef level) 703{ 704 LLVMBuilderRef builder = bld->gallivm->builder; 705 LLVMValueRef indexes[2], stride; 706 indexes[0] = lp_build_const_int32(bld->gallivm, 0); 707 indexes[1] = level; 708 stride = LLVMBuildGEP(builder, stride_array, indexes, 2, ""); 709 stride = LLVMBuildLoad(builder, stride, ""); 710 stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); 711 return stride; 712} 713 714 715/** 716 * When sampling a mipmap, we need to compute the width, height, depth 717 * of the source levels from the level indexes. This helper function 718 * does that. 
719 */ 720void 721lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld, 722 LLVMValueRef ilevel, 723 LLVMValueRef *out_size, 724 LLVMValueRef *row_stride_vec, 725 LLVMValueRef *img_stride_vec) 726{ 727 const unsigned dims = bld->dims; 728 LLVMValueRef ilevel_vec; 729 730 ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel); 731 732 /* 733 * Compute width, height, depth at mipmap level 'ilevel' 734 */ 735 *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec); 736 737 if (dims >= 2) { 738 *row_stride_vec = lp_build_get_level_stride_vec(bld, 739 bld->row_stride_array, 740 ilevel); 741 if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { 742 *img_stride_vec = lp_build_get_level_stride_vec(bld, 743 bld->img_stride_array, 744 ilevel); 745 } 746 } 747} 748 749 750/** 751 * Extract and broadcast texture size. 752 * 753 * @param size_type type of the texture size vector (either 754 * bld->int_size_type or bld->float_size_type) 755 * @param coord_type type of the texture size vector (either 756 * bld->int_coord_type or bld->coord_type) 757 * @param int_size vector with the integer texture size (width, height, 758 * depth) 759 */ 760void 761lp_build_extract_image_sizes(struct lp_build_sample_context *bld, 762 struct lp_type size_type, 763 struct lp_type coord_type, 764 LLVMValueRef size, 765 LLVMValueRef *out_width, 766 LLVMValueRef *out_height, 767 LLVMValueRef *out_depth) 768{ 769 const unsigned dims = bld->dims; 770 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); 771 772 *out_width = lp_build_extract_broadcast(bld->gallivm, 773 size_type, 774 coord_type, 775 size, 776 LLVMConstInt(i32t, 0, 0)); 777 if (dims >= 2) { 778 *out_height = lp_build_extract_broadcast(bld->gallivm, 779 size_type, 780 coord_type, 781 size, 782 LLVMConstInt(i32t, 1, 0)); 783 if (dims == 3) { 784 *out_depth = lp_build_extract_broadcast(bld->gallivm, 785 size_type, 786 coord_type, 787 size, 788 LLVMConstInt(i32t, 2, 0)); 789 
} 790 } 791} 792 793 794/** 795 * Unnormalize coords. 796 * 797 * @param int_size vector with the integer texture size (width, height, depth) 798 */ 799void 800lp_build_unnormalized_coords(struct lp_build_sample_context *bld, 801 LLVMValueRef flt_size, 802 LLVMValueRef *s, 803 LLVMValueRef *t, 804 LLVMValueRef *r) 805{ 806 const unsigned dims = bld->dims; 807 LLVMValueRef width; 808 LLVMValueRef height; 809 LLVMValueRef depth; 810 811 lp_build_extract_image_sizes(bld, 812 bld->float_size_type, 813 bld->coord_type, 814 flt_size, 815 &width, 816 &height, 817 &depth); 818 819 /* s = s * width, t = t * height */ 820 *s = lp_build_mul(&bld->coord_bld, *s, width); 821 if (dims >= 2) { 822 *t = lp_build_mul(&bld->coord_bld, *t, height); 823 if (dims >= 3) { 824 *r = lp_build_mul(&bld->coord_bld, *r, depth); 825 } 826 } 827} 828 829 830/** Helper used by lp_build_cube_lookup() */ 831static LLVMValueRef 832lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) 833{ 834 /* ima = -0.5 / abs(coord); */ 835 LLVMValueRef negHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, -0.5); 836 LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); 837 LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); 838 return ima; 839} 840 841 842/** 843 * Helper used by lp_build_cube_lookup() 844 * \param sign scalar +1 or -1 845 * \param coord float vector 846 * \param ima float vector 847 */ 848static LLVMValueRef 849lp_build_cube_coord(struct lp_build_context *coord_bld, 850 LLVMValueRef sign, int negate_coord, 851 LLVMValueRef coord, LLVMValueRef ima) 852{ 853 /* return negate(coord) * ima * sign + 0.5; */ 854 LLVMValueRef half = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5); 855 LLVMValueRef res; 856 857 assert(negate_coord == +1 || negate_coord == -1); 858 859 if (negate_coord == -1) { 860 coord = lp_build_negate(coord_bld, coord); 861 } 862 863 res = lp_build_mul(coord_bld, coord, ima); 864 if (sign) { 865 sign = 
lp_build_broadcast_scalar(coord_bld, sign); 866 res = lp_build_mul(coord_bld, res, sign); 867 } 868 res = lp_build_add(coord_bld, res, half); 869 870 return res; 871} 872 873 874/** Helper used by lp_build_cube_lookup() 875 * Return (major_coord >= 0) ? pos_face : neg_face; 876 */ 877static LLVMValueRef 878lp_build_cube_face(struct lp_build_sample_context *bld, 879 LLVMValueRef major_coord, 880 unsigned pos_face, unsigned neg_face) 881{ 882 struct gallivm_state *gallivm = bld->gallivm; 883 LLVMBuilderRef builder = gallivm->builder; 884 LLVMValueRef cmp = LLVMBuildFCmp(builder, LLVMRealUGE, 885 major_coord, 886 bld->float_bld.zero, ""); 887 LLVMValueRef pos = lp_build_const_int32(gallivm, pos_face); 888 LLVMValueRef neg = lp_build_const_int32(gallivm, neg_face); 889 LLVMValueRef res = LLVMBuildSelect(builder, cmp, pos, neg, ""); 890 return res; 891} 892 893 894 895/** 896 * Generate code to do cube face selection and compute per-face texcoords. 897 */ 898void 899lp_build_cube_lookup(struct lp_build_sample_context *bld, 900 LLVMValueRef s, 901 LLVMValueRef t, 902 LLVMValueRef r, 903 LLVMValueRef *face, 904 LLVMValueRef *face_s, 905 LLVMValueRef *face_t) 906{ 907 struct lp_build_context *float_bld = &bld->float_bld; 908 struct lp_build_context *coord_bld = &bld->coord_bld; 909 LLVMBuilderRef builder = bld->gallivm->builder; 910 LLVMValueRef rx, ry, rz; 911 LLVMValueRef arx, ary, arz; 912 LLVMValueRef c25 = lp_build_const_float(bld->gallivm, 0.25); 913 LLVMValueRef arx_ge_ary, arx_ge_arz; 914 LLVMValueRef ary_ge_arx, ary_ge_arz; 915 LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; 916 917 assert(bld->coord_bld.type.length == 4); 918 919 /* 920 * Use the average of the four pixel's texcoords to choose the face. 
921 */ 922 rx = lp_build_mul(float_bld, c25, 923 lp_build_sum_vector(&bld->coord_bld, s)); 924 ry = lp_build_mul(float_bld, c25, 925 lp_build_sum_vector(&bld->coord_bld, t)); 926 rz = lp_build_mul(float_bld, c25, 927 lp_build_sum_vector(&bld->coord_bld, r)); 928 929 arx = lp_build_abs(float_bld, rx); 930 ary = lp_build_abs(float_bld, ry); 931 arz = lp_build_abs(float_bld, rz); 932 933 /* 934 * Compare sign/magnitude of rx,ry,rz to determine face 935 */ 936 arx_ge_ary = LLVMBuildFCmp(builder, LLVMRealUGE, arx, ary, ""); 937 arx_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, arx, arz, ""); 938 ary_ge_arx = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arx, ""); 939 ary_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arz, ""); 940 941 arx_ge_ary_arz = LLVMBuildAnd(builder, arx_ge_ary, arx_ge_arz, ""); 942 ary_ge_arx_arz = LLVMBuildAnd(builder, ary_ge_arx, ary_ge_arz, ""); 943 944 { 945 struct lp_build_if_state if_ctx; 946 LLVMValueRef face_s_var; 947 LLVMValueRef face_t_var; 948 LLVMValueRef face_var; 949 950 face_s_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_s_var"); 951 face_t_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_t_var"); 952 face_var = lp_build_alloca(bld->gallivm, bld->int_bld.vec_type, "face_var"); 953 954 lp_build_if(&if_ctx, bld->gallivm, arx_ge_ary_arz); 955 { 956 /* +/- X face */ 957 LLVMValueRef sign = lp_build_sgn(float_bld, rx); 958 LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); 959 *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); 960 *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); 961 *face = lp_build_cube_face(bld, rx, 962 PIPE_TEX_FACE_POS_X, 963 PIPE_TEX_FACE_NEG_X); 964 LLVMBuildStore(builder, *face_s, face_s_var); 965 LLVMBuildStore(builder, *face_t, face_t_var); 966 LLVMBuildStore(builder, *face, face_var); 967 } 968 lp_build_else(&if_ctx); 969 { 970 struct lp_build_if_state if_ctx2; 971 972 lp_build_if(&if_ctx2, bld->gallivm, ary_ge_arx_arz); 973 { 974 /* +/- Y face */ 975 
LLVMValueRef sign = lp_build_sgn(float_bld, ry); 976 LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); 977 *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); 978 *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima); 979 *face = lp_build_cube_face(bld, ry, 980 PIPE_TEX_FACE_POS_Y, 981 PIPE_TEX_FACE_NEG_Y); 982 LLVMBuildStore(builder, *face_s, face_s_var); 983 LLVMBuildStore(builder, *face_t, face_t_var); 984 LLVMBuildStore(builder, *face, face_var); 985 } 986 lp_build_else(&if_ctx2); 987 { 988 /* +/- Z face */ 989 LLVMValueRef sign = lp_build_sgn(float_bld, rz); 990 LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); 991 *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima); 992 *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); 993 *face = lp_build_cube_face(bld, rz, 994 PIPE_TEX_FACE_POS_Z, 995 PIPE_TEX_FACE_NEG_Z); 996 LLVMBuildStore(builder, *face_s, face_s_var); 997 LLVMBuildStore(builder, *face_t, face_t_var); 998 LLVMBuildStore(builder, *face, face_var); 999 } 1000 lp_build_endif(&if_ctx2); 1001 } 1002 1003 lp_build_endif(&if_ctx); 1004 1005 *face_s = LLVMBuildLoad(builder, face_s_var, "face_s"); 1006 *face_t = LLVMBuildLoad(builder, face_t_var, "face_t"); 1007 *face = LLVMBuildLoad(builder, face_var, "face"); 1008 } 1009} 1010 1011 1012/** 1013 * Compute the partial offset of a pixel block along an arbitrary axis. 
1014 * 1015 * @param coord coordinate in pixels 1016 * @param stride number of bytes between rows of successive pixel blocks 1017 * @param block_length number of pixels in a pixels block along the coordinate 1018 * axis 1019 * @param out_offset resulting relative offset of the pixel block in bytes 1020 * @param out_subcoord resulting sub-block pixel coordinate 1021 */ 1022void 1023lp_build_sample_partial_offset(struct lp_build_context *bld, 1024 unsigned block_length, 1025 LLVMValueRef coord, 1026 LLVMValueRef stride, 1027 LLVMValueRef *out_offset, 1028 LLVMValueRef *out_subcoord) 1029{ 1030 LLVMBuilderRef builder = bld->gallivm->builder; 1031 LLVMValueRef offset; 1032 LLVMValueRef subcoord; 1033 1034 if (block_length == 1) { 1035 subcoord = bld->zero; 1036 } 1037 else { 1038 /* 1039 * Pixel blocks have power of two dimensions. LLVM should convert the 1040 * rem/div to bit arithmetic. 1041 * TODO: Verify this. 1042 * It does indeed BUT it does transform it to scalar (and back) when doing so 1043 * (using roughly extract, shift/and, mov, unpack) (llvm 2.7). 1044 * The generated code looks seriously unfunny and is quite expensive. 
1045 */ 1046#if 0 1047 LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length); 1048 subcoord = LLVMBuildURem(builder, coord, block_width, ""); 1049 coord = LLVMBuildUDiv(builder, coord, block_width, ""); 1050#else 1051 unsigned logbase2 = util_logbase2(block_length); 1052 LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2); 1053 LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1); 1054 subcoord = LLVMBuildAnd(builder, coord, block_mask, ""); 1055 coord = LLVMBuildLShr(builder, coord, block_shift, ""); 1056#endif 1057 } 1058 1059 offset = lp_build_mul(bld, coord, stride); 1060 1061 assert(out_offset); 1062 assert(out_subcoord); 1063 1064 *out_offset = offset; 1065 *out_subcoord = subcoord; 1066} 1067 1068 1069/** 1070 * Compute the offset of a pixel block. 1071 * 1072 * x, y, z, y_stride, z_stride are vectors, and they refer to pixels. 1073 * 1074 * Returns the relative offset and i,j sub-block coordinates 1075 */ 1076void 1077lp_build_sample_offset(struct lp_build_context *bld, 1078 const struct util_format_description *format_desc, 1079 LLVMValueRef x, 1080 LLVMValueRef y, 1081 LLVMValueRef z, 1082 LLVMValueRef y_stride, 1083 LLVMValueRef z_stride, 1084 LLVMValueRef *out_offset, 1085 LLVMValueRef *out_i, 1086 LLVMValueRef *out_j) 1087{ 1088 LLVMValueRef x_stride; 1089 LLVMValueRef offset; 1090 1091 x_stride = lp_build_const_vec(bld->gallivm, bld->type, 1092 format_desc->block.bits/8); 1093 1094 lp_build_sample_partial_offset(bld, 1095 format_desc->block.width, 1096 x, x_stride, 1097 &offset, out_i); 1098 1099 if (y && y_stride) { 1100 LLVMValueRef y_offset; 1101 lp_build_sample_partial_offset(bld, 1102 format_desc->block.height, 1103 y, y_stride, 1104 &y_offset, out_j); 1105 offset = lp_build_add(bld, offset, y_offset); 1106 } 1107 else { 1108 *out_j = bld->zero; 1109 } 1110 1111 if (z && z_stride) { 1112 LLVMValueRef z_offset; 1113 LLVMValueRef k; 1114 
lp_build_sample_partial_offset(bld, 1115 1, /* pixel blocks are always 2D */ 1116 z, z_stride, 1117 &z_offset, &k); 1118 offset = lp_build_add(bld, offset, z_offset); 1119 } 1120 1121 *out_offset = offset; 1122} 1123