lp_bld_depth.c revision cc40abad519cc0f765c6d8f6fad4154bed8dd9c2
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Depth/stencil testing to LLVM IR translation. 31 * 32 * To be done accurately/efficiently the depth/stencil test must be done with 33 * the same type/format of the depth/stencil buffer, which implies massaging 34 * the incoming depths to fit into place. Using a more straightforward 35 * type/format for depth/stencil values internally and only convert when 36 * flushing would avoid this, but it would most likely result in depth fighting 37 * artifacts. 38 * 39 * We are free to use a different pixel layout though. 
Since our basic
 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
 * values tiled, a quad at a time.  That is, a depth buffer containing
 *
 *  Z11 Z12 Z13 Z14 ...
 *  Z21 Z22 Z23 Z24 ...
 *  Z31 Z32 Z33 Z34 ...
 *  Z41 Z42 Z43 Z44 ...
 *  ... ... ... ... ...
 *
 * will actually be stored in memory as
 *
 *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
 *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
 *  ... ... ... ... ... ... ... ... ...
 *
 *
 * Stencil test:
 * Two-sided stencil test is supported but probably not as efficient as
 * it could be.  The front and back results are both computed and a
 * select on the polygon orientation mask picks the one to use.  We'd
 * have to measure performance against a branching (if/then/else)
 * implementation to see which is better.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */

#include "pipe/p_state.h"
#include "util/u_format.h"

#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_conv.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_swizzle.h"

#include "lp_bld_depth.h"


/**
 * Used to select fields from pipe_stencil_state: which of the
 * fail_op / zfail_op / zpass_op operators is to be applied.
 */
enum stencil_op {
   S_FAIL_OP,
   Z_FAIL_OP,
   Z_PASS_OP
};



/**
 * Do the stencil test comparison (compare FB stencil values against ref value).
 * This will be used twice when generating two-sided stencil code.
96 * \param stencil the front/back stencil state 97 * \param stencilRef the stencil reference value, replicated as a vector 98 * \param stencilVals vector of stencil values from framebuffer 99 * \return vector mask of pass/fail values (~0 or 0) 100 */ 101static LLVMValueRef 102lp_build_stencil_test_single(struct lp_build_context *bld, 103 const struct pipe_stencil_state *stencil, 104 LLVMValueRef stencilRef, 105 LLVMValueRef stencilVals) 106{ 107 const unsigned stencilMax = 255; /* XXX fix */ 108 struct lp_type type = bld->type; 109 LLVMValueRef res; 110 111 assert(type.sign); 112 113 assert(stencil->enabled); 114 115 if (stencil->valuemask != stencilMax) { 116 /* compute stencilRef = stencilRef & valuemask */ 117 LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask); 118 stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, ""); 119 /* compute stencilVals = stencilVals & valuemask */ 120 stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, ""); 121 } 122 123 res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); 124 125 return res; 126} 127 128 129/** 130 * Do the one or two-sided stencil test comparison. 131 * \sa lp_build_stencil_test_single 132 * \param front_facing an integer vector mask, indicating front (~0) or back 133 * (0) facing polygon. If NULL, assume front-facing. 
134 */ 135static LLVMValueRef 136lp_build_stencil_test(struct lp_build_context *bld, 137 const struct pipe_stencil_state stencil[2], 138 LLVMValueRef stencilRefs[2], 139 LLVMValueRef stencilVals, 140 LLVMValueRef front_facing) 141{ 142 LLVMValueRef res; 143 144 assert(stencil[0].enabled); 145 146 /* do front face test */ 147 res = lp_build_stencil_test_single(bld, &stencil[0], 148 stencilRefs[0], stencilVals); 149 150 if (stencil[1].enabled && front_facing) { 151 /* do back face test */ 152 LLVMValueRef back_res; 153 154 back_res = lp_build_stencil_test_single(bld, &stencil[1], 155 stencilRefs[1], stencilVals); 156 157 res = lp_build_select(bld, front_facing, res, back_res); 158 } 159 160 return res; 161} 162 163 164/** 165 * Apply the stencil operator (add/sub/keep/etc) to the given vector 166 * of stencil values. 167 * \return new stencil values vector 168 */ 169static LLVMValueRef 170lp_build_stencil_op_single(struct lp_build_context *bld, 171 const struct pipe_stencil_state *stencil, 172 enum stencil_op op, 173 LLVMValueRef stencilRef, 174 LLVMValueRef stencilVals) 175 176{ 177 struct lp_type type = bld->type; 178 LLVMValueRef res; 179 LLVMValueRef max = lp_build_const_int_vec(type, 0xff); 180 unsigned stencil_op; 181 182 assert(type.sign); 183 184 switch (op) { 185 case S_FAIL_OP: 186 stencil_op = stencil->fail_op; 187 break; 188 case Z_FAIL_OP: 189 stencil_op = stencil->zfail_op; 190 break; 191 case Z_PASS_OP: 192 stencil_op = stencil->zpass_op; 193 break; 194 default: 195 assert(0 && "Invalid stencil_op mode"); 196 stencil_op = PIPE_STENCIL_OP_KEEP; 197 } 198 199 switch (stencil_op) { 200 case PIPE_STENCIL_OP_KEEP: 201 res = stencilVals; 202 /* we can return early for this case */ 203 return res; 204 case PIPE_STENCIL_OP_ZERO: 205 res = bld->zero; 206 break; 207 case PIPE_STENCIL_OP_REPLACE: 208 res = stencilRef; 209 break; 210 case PIPE_STENCIL_OP_INCR: 211 res = lp_build_add(bld, stencilVals, bld->one); 212 res = lp_build_min(bld, res, max); 213 break; 214 
case PIPE_STENCIL_OP_DECR: 215 res = lp_build_sub(bld, stencilVals, bld->one); 216 res = lp_build_max(bld, res, bld->zero); 217 break; 218 case PIPE_STENCIL_OP_INCR_WRAP: 219 res = lp_build_add(bld, stencilVals, bld->one); 220 res = LLVMBuildAnd(bld->builder, res, max, ""); 221 break; 222 case PIPE_STENCIL_OP_DECR_WRAP: 223 res = lp_build_sub(bld, stencilVals, bld->one); 224 res = LLVMBuildAnd(bld->builder, res, max, ""); 225 break; 226 case PIPE_STENCIL_OP_INVERT: 227 res = LLVMBuildNot(bld->builder, stencilVals, ""); 228 res = LLVMBuildAnd(bld->builder, res, max, ""); 229 break; 230 default: 231 assert(0 && "bad stencil op mode"); 232 res = bld->undef; 233 } 234 235 return res; 236} 237 238 239/** 240 * Do the one or two-sided stencil test op/update. 241 */ 242static LLVMValueRef 243lp_build_stencil_op(struct lp_build_context *bld, 244 const struct pipe_stencil_state stencil[2], 245 enum stencil_op op, 246 LLVMValueRef stencilRefs[2], 247 LLVMValueRef stencilVals, 248 LLVMValueRef mask, 249 LLVMValueRef front_facing) 250 251{ 252 LLVMValueRef res; 253 254 assert(stencil[0].enabled); 255 256 /* do front face op */ 257 res = lp_build_stencil_op_single(bld, &stencil[0], op, 258 stencilRefs[0], stencilVals); 259 260 if (stencil[1].enabled && front_facing) { 261 /* do back face op */ 262 LLVMValueRef back_res; 263 264 back_res = lp_build_stencil_op_single(bld, &stencil[1], op, 265 stencilRefs[1], stencilVals); 266 267 res = lp_build_select(bld, front_facing, res, back_res); 268 } 269 270 if (stencil->writemask != 0xff) { 271 /* mask &= stencil->writemask */ 272 LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask); 273 mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); 274 /* res = (res & mask) | (stencilVals & ~mask) */ 275 res = lp_build_select_bitwise(bld, writemask, res, stencilVals); 276 } 277 else { 278 /* res = mask ? 
res : stencilVals */ 279 res = lp_build_select(bld, mask, res, stencilVals); 280 } 281 282 return res; 283} 284 285 286 287/** 288 * Return a type appropriate for depth/stencil testing. 289 */ 290struct lp_type 291lp_depth_type(const struct util_format_description *format_desc, 292 unsigned length) 293{ 294 struct lp_type type; 295 unsigned swizzle; 296 297 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 298 assert(format_desc->block.width == 1); 299 assert(format_desc->block.height == 1); 300 301 swizzle = format_desc->swizzle[0]; 302 assert(swizzle < 4); 303 304 memset(&type, 0, sizeof type); 305 type.width = format_desc->block.bits; 306 307 if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { 308 type.floating = TRUE; 309 assert(swizzle == 0); 310 assert(format_desc->channel[swizzle].size == format_desc->block.bits); 311 } 312 else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { 313 assert(format_desc->block.bits <= 32); 314 if(format_desc->channel[swizzle].normalized) 315 type.norm = TRUE; 316 } 317 else 318 assert(0); 319 320 assert(type.width <= length); 321 type.length = length / type.width; 322 323 return type; 324} 325 326 327/** 328 * Compute bitmask and bit shift to apply to the incoming fragment Z values 329 * and the Z buffer values needed before doing the Z comparison. 330 * 331 * Note that we leave the Z bits in the position that we find them 332 * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us 333 * get by with fewer bit twiddling steps. 
334 */ 335static boolean 336get_z_shift_and_mask(const struct util_format_description *format_desc, 337 unsigned *shift, unsigned *mask) 338{ 339 const unsigned total_bits = format_desc->block.bits; 340 unsigned z_swizzle; 341 unsigned chan; 342 unsigned padding_left, padding_right; 343 344 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 345 assert(format_desc->block.width == 1); 346 assert(format_desc->block.height == 1); 347 348 z_swizzle = format_desc->swizzle[0]; 349 350 if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) 351 return FALSE; 352 353 padding_right = 0; 354 for (chan = 0; chan < z_swizzle; ++chan) 355 padding_right += format_desc->channel[chan].size; 356 357 padding_left = 358 total_bits - (padding_right + format_desc->channel[z_swizzle].size); 359 360 if (padding_left || padding_right) { 361 unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; 362 unsigned long long mask_right = (1ULL << (padding_right)) - 1; 363 *mask = mask_left ^ mask_right; 364 } 365 else { 366 *mask = 0xffffffff; 367 } 368 369 *shift = padding_left; 370 371 return TRUE; 372} 373 374 375/** 376 * Compute bitmask and bit shift to apply to the framebuffer pixel values 377 * to put the stencil bits in the least significant position. 378 * (i.e. 0x000000ff) 379 */ 380static boolean 381get_s_shift_and_mask(const struct util_format_description *format_desc, 382 unsigned *shift, unsigned *mask) 383{ 384 unsigned s_swizzle; 385 unsigned chan, sz; 386 387 s_swizzle = format_desc->swizzle[1]; 388 389 if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) 390 return FALSE; 391 392 *shift = 0; 393 for (chan = 0; chan < s_swizzle; chan++) 394 *shift += format_desc->channel[chan].size; 395 396 sz = format_desc->channel[s_swizzle].size; 397 *mask = (1U << sz) - 1U; 398 399 return TRUE; 400} 401 402 403/** 404 * Perform the occlusion test and increase the counter. 405 * Test the depth mask. Add the number of channel which has none zero mask 406 * into the occlusion counter. e.g. 
maskvalue is {-1, -1, -1, -1}. 407 * The counter will add 4. 408 * 409 * \param type holds element type of the mask vector. 410 * \param maskvalue is the depth test mask. 411 * \param counter is a pointer of the uint32 counter. 412 */ 413void 414lp_build_occlusion_count(LLVMBuilderRef builder, 415 struct lp_type type, 416 LLVMValueRef maskvalue, 417 LLVMValueRef counter) 418{ 419 LLVMValueRef countmask = lp_build_const_int_vec(type, 1); 420 LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); 421 LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16); 422 LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti"); 423 LLVMValueRef maskarray[4] = { 424 LLVMConstInt(LLVMInt32Type(), 0, 0), 425 LLVMConstInt(LLVMInt32Type(), 4, 0), 426 LLVMConstInt(LLVMInt32Type(), 8, 0), 427 LLVMConstInt(LLVMInt32Type(), 12, 0), 428 }; 429 LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4); 430 LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev"); 431 LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle"); 432 LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle); 433 LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig"); 434 LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr"); 435 LLVMBuildStore(builder, incr, counter); 436} 437 438 439 440/** 441 * Generate code for performing depth and/or stencil tests. 442 * We operate on a vector of values (typically a 2x2 quad). 
443 * 444 * \param depth the depth test state 445 * \param stencil the front/back stencil state 446 * \param type the data type of the fragment depth/stencil values 447 * \param format_desc description of the depth/stencil surface 448 * \param mask the alive/dead pixel mask for the quad (vector) 449 * \param stencil_refs the front/back stencil ref values (scalar) 450 * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) 451 * \param zs_dst_ptr pointer to depth/stencil values in framebuffer 452 * \param facing contains float value indicating front/back facing polygon 453 */ 454void 455lp_build_depth_stencil_test(LLVMBuilderRef builder, 456 const struct pipe_depth_state *depth, 457 const struct pipe_stencil_state stencil[2], 458 struct lp_type z_src_type, 459 const struct util_format_description *format_desc, 460 struct lp_build_mask_context *mask, 461 LLVMValueRef stencil_refs[2], 462 LLVMValueRef z_src, 463 LLVMValueRef zs_dst_ptr, 464 LLVMValueRef face, 465 LLVMValueRef *zs_value, 466 boolean do_branch) 467{ 468 struct lp_type type; 469 struct lp_build_context bld; 470 struct lp_build_context sbld; 471 struct lp_type s_type; 472 LLVMValueRef zs_dst, z_dst = NULL; 473 LLVMValueRef stencil_vals = NULL; 474 LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; 475 LLVMValueRef z_pass = NULL, s_pass_mask = NULL; 476 LLVMValueRef orig_mask = lp_build_mask_value(mask); 477 LLVMValueRef front_facing = NULL; 478 479 /* Prototype a simpler path: 480 */ 481 if (z_src_type.floating && 482 format_desc->format == PIPE_FORMAT_X8Z24_UNORM && 483 depth->enabled) 484 { 485 LLVMValueRef zscaled; 486 LLVMValueRef const_ffffff_float; 487 LLVMValueRef const_8_int; 488 LLVMTypeRef int32_vec_type; 489 490 /* We know the values in z_dst are all >= 0, so allow 491 * lp_build_compare to use signed compare intrinsics: 492 */ 493 type.floating = 0; 494 type.fixed = 0; 495 type.sign = 1; 496 type.norm = 1; 497 type.width = 32; 498 type.length = z_src_type.length; 499 500 
int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length); 501 502 const_8_int = lp_build_const_int_vec(type, 8); 503 const_ffffff_float = lp_build_const_vec(z_src_type, (float)0xffffff); 504 505 zscaled = LLVMBuildFMul(builder, z_src, const_ffffff_float, "zscaled"); 506 z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src"); 507 508 /* Load current z/stencil value from z/stencil buffer */ 509 z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); 510 z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst"); 511 512 /* compare src Z to dst Z, returning 'pass' mask */ 513 z_pass = lp_build_compare(builder, 514 type, 515 depth->func, z_src, z_dst); 516 517 lp_build_mask_update(mask, z_pass); 518 519 if (do_branch) 520 lp_build_mask_check(mask); 521 522 /* No need to worry about old stencil contents, just blend the 523 * old and new values and shift into the correct position for 524 * storage. 525 */ 526 if (depth->writemask) { 527 type.sign = 1; 528 lp_build_context_init(&bld, builder, type); 529 530 z_dst = lp_build_select(&bld, lp_build_mask_value(mask), z_src, z_dst); 531 z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); 532 *zs_value = z_dst; 533 } 534 535 return; 536 } 537 538 /* 539 * Depths are expected to be between 0 and 1, even if they are stored in 540 * floats. Setting these bits here will ensure that the lp_build_conv() call 541 * below won't try to unnecessarily clamp the incoming values. 542 */ 543 if(z_src_type.floating) { 544 z_src_type.sign = FALSE; 545 z_src_type.norm = TRUE; 546 } 547 else { 548 assert(!z_src_type.sign); 549 assert(z_src_type.norm); 550 } 551 552 /* Pick the depth type. */ 553 type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); 554 555 /* FIXME: Cope with a depth test type with a different bit width. 
*/ 556 assert(type.width == z_src_type.width); 557 assert(type.length == z_src_type.length); 558 559 /* Convert fragment Z from float to integer */ 560 lp_build_conv(builder, z_src_type, type, &z_src, 1, &z_src, 1); 561 562 zs_dst_ptr = LLVMBuildBitCast(builder, 563 zs_dst_ptr, 564 LLVMPointerType(lp_build_vec_type(type), 0), ""); 565 566 567 568 /* Sanity checking */ 569 { 570 const unsigned z_swizzle = format_desc->swizzle[0]; 571 const unsigned s_swizzle = format_desc->swizzle[1]; 572 573 assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || 574 s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); 575 576 assert(depth->enabled || stencil[0].enabled); 577 578 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 579 assert(format_desc->block.width == 1); 580 assert(format_desc->block.height == 1); 581 582 if (stencil[0].enabled) { 583 assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || 584 format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); 585 } 586 587 assert(z_swizzle < 4); 588 assert(format_desc->block.bits == type.width); 589 if (type.floating) { 590 assert(z_swizzle == 0); 591 assert(format_desc->channel[z_swizzle].type == 592 UTIL_FORMAT_TYPE_FLOAT); 593 assert(format_desc->channel[z_swizzle].size == 594 format_desc->block.bits); 595 } 596 else { 597 assert(format_desc->channel[z_swizzle].type == 598 UTIL_FORMAT_TYPE_UNSIGNED); 599 assert(format_desc->channel[z_swizzle].normalized); 600 assert(!type.fixed); 601 assert(!type.sign); 602 assert(type.norm); 603 } 604 } 605 606 607 /* Setup build context for Z vals */ 608 lp_build_context_init(&bld, builder, type); 609 610 /* Setup build context for stencil vals */ 611 s_type = lp_type_int_vec(type.width); 612 lp_build_context_init(&sbld, builder, s_type); 613 614 /* Load current z/stencil value from z/stencil buffer */ 615 zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); 616 617 lp_build_name(zs_dst, "zsbufval"); 618 619 620 /* Compute and apply the Z/stencil bitmasks and shifts. 
621 */ 622 { 623 unsigned z_shift, z_mask; 624 unsigned s_shift, s_mask; 625 626 if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { 627 if (z_shift) { 628 LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); 629 z_src = LLVMBuildLShr(builder, z_src, shift, ""); 630 } 631 632 if (z_mask != 0xffffffff) { 633 LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); 634 z_src = LLVMBuildAnd(builder, z_src, mask, ""); 635 z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); 636 z_bitmask = mask; /* used below */ 637 } 638 else { 639 z_dst = zs_dst; 640 } 641 642 lp_build_name(z_dst, "zsbuf.z"); 643 } 644 645 if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { 646 if (s_shift) { 647 LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); 648 stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); 649 stencil_shift = shift; /* used below */ 650 } 651 else { 652 stencil_vals = zs_dst; 653 } 654 655 if (s_mask != 0xffffffff) { 656 LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); 657 stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); 658 } 659 660 lp_build_name(stencil_vals, "stencil"); 661 } 662 } 663 664 if (stencil[0].enabled) { 665 666 if (face) { 667 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); 668 669 /* front_facing = face > 0.0 ? 
~0 : 0 */ 670 front_facing = LLVMBuildFCmp(builder, LLVMRealUGT, face, zero, ""); 671 front_facing = LLVMBuildSExt(builder, front_facing, 672 LLVMIntType(bld.type.length*bld.type.width), 673 ""); 674 front_facing = LLVMBuildBitCast(builder, front_facing, 675 bld.int_vec_type, ""); 676 } 677 678 /* convert scalar stencil refs into vectors */ 679 stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); 680 stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); 681 682 s_pass_mask = lp_build_stencil_test(&sbld, stencil, 683 stencil_refs, stencil_vals, 684 front_facing); 685 686 /* apply stencil-fail operator */ 687 { 688 LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask); 689 stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, 690 stencil_refs, stencil_vals, 691 s_fail_mask, front_facing); 692 } 693 } 694 695 if (depth->enabled) { 696 /* compare src Z to dst Z, returning 'pass' mask */ 697 z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); 698 699 if (!stencil[0].enabled) { 700 /* We can potentially skip all remaining operations here, but only 701 * if stencil is disabled because we still need to update the stencil 702 * buffer values. Don't need to update Z buffer values. 703 */ 704 lp_build_mask_update(mask, z_pass); 705 706 if (do_branch) { 707 lp_build_mask_check(mask); 708 do_branch = FALSE; 709 } 710 } 711 712 if (depth->writemask) { 713 LLVMValueRef zselectmask = lp_build_mask_value(mask); 714 715 /* mask off bits that failed Z test */ 716 zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); 717 718 /* mask off bits that failed stencil test */ 719 if (s_pass_mask) { 720 zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); 721 } 722 723 /* if combined Z/stencil format, mask off the stencil bits */ 724 if (z_bitmask) { 725 zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); 726 } 727 728 /* Mix the old and new Z buffer values. 
729 * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) 730 */ 731 z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); 732 } 733 734 if (stencil[0].enabled) { 735 /* update stencil buffer values according to z pass/fail result */ 736 LLVMValueRef z_fail_mask, z_pass_mask; 737 738 /* apply Z-fail operator */ 739 z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass); 740 stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, 741 stencil_refs, stencil_vals, 742 z_fail_mask, front_facing); 743 744 /* apply Z-pass operator */ 745 z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); 746 stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, 747 stencil_refs, stencil_vals, 748 z_pass_mask, front_facing); 749 } 750 } 751 else { 752 /* No depth test: apply Z-pass operator to stencil buffer values which 753 * passed the stencil test. 754 */ 755 s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); 756 stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, 757 stencil_refs, stencil_vals, 758 s_pass_mask, front_facing); 759 } 760 761 /* The Z bits are already in the right place but we may need to shift the 762 * stencil bits before ORing Z with Stencil to make the final pixel value. 
763 */ 764 if (stencil_vals && stencil_shift) 765 stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, 766 stencil_shift, ""); 767 768 /* Finally, merge/store the z/stencil values */ 769 if ((depth->enabled && depth->writemask) || 770 (stencil[0].enabled && stencil[0].writemask)) { 771 772 if (z_dst && stencil_vals) 773 zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); 774 else if (z_dst) 775 zs_dst = z_dst; 776 else 777 zs_dst = stencil_vals; 778 779 *zs_value = zs_dst; 780 } 781 782 if (s_pass_mask) 783 lp_build_mask_update(mask, s_pass_mask); 784 785 if (depth->enabled && stencil[0].enabled) 786 lp_build_mask_update(mask, z_pass); 787 788 if (do_branch) 789 lp_build_mask_check(mask); 790 791} 792 793 794 795void 796lp_build_deferred_depth_write(LLVMBuilderRef builder, 797 struct lp_type z_src_type, 798 const struct util_format_description *format_desc, 799 struct lp_build_mask_context *mask, 800 LLVMValueRef zs_dst_ptr, 801 LLVMValueRef zs_value) 802{ 803 struct lp_type type; 804 struct lp_build_context bld; 805 LLVMValueRef z_dst; 806 807 /* XXX: pointlessly redo type logic: 808 */ 809 type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); 810 lp_build_context_init(&bld, builder, type); 811 812 z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); 813 z_dst = lp_build_select(&bld, lp_build_mask_value(mask), zs_value, z_dst); 814 815 LLVMBuildStore(builder, z_dst, zs_dst_ptr); 816} 817