lp_bld_depth.c revision 95c18abb03b035c6fa029cd0852f07fb39951279
1/************************************************************************** 2 * 3 * Copyright 2009-2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Depth/stencil testing to LLVM IR translation. 31 * 32 * To be done accurately/efficiently the depth/stencil test must be done with 33 * the same type/format of the depth/stencil buffer, which implies massaging 34 * the incoming depths to fit into place. Using a more straightforward 35 * type/format for depth/stencil values internally and only convert when 36 * flushing would avoid this, but it would most likely result in depth fighting 37 * artifacts. 38 * 39 * We are free to use a different pixel layout though. Since our basic 40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil 41 * values tiled, a quad at time. That is, a depth buffer containing 42 * 43 * Z11 Z12 Z13 Z14 ... 44 * Z21 Z22 Z23 Z24 ... 45 * Z31 Z32 Z33 Z34 ... 46 * Z41 Z42 Z43 Z44 ... 47 * ... ... ... ... ... 48 * 49 * will actually be stored in memory as 50 * 51 * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ... 52 * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... 53 * ... ... ... ... ... ... ... ... ... 54 * 55 * 56 * @author Jose Fonseca <jfonseca@vmware.com> 57 * @author Brian Paul <jfonseca@vmware.com> 58 */ 59 60#include "pipe/p_state.h" 61#include "util/u_format.h" 62 63#include "gallivm/lp_bld_type.h" 64#include "gallivm/lp_bld_arit.h" 65#include "gallivm/lp_bld_bitarit.h" 66#include "gallivm/lp_bld_const.h" 67#include "gallivm/lp_bld_conv.h" 68#include "gallivm/lp_bld_logic.h" 69#include "gallivm/lp_bld_flow.h" 70#include "gallivm/lp_bld_intr.h" 71#include "gallivm/lp_bld_debug.h" 72#include "gallivm/lp_bld_swizzle.h" 73 74#include "lp_bld_depth.h" 75 76 77/** Used to select fields from pipe_stencil_state */ 78enum stencil_op { 79 S_FAIL_OP, 80 Z_FAIL_OP, 81 Z_PASS_OP 82}; 83 84 85 86/** 87 * Do the stencil test comparison (compare FB stencil values against ref value). 88 * This will be used twice when generating two-sided stencil code. 89 * \param stencil the front/back stencil state 90 * \param stencilRef the stencil reference value, replicated as a vector 91 * \param stencilVals vector of stencil values from framebuffer 92 * \return vector mask of pass/fail values (~0 or 0) 93 */ 94static LLVMValueRef 95lp_build_stencil_test_single(struct lp_build_context *bld, 96 const struct pipe_stencil_state *stencil, 97 LLVMValueRef stencilRef, 98 LLVMValueRef stencilVals) 99{ 100 const unsigned stencilMax = 255; /* XXX fix */ 101 struct lp_type type = bld->type; 102 LLVMValueRef res; 103 104 assert(type.sign); 105 106 assert(stencil->enabled); 107 108 if (stencil->valuemask != stencilMax) { 109 /* compute stencilRef = stencilRef & valuemask */ 110 LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask); 111 stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, ""); 112 /* compute stencilVals = stencilVals & valuemask */ 113 stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, ""); 114 } 115 116 res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); 117 118 return res; 119} 120 121 122/** 123 * Do the one or two-sided stencil test comparison. 124 * \sa lp_build_stencil_test_single 125 * \param front_facing an integer vector mask, indicating front (~0) or back 126 * (0) facing polygon. If NULL, assume front-facing. 127 */ 128static LLVMValueRef 129lp_build_stencil_test(struct lp_build_context *bld, 130 const struct pipe_stencil_state stencil[2], 131 LLVMValueRef stencilRefs[2], 132 LLVMValueRef stencilVals, 133 LLVMValueRef front_facing) 134{ 135 LLVMValueRef res; 136 137 assert(stencil[0].enabled); 138 139 /* do front face test */ 140 res = lp_build_stencil_test_single(bld, &stencil[0], 141 stencilRefs[0], stencilVals); 142 143 if (stencil[1].enabled && front_facing) { 144 /* do back face test */ 145 LLVMValueRef back_res; 146 147 back_res = lp_build_stencil_test_single(bld, &stencil[1], 148 stencilRefs[1], stencilVals); 149 150 res = lp_build_select(bld, front_facing, res, back_res); 151 } 152 153 return res; 154} 155 156 157/** 158 * Apply the stencil operator (add/sub/keep/etc) to the given vector 159 * of stencil values. 160 * \return new stencil values vector 161 */ 162static LLVMValueRef 163lp_build_stencil_op_single(struct lp_build_context *bld, 164 const struct pipe_stencil_state *stencil, 165 enum stencil_op op, 166 LLVMValueRef stencilRef, 167 LLVMValueRef stencilVals) 168 169{ 170 struct lp_type type = bld->type; 171 LLVMValueRef res; 172 LLVMValueRef max = lp_build_const_int_vec(type, 0xff); 173 unsigned stencil_op; 174 175 assert(type.sign); 176 177 switch (op) { 178 case S_FAIL_OP: 179 stencil_op = stencil->fail_op; 180 break; 181 case Z_FAIL_OP: 182 stencil_op = stencil->zfail_op; 183 break; 184 case Z_PASS_OP: 185 stencil_op = stencil->zpass_op; 186 break; 187 default: 188 assert(0 && "Invalid stencil_op mode"); 189 stencil_op = PIPE_STENCIL_OP_KEEP; 190 } 191 192 switch (stencil_op) { 193 case PIPE_STENCIL_OP_KEEP: 194 res = stencilVals; 195 /* we can return early for this case */ 196 return res; 197 case PIPE_STENCIL_OP_ZERO: 198 res = bld->zero; 199 break; 200 case PIPE_STENCIL_OP_REPLACE: 201 res = stencilRef; 202 break; 203 case PIPE_STENCIL_OP_INCR: 204 res = lp_build_add(bld, stencilVals, bld->one); 205 res = lp_build_min(bld, res, max); 206 break; 207 case PIPE_STENCIL_OP_DECR: 208 res = lp_build_sub(bld, stencilVals, bld->one); 209 res = lp_build_max(bld, res, bld->zero); 210 break; 211 case PIPE_STENCIL_OP_INCR_WRAP: 212 res = lp_build_add(bld, stencilVals, bld->one); 213 res = LLVMBuildAnd(bld->builder, res, max, ""); 214 break; 215 case PIPE_STENCIL_OP_DECR_WRAP: 216 res = lp_build_sub(bld, stencilVals, bld->one); 217 res = LLVMBuildAnd(bld->builder, res, max, ""); 218 break; 219 case PIPE_STENCIL_OP_INVERT: 220 res = LLVMBuildNot(bld->builder, stencilVals, ""); 221 res = LLVMBuildAnd(bld->builder, res, max, ""); 222 break; 223 default: 224 assert(0 && "bad stencil op mode"); 225 res = bld->undef; 226 } 227 228 return res; 229} 230 231 232/** 233 * Do the one or two-sided stencil test op/update. 234 */ 235static LLVMValueRef 236lp_build_stencil_op(struct lp_build_context *bld, 237 const struct pipe_stencil_state stencil[2], 238 enum stencil_op op, 239 LLVMValueRef stencilRefs[2], 240 LLVMValueRef stencilVals, 241 LLVMValueRef mask, 242 LLVMValueRef front_facing) 243 244{ 245 LLVMValueRef res; 246 247 assert(stencil[0].enabled); 248 249 /* do front face op */ 250 res = lp_build_stencil_op_single(bld, &stencil[0], op, 251 stencilRefs[0], stencilVals); 252 253 if (stencil[1].enabled && front_facing) { 254 /* do back face op */ 255 LLVMValueRef back_res; 256 257 back_res = lp_build_stencil_op_single(bld, &stencil[1], op, 258 stencilRefs[1], stencilVals); 259 260 res = lp_build_select(bld, front_facing, res, back_res); 261 } 262 263 if (stencil->writemask != 0xff) { 264 /* mask &= stencil->writemask */ 265 LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask); 266 mask = LLVMBuildAnd(bld->builder, mask, writemask, ""); 267 /* res = (res & mask) | (stencilVals & ~mask) */ 268 res = lp_build_select_bitwise(bld, writemask, res, stencilVals); 269 } 270 else { 271 /* res = mask ? res : stencilVals */ 272 res = lp_build_select(bld, mask, res, stencilVals); 273 } 274 275 return res; 276} 277 278 279 280/** 281 * Return a type appropriate for depth/stencil testing. 282 */ 283struct lp_type 284lp_depth_type(const struct util_format_description *format_desc, 285 unsigned length) 286{ 287 struct lp_type type; 288 unsigned swizzle; 289 290 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 291 assert(format_desc->block.width == 1); 292 assert(format_desc->block.height == 1); 293 294 swizzle = format_desc->swizzle[0]; 295 assert(swizzle < 4); 296 297 memset(&type, 0, sizeof type); 298 type.width = format_desc->block.bits; 299 300 if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { 301 type.floating = TRUE; 302 assert(swizzle == 0); 303 assert(format_desc->channel[swizzle].size == format_desc->block.bits); 304 } 305 else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { 306 assert(format_desc->block.bits <= 32); 307 if(format_desc->channel[swizzle].normalized) 308 type.norm = TRUE; 309 } 310 else 311 assert(0); 312 313 assert(type.width <= length); 314 type.length = length / type.width; 315 316 return type; 317} 318 319 320/** 321 * Compute bitmask and bit shift to apply to the incoming fragment Z values 322 * and the Z buffer values needed before doing the Z comparison. 323 * 324 * Note that we leave the Z bits in the position that we find them 325 * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us 326 * get by with fewer bit twiddling steps. 327 */ 328static boolean 329get_z_shift_and_mask(const struct util_format_description *format_desc, 330 unsigned *shift, unsigned *mask) 331{ 332 const unsigned total_bits = format_desc->block.bits; 333 unsigned z_swizzle; 334 unsigned chan; 335 unsigned padding_left, padding_right; 336 337 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 338 assert(format_desc->block.width == 1); 339 assert(format_desc->block.height == 1); 340 341 z_swizzle = format_desc->swizzle[0]; 342 343 if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) 344 return FALSE; 345 346 padding_right = 0; 347 for (chan = 0; chan < z_swizzle; ++chan) 348 padding_right += format_desc->channel[chan].size; 349 350 padding_left = 351 total_bits - (padding_right + format_desc->channel[z_swizzle].size); 352 353 if (padding_left || padding_right) { 354 unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; 355 unsigned long long mask_right = (1ULL << (padding_right)) - 1; 356 *mask = mask_left ^ mask_right; 357 } 358 else { 359 *mask = 0xffffffff; 360 } 361 362 *shift = padding_left; 363 364 return TRUE; 365} 366 367 368/** 369 * Compute bitmask and bit shift to apply to the framebuffer pixel values 370 * to put the stencil bits in the least significant position. 371 * (i.e. 0x000000ff) 372 */ 373static boolean 374get_s_shift_and_mask(const struct util_format_description *format_desc, 375 unsigned *shift, unsigned *mask) 376{ 377 unsigned s_swizzle; 378 unsigned chan, sz; 379 380 s_swizzle = format_desc->swizzle[1]; 381 382 if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) 383 return FALSE; 384 385 *shift = 0; 386 for (chan = 0; chan < s_swizzle; chan++) 387 *shift += format_desc->channel[chan].size; 388 389 sz = format_desc->channel[s_swizzle].size; 390 *mask = (1U << sz) - 1U; 391 392 return TRUE; 393} 394 395 396/** 397 * Perform the occlusion test and increase the counter. 398 * Test the depth mask. Add the number of channel which has none zero mask 399 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. 400 * The counter will add 4. 401 * 402 * \param type holds element type of the mask vector. 403 * \param maskvalue is the depth test mask. 404 * \param counter is a pointer of the uint32 counter. 405 */ 406void 407lp_build_occlusion_count(LLVMBuilderRef builder, 408 struct lp_type type, 409 LLVMValueRef maskvalue, 410 LLVMValueRef counter) 411{ 412 LLVMValueRef countmask = lp_build_const_int_vec(type, 1); 413 LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); 414 LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16); 415 LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti"); 416 LLVMValueRef maskarray[4] = { 417 LLVMConstInt(LLVMInt32Type(), 0, 0), 418 LLVMConstInt(LLVMInt32Type(), 4, 0), 419 LLVMConstInt(LLVMInt32Type(), 8, 0), 420 LLVMConstInt(LLVMInt32Type(), 12, 0), 421 }; 422 LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4); 423 LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev"); 424 LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle"); 425 LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle); 426 LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig"); 427 LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr"); 428 LLVMBuildStore(builder, incr, counter); 429} 430 431 432 433/** 434 * Generate code for performing depth and/or stencil tests. 435 * We operate on a vector of values (typically a 2x2 quad). 436 * 437 * \param depth the depth test state 438 * \param stencil the front/back stencil state 439 * \param type the data type of the fragment depth/stencil values 440 * \param format_desc description of the depth/stencil surface 441 * \param mask the alive/dead pixel mask for the quad (vector) 442 * \param stencil_refs the front/back stencil ref values (scalar) 443 * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) 444 * \param zs_dst_ptr pointer to depth/stencil values in framebuffer 445 * \param facing contains float value indicating front/back facing polygon 446 */ 447void 448lp_build_depth_stencil_test(LLVMBuilderRef builder, 449 const struct pipe_depth_state *depth, 450 const struct pipe_stencil_state stencil[2], 451 struct lp_type z_src_type, 452 const struct util_format_description *format_desc, 453 struct lp_build_mask_context *mask, 454 LLVMValueRef stencil_refs[2], 455 LLVMValueRef z_src, 456 LLVMValueRef zs_dst_ptr, 457 LLVMValueRef face, 458 LLVMValueRef *zs_value, 459 boolean do_branch) 460{ 461 struct lp_type z_type; 462 struct lp_build_context z_bld; 463 struct lp_build_context s_bld; 464 struct lp_type s_type; 465 LLVMValueRef zs_dst, z_dst = NULL; 466 LLVMValueRef stencil_vals = NULL; 467 LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; 468 LLVMValueRef z_pass = NULL, s_pass_mask = NULL; 469 LLVMValueRef orig_mask = lp_build_mask_value(mask); 470 LLVMValueRef front_facing = NULL; 471 472 /* Prototype a simpler path: 473 */ 474 if (z_src_type.floating && 475 format_desc->format == PIPE_FORMAT_X8Z24_UNORM && 476 depth->enabled) 477 { 478 LLVMValueRef zscaled; 479 LLVMValueRef const_ffffff_float; 480 LLVMValueRef const_8_int; 481 LLVMTypeRef int32_vec_type; 482 483 /* We know the values in z_dst are all >= 0, so allow 484 * lp_build_compare to use signed compare intrinsics: 485 */ 486 z_type.floating = 0; 487 z_type.fixed = 0; 488 z_type.sign = 1; 489 z_type.norm = 1; 490 z_type.width = 32; 491 z_type.length = z_src_type.length; 492 493 int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length); 494 495 const_8_int = lp_build_const_int_vec(z_type, 8); 496 const_ffffff_float = lp_build_const_vec(z_src_type, (float)0xffffff); 497 498 zscaled = LLVMBuildFMul(builder, z_src, const_ffffff_float, "zscaled"); 499 z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src"); 500 501 /* Load current z/stencil value from z/stencil buffer */ 502 zs_dst_ptr = LLVMBuildBitCast(builder, 503 zs_dst_ptr, 504 LLVMPointerType(int32_vec_type, 0), ""); 505 z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); 506 z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst"); 507 508 /* compare src Z to dst Z, returning 'pass' mask */ 509 z_pass = lp_build_compare(builder, 510 z_type, 511 depth->func, z_src, z_dst); 512 513 lp_build_mask_update(mask, z_pass); 514 515 if (do_branch) 516 lp_build_mask_check(mask); 517 518 /* No need to worry about old stencil contents, just blend the 519 * old and new values and shift into the correct position for 520 * storage. 521 */ 522 if (depth->writemask) { 523 z_type.sign = 1; 524 lp_build_context_init(&z_bld, builder, z_type); 525 526 z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), z_src, z_dst); 527 z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); 528 *zs_value = z_dst; 529 } 530 531 return; 532 } 533 534 /* 535 * Depths are expected to be between 0 and 1, even if they are stored in 536 * floats. Setting these bits here will ensure that the lp_build_conv() call 537 * below won't try to unnecessarily clamp the incoming values. 538 */ 539 if(z_src_type.floating) { 540 z_src_type.sign = FALSE; 541 z_src_type.norm = TRUE; 542 } 543 else { 544 assert(!z_src_type.sign); 545 assert(z_src_type.norm); 546 } 547 548 /* Pick the depth type. */ 549 z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); 550 551 /* FIXME: Cope with a depth test type with a different bit width. */ 552 assert(z_type.width == z_src_type.width); 553 assert(z_type.length == z_src_type.length); 554 555 /* Convert fragment Z from float to integer */ 556 lp_build_conv(builder, z_src_type, z_type, &z_src, 1, &z_src, 1); 557 558 559 /* Sanity checking */ 560 { 561 const unsigned z_swizzle = format_desc->swizzle[0]; 562 const unsigned s_swizzle = format_desc->swizzle[1]; 563 564 assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || 565 s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); 566 567 assert(depth->enabled || stencil[0].enabled); 568 569 assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 570 assert(format_desc->block.width == 1); 571 assert(format_desc->block.height == 1); 572 573 if (stencil[0].enabled) { 574 assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED || 575 format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); 576 } 577 578 assert(z_swizzle < 4); 579 assert(format_desc->block.bits == z_type.width); 580 if (z_type.floating) { 581 assert(z_swizzle == 0); 582 assert(format_desc->channel[z_swizzle].type == 583 UTIL_FORMAT_TYPE_FLOAT); 584 assert(format_desc->channel[z_swizzle].size == 585 format_desc->block.bits); 586 } 587 else { 588 assert(format_desc->channel[z_swizzle].type == 589 UTIL_FORMAT_TYPE_UNSIGNED); 590 assert(format_desc->channel[z_swizzle].normalized); 591 assert(!z_type.fixed); 592 assert(!z_type.sign); 593 assert(z_type.norm); 594 } 595 } 596 597 598 /* Setup build context for Z vals */ 599 lp_build_context_init(&z_bld, builder, z_type); 600 601 /* Setup build context for stencil vals */ 602 s_type = lp_type_int_vec(z_type.width); 603 lp_build_context_init(&s_bld, builder, s_type); 604 605 /* Load current z/stencil value from z/stencil buffer */ 606 zs_dst_ptr = LLVMBuildBitCast(builder, 607 zs_dst_ptr, 608 LLVMPointerType(z_bld.vec_type, 0), ""); 609 zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); 610 611 lp_build_name(zs_dst, "zsbufval"); 612 613 614 /* Compute and apply the Z/stencil bitmasks and shifts. 615 */ 616 { 617 unsigned z_shift, z_mask; 618 unsigned s_shift, s_mask; 619 620 if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { 621 if (z_shift) { 622 LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); 623 z_src = LLVMBuildLShr(builder, z_src, shift, ""); 624 } 625 626 if (z_mask != 0xffffffff) { 627 LLVMValueRef mask = lp_build_const_int_vec(z_type, z_mask); 628 z_src = LLVMBuildAnd(builder, z_src, mask, ""); 629 z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); 630 z_bitmask = mask; /* used below */ 631 } 632 else { 633 z_dst = zs_dst; 634 } 635 636 lp_build_name(z_dst, "zsbuf.z"); 637 } 638 639 if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { 640 if (s_shift) { 641 LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); 642 stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); 643 stencil_shift = shift; /* used below */ 644 } 645 else { 646 stencil_vals = zs_dst; 647 } 648 649 if (s_mask != 0xffffffff) { 650 LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask); 651 stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); 652 } 653 654 lp_build_name(stencil_vals, "stencil"); 655 } 656 } 657 658 if (stencil[0].enabled) { 659 660 if (face) { 661 LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); 662 663 /* front_facing = face > 0.0 ? ~0 : 0 */ 664 front_facing = LLVMBuildFCmp(builder, LLVMRealUGT, face, zero, ""); 665 front_facing = LLVMBuildSExt(builder, front_facing, 666 LLVMIntType(s_bld.type.length*s_bld.type.width), 667 ""); 668 front_facing = LLVMBuildBitCast(builder, front_facing, 669 s_bld.int_vec_type, ""); 670 } 671 672 /* convert scalar stencil refs into vectors */ 673 stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); 674 stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); 675 676 s_pass_mask = lp_build_stencil_test(&s_bld, stencil, 677 stencil_refs, stencil_vals, 678 front_facing); 679 680 /* apply stencil-fail operator */ 681 { 682 LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); 683 stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, 684 stencil_refs, stencil_vals, 685 s_fail_mask, front_facing); 686 } 687 } 688 689 if (depth->enabled) { 690 /* compare src Z to dst Z, returning 'pass' mask */ 691 z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); 692 693 if (!stencil[0].enabled) { 694 /* We can potentially skip all remaining operations here, but only 695 * if stencil is disabled because we still need to update the stencil 696 * buffer values. Don't need to update Z buffer values. 697 */ 698 lp_build_mask_update(mask, z_pass); 699 700 if (do_branch) { 701 lp_build_mask_check(mask); 702 do_branch = FALSE; 703 } 704 } 705 706 if (depth->writemask) { 707 LLVMValueRef zselectmask = lp_build_mask_value(mask); 708 709 /* mask off bits that failed Z test */ 710 zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); 711 712 /* mask off bits that failed stencil test */ 713 if (s_pass_mask) { 714 zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); 715 } 716 717 /* if combined Z/stencil format, mask off the stencil bits */ 718 if (z_bitmask) { 719 zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); 720 } 721 722 /* Mix the old and new Z buffer values. 723 * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) 724 */ 725 z_dst = lp_build_select_bitwise(&z_bld, zselectmask, z_src, z_dst); 726 } 727 728 if (stencil[0].enabled) { 729 /* update stencil buffer values according to z pass/fail result */ 730 LLVMValueRef z_fail_mask, z_pass_mask; 731 732 /* apply Z-fail operator */ 733 z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); 734 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, 735 stencil_refs, stencil_vals, 736 z_fail_mask, front_facing); 737 738 /* apply Z-pass operator */ 739 z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, ""); 740 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, 741 stencil_refs, stencil_vals, 742 z_pass_mask, front_facing); 743 } 744 } 745 else { 746 /* No depth test: apply Z-pass operator to stencil buffer values which 747 * passed the stencil test. 748 */ 749 s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, ""); 750 stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, 751 stencil_refs, stencil_vals, 752 s_pass_mask, front_facing); 753 } 754 755 /* The Z bits are already in the right place but we may need to shift the 756 * stencil bits before ORing Z with Stencil to make the final pixel value. 757 */ 758 if (stencil_vals && stencil_shift) 759 stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals, 760 stencil_shift, ""); 761 762 /* Finally, merge/store the z/stencil values */ 763 if ((depth->enabled && depth->writemask) || 764 (stencil[0].enabled && stencil[0].writemask)) { 765 766 if (z_dst && stencil_vals) 767 zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, ""); 768 else if (z_dst) 769 zs_dst = z_dst; 770 else 771 zs_dst = stencil_vals; 772 773 *zs_value = zs_dst; 774 } 775 776 if (s_pass_mask) 777 lp_build_mask_update(mask, s_pass_mask); 778 779 if (depth->enabled && stencil[0].enabled) 780 lp_build_mask_update(mask, z_pass); 781 782 if (do_branch) 783 lp_build_mask_check(mask); 784 785} 786 787 788void 789lp_build_depth_write(LLVMBuilderRef builder, 790 const struct util_format_description *format_desc, 791 LLVMValueRef zs_dst_ptr, 792 LLVMValueRef zs_value) 793{ 794 zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, 795 LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); 796 797 LLVMBuildStore(builder, zs_value, zs_dst_ptr); 798} 799 800 801void 802lp_build_deferred_depth_write(LLVMBuilderRef builder, 803 struct lp_type z_src_type, 804 const struct util_format_description *format_desc, 805 struct lp_build_mask_context *mask, 806 LLVMValueRef zs_dst_ptr, 807 LLVMValueRef zs_value) 808{ 809 struct lp_type z_type; 810 struct lp_build_context z_bld; 811 LLVMValueRef z_dst; 812 813 /* XXX: pointlessly redo type logic: 814 */ 815 z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); 816 lp_build_context_init(&z_bld, builder, z_type); 817 818 zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, 819 LLVMPointerType(z_bld.vec_type, 0), ""); 820 821 z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); 822 z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); 823 824 LLVMBuildStore(builder, z_dst, zs_dst_ptr); 825} 826