lp_bld_depth.c revision 9e1050d72fb78b56b03304727abb122713a90ed1
12a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/************************************************************************** 22a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * 32a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Copyright 2009-2010 VMware, Inc. 42a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * All Rights Reserved. 52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * 62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Permission is hereby granted, free of charge, to any person obtaining a 72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * copy of this software and associated documentation files (the 8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * "Software"), to deal in the Software without restriction, including 92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * without limitation the rights to use, copy, modify, merge, publish, 102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * distribute, sub license, and/or sell copies of the Software, and to 11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * permit persons to whom the Software is furnished to do so, subject to 122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * the following conditions: 132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * 14868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) * The above copyright notice and this permission notice (including the 151e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * next paragraph) shall be included in all copies or substantial portions 162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * of the Software. 
174e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) * 18868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 2390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 257d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * 262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) **************************************************************************/ 272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 284e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)/** 297d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * @file 302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Depth/stencil testing to LLVM IR translation. 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * 322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * To be done accurately/efficiently the depth/stencil test must be done with 332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * the same type/format of the depth/stencil buffer, which implies massaging 342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * the incoming depths to fit into place. 
Using a more straightforward 3590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * type/format for depth/stencil values internally and only convert when 3690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * flushing would avoid this, but it would most likely result in depth fighting 3790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * artifacts. 3890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * 3990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * We are free to use a different pixel layout though. Since our basic 40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * processing unit is a quad (2x2 pixel block) we store the depth/stencil 41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * values tiled, a quad at time. That is, a depth buffer containing 422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * 432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Z11 Z12 Z13 Z14 ... 442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Z21 Z22 Z23 Z24 ... 452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Z31 Z32 Z33 Z34 ... 462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Z41 Z42 Z43 Z44 ... 472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * ... ... ... ... ... 482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * 492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * will actually be stored in memory as 502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * 517d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ... 527d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ... 537d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * ... ... ... ... ... ... ... ... ... 
547d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * 557d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * 567d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * @author Jose Fonseca <jfonseca@vmware.com> 572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * @author Brian Paul <jfonseca@vmware.com> 582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) */ 592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "pipe/p_state.h" 612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "util/u_format.h" 62868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "util/u_cpu_detect.h" 632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "gallivm/lp_bld_type.h" 652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "gallivm/lp_bld_arit.h" 662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "gallivm/lp_bld_bitarit.h" 672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "gallivm/lp_bld_const.h" 682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "gallivm/lp_bld_conv.h" 692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "gallivm/lp_bld_logic.h" 702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "gallivm/lp_bld_flow.h" 712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "gallivm/lp_bld_intr.h" 72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "gallivm/lp_bld_debug.h" 73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "gallivm/lp_bld_swizzle.h" 74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "lp_bld_depth.h" 76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard 
Coles) 77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)/** Used to select fields from pipe_stencil_state */ 79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)enum stencil_op { 802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) S_FAIL_OP, 812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Z_FAIL_OP, 822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Z_PASS_OP 832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}; 842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 8590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 8690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/** 882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Do the stencil test comparison (compare FB stencil values against ref value). 892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * This will be used twice when generating two-sided stencil code. 
902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * \param stencil the front/back stencil state 912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * \param stencilRef the stencil reference value, replicated as a vector 922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * \param stencilVals vector of stencil values from framebuffer 932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * \return vector mask of pass/fail values (~0 or 0) 942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) */ 952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static LLVMValueRef 96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)lp_build_stencil_test_single(struct lp_build_context *bld, 972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const struct pipe_stencil_state *stencil, 982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMValueRef stencilRef, 992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMValueRef stencilVals) 100868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles){ 10168043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) LLVMBuilderRef builder = bld->gallivm->builder; 102868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const unsigned stencilMax = 255; /* XXX fix */ 103868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) struct lp_type type = bld->type; 104868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) LLVMValueRef res; 105868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) 106868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) /* 107868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) * SSE2 has intrinsics for signed comparisons, but not unsigned ones. 
Values 108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * are between 0..255 so ensure we generate the fastest comparisons for 109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * wider elements. 11090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) */ 111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (type.width <= 8) { 112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) assert(!type.sign); 1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } else { 1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(type.sign); 1155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 1165d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) assert(stencil->enabled); 1185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (stencil->valuemask != stencilMax) { 1205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* compute stencilRef = stencilRef & valuemask */ 1215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask); 1225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, ""); 1235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* compute stencilVals = stencilVals & valuemask */ 1245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, ""); 1257d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) } 1267d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); 1285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 
1295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return res; 1305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 1315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1325d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)/** 1345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * Do the one or two-sided stencil test comparison. 1357d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * \sa lp_build_stencil_test_single 1367d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * \param front_facing an integer vector mask, indicating front (~0) or back 1377d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) * (0) facing polygon. If NULL, assume front-facing. 1385d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) */ 1397d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)static LLVMValueRef 1407d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)lp_build_stencil_test(struct lp_build_context *bld, 1417d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const struct pipe_stencil_state stencil[2], 1427d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMValueRef stencilRefs[2], 1437d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMValueRef stencilVals, 1447d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMValueRef front_facing) 1457d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles){ 1467d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMValueRef res; 1475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 148a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) assert(stencil[0].enabled); 1497d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1507d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) /* do front face test */ 1517d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) res 
= lp_build_stencil_test_single(bld, &stencil[0], 1527d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) stencilRefs[0], stencilVals); 1537d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1547d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) if (stencil[1].enabled && front_facing != NULL) { 1557d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) /* do back face test */ 1567d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMValueRef back_res; 1575d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1587d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) back_res = lp_build_stencil_test_single(bld, &stencil[1], 1595d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) stencilRefs[1], stencilVals); 1607d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1617d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) res = lp_build_select(bld, front_facing, res, back_res); 1627d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) } 1635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return res; 1657d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)} 1667d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1677d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)/** 1695d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * Apply the stencil operator (add/sub/keep/etc) to the given vector 1705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * of stencil values. 
1715d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * \return new stencil values vector 1727d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) */ 1735d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)static LLVMValueRef 1747d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)lp_build_stencil_op_single(struct lp_build_context *bld, 1757d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) const struct pipe_stencil_state *stencil, 1764e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) enum stencil_op op, 1775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) LLVMValueRef stencilRef, 1787d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMValueRef stencilVals) 1797d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1807d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles){ 1817d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMBuilderRef builder = bld->gallivm->builder; 1827d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) struct lp_type type = bld->type; 1837d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMValueRef res; 1847d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff); 1857d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) unsigned stencil_op; 1867d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1877d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) assert(type.sign); 1887d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 1897d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) switch (op) { 1907d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) case S_FAIL_OP: 1917d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) stencil_op = stencil->fail_op; 1925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) break; 1935d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 
case Z_FAIL_OP: 1947d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) stencil_op = stencil->zfail_op; 1957d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) break; 1967d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) case Z_PASS_OP: 1977d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) stencil_op = stencil->zpass_op; 1987d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) break; 1997d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) default: 2007d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) assert(0 && "Invalid stencil_op mode"); 2017d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) stencil_op = PIPE_STENCIL_OP_KEEP; 202868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) } 203868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) 204868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) switch (stencil_op) { 205868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) case PIPE_STENCIL_OP_KEEP: 206868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = stencilVals; 207868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) /* we can return early for this case */ 208868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) return res; 209868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) case PIPE_STENCIL_OP_ZERO: 210868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = bld->zero; 211868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) break; 2125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) case PIPE_STENCIL_OP_REPLACE: 2135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) res = stencilRef; 2145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) break; 215868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) case PIPE_STENCIL_OP_INCR: 216868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = lp_build_add(bld, stencilVals, bld->one); 
217868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = lp_build_min(bld, res, max); 218868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) break; 219868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) case PIPE_STENCIL_OP_DECR: 220868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = lp_build_sub(bld, stencilVals, bld->one); 221868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = lp_build_max(bld, res, bld->zero); 222868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) break; 223868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) case PIPE_STENCIL_OP_INCR_WRAP: 224868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = lp_build_add(bld, stencilVals, bld->one); 225868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = LLVMBuildAnd(builder, res, max, ""); 2265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) break; 2275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) case PIPE_STENCIL_OP_DECR_WRAP: 2285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) res = lp_build_sub(bld, stencilVals, bld->one); 229868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = LLVMBuildAnd(builder, res, max, ""); 230868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) break; 231868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) case PIPE_STENCIL_OP_INVERT: 232868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = LLVMBuildNot(builder, stencilVals, ""); 233868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) res = LLVMBuildAnd(builder, res, max, ""); 2342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) break; 23590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) default: 23690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) assert(0 && "bad stencil op mode"); 23790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) res = bld->undef; 
23890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 23990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 24090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) return res; 24190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)} 24290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 24390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 24490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)/** 245c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * Do the one or two-sided stencil test op/update. 246c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) */ 247c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)static LLVMValueRef 248c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)lp_build_stencil_op(struct lp_build_context *bld, 24990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) const struct pipe_stencil_state stencil[2], 25090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) enum stencil_op op, 25190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) LLVMValueRef stencilRefs[2], 252d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) LLVMValueRef stencilVals, 25390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) LLVMValueRef mask, 25490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) LLVMValueRef front_facing) 2552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles){ 2572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMBuilderRef builder = bld->gallivm->builder; 258d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) LLVMValueRef res; 259d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) 260d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) assert(stencil[0].enabled); 261d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) 
26290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* do front face op */ 26390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) res = lp_build_stencil_op_single(bld, &stencil[0], op, 26490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) stencilRefs[0], stencilVals); 265c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 266c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (stencil[1].enabled && front_facing != NULL) { 26790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* do back face op */ 26890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) LLVMValueRef back_res; 26990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 27090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) back_res = lp_build_stencil_op_single(bld, &stencil[1], op, 27190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) stencilRefs[1], stencilVals); 27290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 27390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) res = lp_build_select(bld, front_facing, res, back_res); 27490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 275c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 276c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (stencil[0].writemask != 0xff || 277c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) { 27890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* mask &= stencil[0].writemask */ 2792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type, 2802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) stencil[0].writemask); 2812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) { 
2825d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type, 2832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) stencil[1].writemask); 2842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) writemask = lp_build_select(bld, front_facing, writemask, back_writemask); 2852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 2862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) mask = LLVMBuildAnd(builder, mask, writemask, ""); 2882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) /* res = (res & mask) | (stencilVals & ~mask) */ 2892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) res = lp_build_select_bitwise(bld, mask, res, stencilVals); 2902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 2912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else { 2922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) /* res = mask ? res : stencilVals */ 2932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) res = lp_build_select(bld, mask, res, stencilVals); 2942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 2952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return res; 2972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/** 3022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Return a type appropriate for depth/stencil testing. 
3032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) */ 3042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)struct lp_type 3052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)lp_depth_type(const struct util_format_description *format_desc, 306c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned length) 307c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles){ 308c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) struct lp_type type; 309c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned swizzle; 310c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 311c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 312c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) assert(format_desc->block.width == 1); 313c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) assert(format_desc->block.height == 1); 314c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 315c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) swizzle = format_desc->swizzle[0]; 3165d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) assert(swizzle < 4); 3175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 3185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) memset(&type, 0, sizeof type); 3195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) type.width = format_desc->block.bits; 3205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 3215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { 3225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) type.floating = TRUE; 3235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) assert(swizzle == 0); 3245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) assert(format_desc->channel[swizzle].size == 
format_desc->block.bits); 3255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 3265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { 3275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) assert(format_desc->block.bits <= 32); 3285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) assert(format_desc->channel[swizzle].normalized); 3295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (format_desc->channel[swizzle].size < format_desc->block.bits) { 3305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* Prefer signed integers when possible, as SSE has less support 3315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * for unsigned comparison; 3325d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) */ 3335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) type.sign = TRUE; 3345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 3352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 3362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) else 3372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(0); 3382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(type.width <= length); 3402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) type.length = length / type.width; 3412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return type; 3437d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)} 3442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 34568043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) 34668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)/** 34768043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * Compute bitmask and bit 
shift to apply to the incoming fragment Z values 34868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * and the Z buffer values needed before doing the Z comparison. 34968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * 35068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * Note that we leave the Z bits in the position that we find them 35168043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us 35268043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * get by with fewer bit twiddling steps. 3532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) */ 3542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static boolean 355c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)get_z_shift_and_mask(const struct util_format_description *format_desc, 356c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned *shift, unsigned *width, unsigned *mask) 357c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles){ 35868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) const unsigned total_bits = format_desc->block.bits; 35968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) unsigned z_swizzle; 360c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) unsigned chan; 3612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) unsigned padding_left, padding_right; 3622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); 3642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(format_desc->block.width == 1); 3652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(format_desc->block.height == 1); 3662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 36790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 
z_swizzle = format_desc->swizzle[0]; 3682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) 3702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return FALSE; 3712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *width = format_desc->channel[z_swizzle].size; 3732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3745d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) padding_right = 0; 3755d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) for (chan = 0; chan < z_swizzle; ++chan) 3765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) padding_right += format_desc->channel[chan].size; 3775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 3782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) padding_left = 3792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) total_bits - (padding_right + *width); 3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (padding_left || padding_right) { 38290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; 3832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) unsigned long long mask_right = (1ULL << (padding_right)) - 1; 3842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *mask = mask_left ^ mask_right; 385c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 386c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) else { 387c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) *mask = 0xffffffff; 388c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 389c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
39090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) *shift = padding_right; 39190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 39290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) return TRUE; 39390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)} 39490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 39590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 39690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)/** 39790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Compute bitmask and bit shift to apply to the framebuffer pixel values 39890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * to put the stencil bits in the least significant position. 39990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * (i.e. 0x000000ff) 40090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) */ 40190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)static boolean 40290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)get_s_shift_and_mask(const struct util_format_description *format_desc, 40390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) unsigned *shift, unsigned *mask) 40490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles){ 40590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) unsigned s_swizzle; 40690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) unsigned chan, sz; 40790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 40890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) s_swizzle = format_desc->swizzle[1]; 40990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 41090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) 41190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) return FALSE; 41290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 41390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne 
(Richard Coles) *shift = 0; 4142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) for (chan = 0; chan < s_swizzle; chan++) 4152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *shift += format_desc->channel[chan].size; 4162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) sz = format_desc->channel[s_swizzle].size; 4182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *mask = (1U << sz) - 1U; 4192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return TRUE; 4212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 422a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) 423a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) 4242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/** 4252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Perform the occlusion test and increase the counter. 4262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Test the depth mask. Add the number of channel which has none zero mask 427c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. 428c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) * The counter will add 4. 4292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * 4302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * \param type holds element type of the mask vector. 4312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * \param maskvalue is the depth test mask. 4322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * \param counter is a pointer of the uint32 counter. 
4332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) */ 4342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void 4352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)lp_build_occlusion_count(struct gallivm_state *gallivm, 4362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) struct lp_type type, 4372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMValueRef maskvalue, 4382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMValueRef counter) 4392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles){ 4402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMBuilderRef builder = gallivm->builder; 4412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMContextRef context = gallivm->context; 4422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1); 4432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LLVMValueRef count, newcount; 4442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(type.length <= 16); 4462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(type.floating); 4472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if(util_cpu_caps.has_sse && type.length == 4) { 4492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const char *movmskintr = "llvm.x86.sse.movmsk.ps"; 450868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const char *popcntintr = "llvm.ctpop.i32"; 451868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, 452868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) lp_build_vec_type(gallivm, type), ""); 453868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) bits = 
lp_build_intrinsic_unary(builder, movmskintr, 454868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) LLVMInt32TypeInContext(context), bits); 455868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) count = lp_build_intrinsic_unary(builder, popcntintr, 456868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) LLVMInt32TypeInContext(context), bits); 457868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) } 458868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) else if(util_cpu_caps.has_avx && type.length == 8) { 459868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; 460868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const char *popcntintr = "llvm.ctpop.i32"; 461868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, 462868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) lp_build_vec_type(gallivm, type), ""); 463868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) bits = lp_build_intrinsic_unary(builder, movmskintr, 464868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) LLVMInt32TypeInContext(context), bits); 465868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) count = lp_build_intrinsic_unary(builder, popcntintr, 466868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) LLVMInt32TypeInContext(context), bits); 467868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) } 468868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) else { 4692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) unsigned i; 470 LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); 471 LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8); 472 LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4); 473 LLVMValueRef shufflev, countd; 474 LLVMValueRef shuffles[16]; 475 
const char *popcntintr = NULL; 476 477 countv = LLVMBuildBitCast(builder, countv, i8vntype, ""); 478 479 for (i = 0; i < type.length; i++) { 480 shuffles[i] = lp_build_const_int32(gallivm, 4*i); 481 } 482 483 shufflev = LLVMConstVector(shuffles, type.length); 484 countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, ""); 485 countd = LLVMBuildBitCast(builder, countd, counttype, "countd"); 486 487 /* 488 * XXX FIXME 489 * this is bad on cpus without popcount (on x86 supported by intel 490 * nehalem, amd barcelona, and up - not tied to sse42). 491 * Would be much faster to just sum the 4 elements of the vector with 492 * some horizontal add (shuffle/add/shuffle/add after the initial and). 493 */ 494 switch (type.length) { 495 case 4: 496 popcntintr = "llvm.ctpop.i32"; 497 break; 498 case 8: 499 popcntintr = "llvm.ctpop.i64"; 500 break; 501 case 16: 502 popcntintr = "llvm.ctpop.i128"; 503 break; 504 default: 505 assert(0); 506 } 507 count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd); 508 509 if (type.length > 4) { 510 count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), ""); 511 } 512 } 513 newcount = LLVMBuildLoad(builder, counter, "origcount"); 514 newcount = LLVMBuildAdd(builder, newcount, count, "newcount"); 515 LLVMBuildStore(builder, newcount, counter); 516} 517 518 519 520/** 521 * Generate code for performing depth and/or stencil tests. 522 * We operate on a vector of values (typically n 2x2 quads). 
*
 * \param depth  the depth test state
 * \param stencil  the front/back stencil state
 * \param z_src_type  the data type of the incoming fragment depth values;
 *                    must have the same width and length as the depth
 *                    buffer type (asserted below)
 * \param format_desc  description of the depth/stencil surface
 * \param mask  the alive/dead pixel mask for the quad (vector); updated
 *              with the stencil and depth test results
 * \param stencil_refs  the front/back stencil ref values (scalar); when
 *                      stencil is enabled both entries are overwritten in
 *                      place with their broadcast vector form
 * \param z_src  the incoming fragment depth values, of type z_src_type
 * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
 * \param face  value compared against zero to tell front from back facing
 *              polygons; may be NULL, in which case no facing mask is built
 * \param zs_value  receives the merged depth/stencil vector; only written
 *                  when depth writes or stencil writes are enabled
 * \param do_branch  if set, lp_build_mask_check() is invoked on the mask:
 *                   right after the depth test when stencil is disabled,
 *                   otherwise at the very end
 */
void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type z_src_type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef zs_dst_ptr,
                            LLVMValueRef face,
                            LLVMValueRef *zs_value,
                            boolean do_branch)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type z_type;
   struct lp_build_context z_bld;
   struct lp_build_context s_bld;
   struct lp_type s_type;
   unsigned z_shift = 0, z_width = 0, z_mask = 0;
   LLVMValueRef zs_dst, z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   /* Snapshot of the live-pixel mask on entry; all pass/fail masks below are
    * derived from this value rather than from the progressively updated mask.
    */
   LLVMValueRef orig_mask = lp_build_mask_value(mask);
   LLVMValueRef front_facing = NULL;


   /*
    * Depths are expected to be between 0 and 1, even if they are stored in
    * floats. Setting these bits here will ensure that the lp_build_conv() call
    * below won't try to unnecessarily clamp the incoming values.
    */
   if(z_src_type.floating) {
      z_src_type.sign = FALSE;
      z_src_type.norm = TRUE;
   }
   else {
      assert(!z_src_type.sign);
      assert(z_src_type.norm);
   }

   /* Pick the depth type. */
   z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);

   /* FIXME: Cope with a depth test type with a different bit width. */
   assert(z_type.width == z_src_type.width);
   assert(z_type.length == z_src_type.length);

   /* FIXME: for non-float depth/stencil might generate better code
    * if we'd always split it up to use 128bit operations.
    * For stencil we'd almost certainly want to pack to 8xi16 values,
    * for z just run twice.
    */

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      /* At least one of the two tests must be requested. */
      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
                format_desc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM);
      }

      assert(z_swizzle < 4);
      assert(format_desc->block.bits == z_type.width);
      if (z_type.floating) {
         assert(z_swizzle == 0);
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_FLOAT);
         assert(format_desc->channel[z_swizzle].size ==
                format_desc->block.bits);
      }
      else {
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[z_swizzle].normalized);
         assert(!z_type.fixed);
      }
   }


   /* Setup build context for Z vals */
   lp_build_context_init(&z_bld, gallivm, z_type);

   /* Setup build context for stencil vals */
   s_type = lp_int_type(z_type);
   lp_build_context_init(&s_bld, gallivm, s_type);

   /* Load current z/stencil value from z/stencil buffer */
   zs_dst_ptr = LLVMBuildBitCast(builder,
                                 zs_dst_ptr,
                                 LLVMPointerType(z_bld.vec_type, 0), "");
   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");

   lp_build_name(zs_dst, "zs_dst");


   /* Compute and apply the Z/stencil bitmasks and shifts.
    */
   {
      unsigned s_shift, s_mask;

      /* z_dst stays NULL when the format has no Z channel. */
      if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
         if (z_mask != 0xffffffff) {
            z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
         }

         /*
          * Align the framebuffer Z's LSB to the right.
          * The shift alone discards the lower bits; the bitmask is only
          * applied when no shift is needed.
          */
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
            z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
         } else if (z_bitmask) {
            /* TODO: Instead of loading a mask from memory and ANDing, it's
             * probably faster to just shake the bits with two shifts. */
            z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
         } else {
            z_dst = zs_dst;
            lp_build_name(z_dst, "z_dst");
         }
      }

      /* stencil_vals stays NULL when the format has no stencil channel. */
      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
            stencil_shift = shift;  /* used below */
         }
         else {
            stencil_vals = zs_dst;
         }

         if (s_mask != 0xffffffff) {
            /* NOTE: this local shadows the `mask` parameter in this scope. */
            LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "s_dst");
      }
   }

   if (stencil[0].enabled) {

      if (face) {
         LLVMValueRef zero = lp_build_const_int32(gallivm, 0);

         /* front_facing = face != 0 ? ~0 : 0 */
         front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
         /* Sign-extend the i1 to the full vector bit width, then view it as
          * an integer vector so it can be used as a per-element mask.
          */
         front_facing = LLVMBuildSExt(builder, front_facing,
                                      LLVMIntTypeInContext(gallivm->context,
                                             s_bld.type.length*s_bld.type.width),
                                      "");
         front_facing = LLVMBuildBitCast(builder, front_facing,
                                         s_bld.int_vec_type, "");
      }

      /* convert scalar stencil refs into vectors
       * (this overwrites the caller's stencil_refs[] entries)
       */
      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                          stencil_refs, stencil_vals,
                                          front_facing);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, front_facing);
      }
   }

   if (depth->enabled) {
      /*
       * Convert fragment Z to the desired type, aligning the LSB to the right.
       */

      assert(z_type.width == z_src_type.width);
      assert(z_type.length == z_src_type.length);
      assert(lp_check_value(z_src_type, z_src));
      if (z_src_type.floating) {
         /*
          * Convert from floating point values
          */

         if (!z_type.floating) {
            z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
                                                            z_src_type,
                                                            z_width,
                                                            z_src);
         }
      } else {
         /*
          * Convert from unsigned normalized values.
          */

         assert(!z_src_type.sign);
         assert(!z_src_type.fixed);
         assert(z_src_type.norm);
         assert(!z_type.floating);
         /* Drop the low-order bits that do not fit in the Z channel. */
         if (z_src_type.width > z_width) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
                                                        z_src_type.width - z_width);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }
      }
      assert(lp_check_value(z_type, z_src));

      lp_build_name(z_src, "z_src");

      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);

         if (do_branch) {
            lp_build_mask_check(mask);
            /* Avoid repeating the check at the end of the function. */
            do_branch = FALSE;
         }
      }

      if (depth->writemask) {
         LLVMValueRef zselectmask;

         /* mask off bits that failed Z test */
         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
          */
         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, front_facing);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, front_facing);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       * (Stencil must be enabled on this path, per the sanity assert above,
       * so s_pass_mask has been computed.)
       */
      s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, front_facing);
   }

   /* Put Z and stencil bits in the right place, i.e. undo the right shifts
    * applied when the two fields were extracted from the buffer value.
    */
   if (z_dst && z_shift) {
      LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
   }
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge the z/stencil values and hand them back through
    * *zs_value.  Nothing is stored to the buffer here, and *zs_value is left
    * untouched when neither depth nor stencil writes are enabled.
    */
   if ((depth->enabled && depth->writemask) ||
       (stencil[0].enabled && stencil[0].writemask)) {

      if (z_dst && stencil_vals)
         zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
      else if (z_dst)
         zs_dst = z_dst;
      else
         zs_dst = stencil_vals;

      *zs_value = zs_dst;
   }

   /* Fold the stencil test result into the live-pixel mask. */
   if (s_pass_mask)
      lp_build_mask_update(mask, s_pass_mask);

   /* With stencil enabled the depth result was not applied to the mask
    * earlier (the stencil ops still needed to run), so apply it now.
    */
   if (depth->enabled && stencil[0].enabled)
      lp_build_mask_update(mask, z_pass);

   if (do_branch)
      lp_build_mask_check(mask);

}


void
853lp_build_depth_write(LLVMBuilderRef builder, 854 const struct util_format_description *format_desc, 855 LLVMValueRef zs_dst_ptr, 856 LLVMValueRef zs_value) 857{ 858 zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, 859 LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); 860 861 LLVMBuildStore(builder, zs_value, zs_dst_ptr); 862} 863 864 865void 866lp_build_deferred_depth_write(struct gallivm_state *gallivm, 867 struct lp_type z_src_type, 868 const struct util_format_description *format_desc, 869 struct lp_build_mask_context *mask, 870 LLVMValueRef zs_dst_ptr, 871 LLVMValueRef zs_value) 872{ 873 struct lp_type z_type; 874 struct lp_build_context z_bld; 875 LLVMValueRef z_dst; 876 LLVMBuilderRef builder = gallivm->builder; 877 878 /* XXX: pointlessly redo type logic: 879 */ 880 z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); 881 lp_build_context_init(&z_bld, gallivm, z_type); 882 883 zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, 884 LLVMPointerType(z_bld.vec_type, 0), ""); 885 886 z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); 887 z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); 888 889 LLVMBuildStore(builder, z_dst, zs_dst_ptr); 890} 891