lp_bld_depth.c revision aa4cb5e2d8d48c7dcc9653c61a9e25494e3e7b2a
1/**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Depth/stencil testing to LLVM IR translation.
31 *
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only convert when
36 * flushing would avoid this, but it would most likely result in depth fighting
37 * artifacts.
38 *
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at a time. That is, a depth buffer containing
42 *
43 *  Z11 Z12 Z13 Z14 ...
44 *  Z21 Z22 Z23 Z24 ...
45 *  Z31 Z32 Z33 Z34 ...
46 *  Z41 Z42 Z43 Z44 ...
47 *  ... ... ... ... ...
48 *
49 * will actually be stored in memory as
50 *
51 *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 *  ... ... ... ... ... ... ... ... ...
54 *
55 *
56 * Stencil test:
57 * Two-sided stencil test is supported but probably not as efficient as
58 * it could be.  Currently, we use if/then/else constructs to do the
59 * operations for front vs. back-facing polygons.  We could probably do
60 * both the front and back arithmetic then use a Select() instruction to
61 * choose the result depending on polygon orientation.  We'd have to
62 * measure performance both ways and see which is better.
63 *
64 * @author Jose Fonseca <jfonseca@vmware.com>
65 */
66
67#include "pipe/p_state.h"
68#include "util/u_format.h"
69
70#include "gallivm/lp_bld_type.h"
71#include "gallivm/lp_bld_arit.h"
72#include "gallivm/lp_bld_bitarit.h"
73#include "gallivm/lp_bld_const.h"
74#include "gallivm/lp_bld_conv.h"
75#include "gallivm/lp_bld_logic.h"
76#include "gallivm/lp_bld_flow.h"
77#include "gallivm/lp_bld_intr.h"
78#include "gallivm/lp_bld_debug.h"
79#include "gallivm/lp_bld_swizzle.h"
80
81#include "lp_bld_depth.h"
82
83
/**
 * Identifies which operator field of pipe_stencil_state should be applied
 * (fail_op, zfail_op or zpass_op).
 */
enum stencil_op {
   S_FAIL_OP = 0,   /**< selects pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /**< selects pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /**< selects pipe_stencil_state::zpass_op */
};
90
91
92
93/**
94 * Do the stencil test comparison (compare FB stencil values against ref value).
95 * This will be used twice when generating two-sided stencil code.
96 * \param stencil  the front/back stencil state
97 * \param stencilRef  the stencil reference value, replicated as a vector
98 * \param stencilVals  vector of stencil values from framebuffer
99 * \return vector mask of pass/fail values (~0 or 0)
100 */
101static LLVMValueRef
102lp_build_stencil_test_single(struct lp_build_context *bld,
103                             const struct pipe_stencil_state *stencil,
104                             LLVMValueRef stencilRef,
105                             LLVMValueRef stencilVals)
106{
107   const unsigned stencilMax = 255; /* XXX fix */
108   struct lp_type type = bld->type;
109   LLVMValueRef res;
110
111   assert(type.sign);
112
113   assert(stencil->enabled);
114
115   if (stencil->valuemask != stencilMax) {
116      /* compute stencilRef = stencilRef & valuemask */
117      LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
118      stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
119      /* compute stencilVals = stencilVals & valuemask */
120      stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
121   }
122
123   res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
124
125   return res;
126}
127
128
129/**
130 * Do the one or two-sided stencil test comparison.
131 * \sa lp_build_stencil_test_single
132 * \param front_facing  an integer vector mask, indicating front (~0) or back
133 *                      (0) facing polygon. If NULL, assume front-facing.
134 */
135static LLVMValueRef
136lp_build_stencil_test(struct lp_build_context *bld,
137                      const struct pipe_stencil_state stencil[2],
138                      LLVMValueRef stencilRefs[2],
139                      LLVMValueRef stencilVals,
140                      LLVMValueRef front_facing)
141{
142   LLVMValueRef res;
143
144   assert(stencil[0].enabled);
145
146   /* do front face test */
147   res = lp_build_stencil_test_single(bld, &stencil[0],
148                                      stencilRefs[0], stencilVals);
149
150   if (stencil[1].enabled && front_facing) {
151      /* do back face test */
152      LLVMValueRef back_res;
153
154      back_res = lp_build_stencil_test_single(bld, &stencil[1],
155                                              stencilRefs[1], stencilVals);
156
157      res = lp_build_select(bld, front_facing, res, back_res);
158   }
159
160   return res;
161}
162
163
164/**
165 * Apply the stencil operator (add/sub/keep/etc) to the given vector
166 * of stencil values.
167 * \return  new stencil values vector
168 */
169static LLVMValueRef
170lp_build_stencil_op_single(struct lp_build_context *bld,
171                           const struct pipe_stencil_state *stencil,
172                           enum stencil_op op,
173                           LLVMValueRef stencilRef,
174                           LLVMValueRef stencilVals)
175
176{
177   struct lp_type type = bld->type;
178   LLVMValueRef res;
179   LLVMValueRef max = lp_build_const_int_vec(type, 0xff);
180   unsigned stencil_op;
181
182   assert(type.sign);
183
184   switch (op) {
185   case S_FAIL_OP:
186      stencil_op = stencil->fail_op;
187      break;
188   case Z_FAIL_OP:
189      stencil_op = stencil->zfail_op;
190      break;
191   case Z_PASS_OP:
192      stencil_op = stencil->zpass_op;
193      break;
194   default:
195      assert(0 && "Invalid stencil_op mode");
196      stencil_op = PIPE_STENCIL_OP_KEEP;
197   }
198
199   switch (stencil_op) {
200   case PIPE_STENCIL_OP_KEEP:
201      res = stencilVals;
202      /* we can return early for this case */
203      return res;
204   case PIPE_STENCIL_OP_ZERO:
205      res = bld->zero;
206      break;
207   case PIPE_STENCIL_OP_REPLACE:
208      res = stencilRef;
209      break;
210   case PIPE_STENCIL_OP_INCR:
211      res = lp_build_add(bld, stencilVals, bld->one);
212      res = lp_build_min(bld, res, max);
213      break;
214   case PIPE_STENCIL_OP_DECR:
215      res = lp_build_sub(bld, stencilVals, bld->one);
216      res = lp_build_max(bld, res, bld->zero);
217      break;
218   case PIPE_STENCIL_OP_INCR_WRAP:
219      res = lp_build_add(bld, stencilVals, bld->one);
220      res = LLVMBuildAnd(bld->builder, res, max, "");
221      break;
222   case PIPE_STENCIL_OP_DECR_WRAP:
223      res = lp_build_sub(bld, stencilVals, bld->one);
224      res = LLVMBuildAnd(bld->builder, res, max, "");
225      break;
226   case PIPE_STENCIL_OP_INVERT:
227      res = LLVMBuildNot(bld->builder, stencilVals, "");
228      res = LLVMBuildAnd(bld->builder, res, max, "");
229      break;
230   default:
231      assert(0 && "bad stencil op mode");
232      res = bld->undef;
233   }
234
235   return res;
236}
237
238
239/**
240 * Do the one or two-sided stencil test op/update.
241 */
242static LLVMValueRef
243lp_build_stencil_op(struct lp_build_context *bld,
244                    const struct pipe_stencil_state stencil[2],
245                    enum stencil_op op,
246                    LLVMValueRef stencilRefs[2],
247                    LLVMValueRef stencilVals,
248                    LLVMValueRef mask,
249                    LLVMValueRef front_facing)
250
251{
252   LLVMValueRef res;
253
254   assert(stencil[0].enabled);
255
256   /* do front face op */
257   res = lp_build_stencil_op_single(bld, &stencil[0], op,
258                                     stencilRefs[0], stencilVals);
259
260   if (stencil[1].enabled && front_facing) {
261      /* do back face op */
262      LLVMValueRef back_res;
263
264      back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
265                                            stencilRefs[1], stencilVals);
266
267      res = lp_build_select(bld, front_facing, res, back_res);
268   }
269
270   if (stencil->writemask != 0xff) {
271      /* mask &= stencil->writemask */
272      LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask);
273      mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
274      /* res = (res & mask) | (stencilVals & ~mask) */
275      res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
276   }
277   else {
278      /* res = mask ? res : stencilVals */
279      res = lp_build_select(bld, mask, res, stencilVals);
280   }
281
282   return res;
283}
284
285
286
287/**
288 * Return a type appropriate for depth/stencil testing.
289 */
290struct lp_type
291lp_depth_type(const struct util_format_description *format_desc,
292              unsigned length)
293{
294   struct lp_type type;
295   unsigned swizzle;
296
297   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
298   assert(format_desc->block.width == 1);
299   assert(format_desc->block.height == 1);
300
301   swizzle = format_desc->swizzle[0];
302   assert(swizzle < 4);
303
304   memset(&type, 0, sizeof type);
305   type.width = format_desc->block.bits;
306
307   if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
308      type.floating = TRUE;
309      assert(swizzle == 0);
310      assert(format_desc->channel[swizzle].size == format_desc->block.bits);
311   }
312   else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
313      assert(format_desc->block.bits <= 32);
314      if(format_desc->channel[swizzle].normalized)
315         type.norm = TRUE;
316   }
317   else
318      assert(0);
319
320   assert(type.width <= length);
321   type.length = length / type.width;
322
323   return type;
324}
325
326
327/**
328 * Compute bitmask and bit shift to apply to the incoming fragment Z values
329 * and the Z buffer values needed before doing the Z comparison.
330 *
331 * Note that we leave the Z bits in the position that we find them
332 * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
333 * get by with fewer bit twiddling steps.
334 */
335static boolean
336get_z_shift_and_mask(const struct util_format_description *format_desc,
337                     unsigned *shift, unsigned *mask)
338{
339   const unsigned total_bits = format_desc->block.bits;
340   unsigned z_swizzle;
341   unsigned chan;
342   unsigned padding_left, padding_right;
343
344   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
345   assert(format_desc->block.width == 1);
346   assert(format_desc->block.height == 1);
347
348   z_swizzle = format_desc->swizzle[0];
349
350   if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
351      return FALSE;
352
353   padding_right = 0;
354   for (chan = 0; chan < z_swizzle; ++chan)
355      padding_right += format_desc->channel[chan].size;
356
357   padding_left =
358      total_bits - (padding_right + format_desc->channel[z_swizzle].size);
359
360   if (padding_left || padding_right) {
361      unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
362      unsigned long long mask_right = (1ULL << (padding_right)) - 1;
363      *mask = mask_left ^ mask_right;
364   }
365   else {
366      *mask = 0xffffffff;
367   }
368
369   *shift = padding_left;
370
371   return TRUE;
372}
373
374
375/**
376 * Compute bitmask and bit shift to apply to the framebuffer pixel values
377 * to put the stencil bits in the least significant position.
378 * (i.e. 0x000000ff)
379 */
380static boolean
381get_s_shift_and_mask(const struct util_format_description *format_desc,
382                     unsigned *shift, unsigned *mask)
383{
384   unsigned s_swizzle;
385   unsigned chan, sz;
386
387   s_swizzle = format_desc->swizzle[1];
388
389   if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
390      return FALSE;
391
392   *shift = 0;
393   for (chan = 0; chan < s_swizzle; chan++)
394      *shift += format_desc->channel[chan].size;
395
396   sz = format_desc->channel[s_swizzle].size;
397   *mask = (1U << sz) - 1U;
398
399   return TRUE;
400}
401
402
403/**
404 * Perform the occlusion test and increase the counter.
405 * Test the depth mask. Add the number of channel which has none zero mask
406 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
407 * The counter will add 4.
408 *
409 * \param type holds element type of the mask vector.
410 * \param maskvalue is the depth test mask.
411 * \param counter is a pointer of the uint32 counter.
412 */
413static void
414lp_build_occlusion_count(LLVMBuilderRef builder,
415                         struct lp_type type,
416                         LLVMValueRef maskvalue,
417                         LLVMValueRef counter)
418{
419   LLVMValueRef countmask = lp_build_const_int_vec(type, 1);
420   LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
421   LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16);
422   LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
423   LLVMValueRef maskarray[4] = {
424      LLVMConstInt(LLVMInt32Type(), 0, 0),
425      LLVMConstInt(LLVMInt32Type(), 4, 0),
426      LLVMConstInt(LLVMInt32Type(), 8, 0),
427      LLVMConstInt(LLVMInt32Type(), 12, 0),
428   };
429   LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
430   LLVMValueRef shufflev =  LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
431   LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle");
432   LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle);
433   LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
434   LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
435   LLVMBuildStore(builder, incr, counter);
436}
437
438
439
440/**
441 * Generate code for performing depth and/or stencil tests.
442 * We operate on a vector of values (typically a 2x2 quad).
443 *
444 * \param depth  the depth test state
445 * \param stencil  the front/back stencil state
446 * \param type  the data type of the fragment depth/stencil values
447 * \param format_desc  description of the depth/stencil surface
448 * \param mask  the alive/dead pixel mask for the quad (vector)
449 * \param stencil_refs  the front/back stencil ref values (scalar)
450 * \param z_src  the incoming depth/stencil values (a 2x2 quad, float32)
451 * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
452 * \param facing  contains float value indicating front/back facing polygon
453 */
454void
455lp_build_depth_stencil_test(LLVMBuilderRef builder,
456                            const struct pipe_depth_state *depth,
457                            const struct pipe_stencil_state stencil[2],
458                            struct lp_type z_src_type,
459                            const struct util_format_description *format_desc,
460                            struct lp_build_mask_context *mask,
461                            LLVMValueRef stencil_refs[2],
462                            LLVMValueRef z_src,
463                            LLVMValueRef zs_dst_ptr,
464                            LLVMValueRef face,
465                            LLVMValueRef counter,
466                            boolean do_branch)
467{
468   struct lp_type type;
469   struct lp_build_context bld;
470   struct lp_build_context sbld;
471   struct lp_type s_type;
472   LLVMValueRef zs_dst, z_dst = NULL;
473   LLVMValueRef stencil_vals = NULL;
474   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
475   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
476   LLVMValueRef orig_mask = mask->value;
477   LLVMValueRef front_facing = NULL;
478
479   /* Prototype a simpler path:
480    */
481   if (z_src_type.floating &&
482       format_desc->format == PIPE_FORMAT_X8Z24_UNORM &&
483       depth->enabled)
484   {
485      LLVMValueRef zscaled;
486      LLVMValueRef const_ffffff_float;
487      LLVMValueRef const_8_int;
488      LLVMTypeRef int32_vec_type;
489
490      /* We know the values in z_dst are all >= 0, so allow
491       * lp_build_compare to use signed compare intrinsics:
492       */
493      type.floating = 0;
494      type.fixed = 0;
495      type.sign = 1;
496      type.norm = 1;
497      type.width = 32;
498      type.length = z_src_type.length;
499
500      int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length);
501
502      const_8_int = lp_build_const_int_vec(type, 8);
503      const_ffffff_float = lp_build_const_vec(z_src_type, (float)0xffffff);
504
505      zscaled = LLVMBuildFMul(builder, z_src, const_ffffff_float, "zscaled");
506      z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src");
507
508      /* Load current z/stencil value from z/stencil buffer */
509      z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
510      z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst");
511
512      /* compare src Z to dst Z, returning 'pass' mask */
513      z_pass = lp_build_compare(builder,
514                                type,
515                                depth->func, z_src, z_dst);
516
517      lp_build_mask_update(mask, z_pass);
518
519      if (do_branch)
520         lp_build_mask_check(mask);
521
522      /* No need to worry about old stencil contents, just blend the
523       * old and new values and shift into the correct position for
524       * storage.
525       */
526      if (depth->writemask) {
527         type.sign = 0;
528         lp_build_context_init(&bld, builder, type);
529
530         z_dst = lp_build_select(&bld, mask->value, z_src, z_dst);
531         z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst");
532         LLVMBuildStore(builder, z_dst, zs_dst_ptr);
533      }
534
535      if (counter)
536         lp_build_occlusion_count(builder, type, mask->value, counter);
537
538      return;
539   }
540
541   /*
542    * Depths are expected to be between 0 and 1, even if they are stored in
543    * floats. Setting these bits here will ensure that the lp_build_conv() call
544    * below won't try to unnecessarily clamp the incoming values.
545    */
546   if(z_src_type.floating) {
547      z_src_type.sign = FALSE;
548      z_src_type.norm = TRUE;
549   }
550   else {
551      assert(!z_src_type.sign);
552      assert(z_src_type.norm);
553   }
554
555   /* Pick the depth type. */
556   type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
557
558   /* FIXME: Cope with a depth test type with a different bit width. */
559   assert(type.width == z_src_type.width);
560   assert(type.length == z_src_type.length);
561
562   /* Convert fragment Z from float to integer */
563   lp_build_conv(builder, z_src_type, type, &z_src, 1, &z_src, 1);
564
565   zs_dst_ptr = LLVMBuildBitCast(builder,
566                                 zs_dst_ptr,
567                                 LLVMPointerType(lp_build_vec_type(type), 0), "");
568
569
570
571   /* Sanity checking */
572   {
573      const unsigned z_swizzle = format_desc->swizzle[0];
574      const unsigned s_swizzle = format_desc->swizzle[1];
575
576      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
577             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
578
579      assert(depth->enabled || stencil[0].enabled);
580
581      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
582      assert(format_desc->block.width == 1);
583      assert(format_desc->block.height == 1);
584
585      if (stencil[0].enabled) {
586         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
587                format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
588      }
589
590      assert(z_swizzle < 4);
591      assert(format_desc->block.bits == type.width);
592      if (type.floating) {
593         assert(z_swizzle == 0);
594         assert(format_desc->channel[z_swizzle].type ==
595                UTIL_FORMAT_TYPE_FLOAT);
596         assert(format_desc->channel[z_swizzle].size ==
597                format_desc->block.bits);
598      }
599      else {
600         assert(format_desc->channel[z_swizzle].type ==
601                UTIL_FORMAT_TYPE_UNSIGNED);
602         assert(format_desc->channel[z_swizzle].normalized);
603         assert(!type.fixed);
604         assert(!type.sign);
605         assert(type.norm);
606      }
607   }
608
609
610   /* Setup build context for Z vals */
611   lp_build_context_init(&bld, builder, type);
612
613   /* Setup build context for stencil vals */
614   s_type = lp_type_int_vec(type.width);
615   lp_build_context_init(&sbld, builder, s_type);
616
617   /* Load current z/stencil value from z/stencil buffer */
618   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
619
620   lp_build_name(zs_dst, "zsbufval");
621
622
623   /* Compute and apply the Z/stencil bitmasks and shifts.
624    */
625   {
626      unsigned z_shift, z_mask;
627      unsigned s_shift, s_mask;
628
629      if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
630         if (z_shift) {
631            LLVMValueRef shift = lp_build_const_int_vec(type, z_shift);
632            z_src = LLVMBuildLShr(builder, z_src, shift, "");
633         }
634
635         if (z_mask != 0xffffffff) {
636            LLVMValueRef mask = lp_build_const_int_vec(type, z_mask);
637            z_src = LLVMBuildAnd(builder, z_src, mask, "");
638            z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
639            z_bitmask = mask;  /* used below */
640         }
641         else {
642            z_dst = zs_dst;
643         }
644
645         lp_build_name(z_dst, "zsbuf.z");
646      }
647
648      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
649         if (s_shift) {
650            LLVMValueRef shift = lp_build_const_int_vec(type, s_shift);
651            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
652            stencil_shift = shift;  /* used below */
653         }
654         else {
655            stencil_vals = zs_dst;
656         }
657
658         if (s_mask != 0xffffffff) {
659            LLVMValueRef mask = lp_build_const_int_vec(type, s_mask);
660            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
661         }
662
663         lp_build_name(stencil_vals, "stencil");
664      }
665   }
666
667   if (stencil[0].enabled) {
668
669      if (face) {
670         LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
671
672         /* front_facing = face > 0.0 ? ~0 : 0 */
673         front_facing = LLVMBuildFCmp(builder, LLVMRealUGT, face, zero, "");
674         front_facing = LLVMBuildSExt(builder, front_facing,
675                                      LLVMIntType(bld.type.length*bld.type.width),
676                                      "");
677         front_facing = LLVMBuildBitCast(builder, front_facing,
678                                         bld.int_vec_type, "");
679      }
680
681      /* convert scalar stencil refs into vectors */
682      stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
683      stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);
684
685      s_pass_mask = lp_build_stencil_test(&sbld, stencil,
686                                          stencil_refs, stencil_vals,
687                                          front_facing);
688
689      /* apply stencil-fail operator */
690      {
691         LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask);
692         stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
693                                            stencil_refs, stencil_vals,
694                                            s_fail_mask, front_facing);
695      }
696   }
697
698   if (depth->enabled) {
699      /* compare src Z to dst Z, returning 'pass' mask */
700      z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);
701
702      if (!stencil[0].enabled) {
703         /* We can potentially skip all remaining operations here, but only
704          * if stencil is disabled because we still need to update the stencil
705          * buffer values.  Don't need to update Z buffer values.
706          */
707         lp_build_mask_update(mask, z_pass);
708
709         if (do_branch) {
710            lp_build_mask_check(mask);
711            do_branch = FALSE;
712         }
713      }
714
715      if (depth->writemask) {
716         LLVMValueRef zselectmask = mask->value;
717
718         /* mask off bits that failed Z test */
719         zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");
720
721         /* mask off bits that failed stencil test */
722         if (s_pass_mask) {
723            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
724         }
725
726         /* if combined Z/stencil format, mask off the stencil bits */
727         if (z_bitmask) {
728            zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, "");
729         }
730
731         /* Mix the old and new Z buffer values.
732          * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i])
733          */
734         z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst);
735      }
736
737      if (stencil[0].enabled) {
738         /* update stencil buffer values according to z pass/fail result */
739         LLVMValueRef z_fail_mask, z_pass_mask;
740
741         /* apply Z-fail operator */
742         z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass);
743         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
744                                            stencil_refs, stencil_vals,
745                                            z_fail_mask, front_facing);
746
747         /* apply Z-pass operator */
748         z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
749         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
750                                            stencil_refs, stencil_vals,
751                                            z_pass_mask, front_facing);
752      }
753   }
754   else {
755      /* No depth test: apply Z-pass operator to stencil buffer values which
756       * passed the stencil test.
757       */
758      s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
759      stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
760                                         stencil_refs, stencil_vals,
761                                         s_pass_mask, front_facing);
762   }
763
764   /* The Z bits are already in the right place but we may need to shift the
765    * stencil bits before ORing Z with Stencil to make the final pixel value.
766    */
767   if (stencil_vals && stencil_shift)
768      stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
769                                  stencil_shift, "");
770
771   /* Finally, merge/store the z/stencil values */
772   if ((depth->enabled && depth->writemask) ||
773       (stencil[0].enabled && stencil[0].writemask)) {
774
775      if (z_dst && stencil_vals)
776         zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
777      else if (z_dst)
778         zs_dst = z_dst;
779      else
780         zs_dst = stencil_vals;
781
782      LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
783   }
784
785   if (s_pass_mask)
786      lp_build_mask_update(mask, s_pass_mask);
787
788   if (depth->enabled && stencil[0].enabled)
789      lp_build_mask_update(mask, z_pass);
790
791   if (do_branch)
792      lp_build_mask_check(mask);
793
794   if (counter)
795      lp_build_occlusion_count(builder, type, mask->value, counter);
796}
797