lp_bld_depth.c revision 33abbd4fbdb3149df5ecc296b04a79225962e433
1/**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Depth/stencil testing to LLVM IR translation.
31 *
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
 * type/format for depth/stencil values internally and only converting when
36 * flushing would avoid this, but it would most likely result in depth fighting
37 * artifacts.
38 *
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
 * values tiled, a quad at a time. That is, a depth buffer containing
42 *
43 *  Z11 Z12 Z13 Z14 ...
44 *  Z21 Z22 Z23 Z24 ...
45 *  Z31 Z32 Z33 Z34 ...
46 *  Z41 Z42 Z43 Z44 ...
47 *  ... ... ... ... ...
48 *
49 * will actually be stored in memory as
50 *
51 *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 *  ... ... ... ... ... ... ... ... ...
54 *
55 *
56 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
58 */
59
60#include "pipe/p_state.h"
61#include "util/u_format.h"
62
63#include "gallivm/lp_bld_type.h"
64#include "gallivm/lp_bld_arit.h"
65#include "gallivm/lp_bld_bitarit.h"
66#include "gallivm/lp_bld_const.h"
67#include "gallivm/lp_bld_conv.h"
68#include "gallivm/lp_bld_logic.h"
69#include "gallivm/lp_bld_flow.h"
70#include "gallivm/lp_bld_intr.h"
71#include "gallivm/lp_bld_debug.h"
72#include "gallivm/lp_bld_swizzle.h"
73
74#include "lp_bld_depth.h"
75
76
/**
 * Names the three per-face operation fields of pipe_stencil_state
 * (fail_op, zfail_op, zpass_op) so that helpers can be told which one
 * to apply.
 */
enum stencil_op {
   S_FAIL_OP = 0,   /**< use pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /**< use pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /**< use pipe_stencil_state::zpass_op */
};
83
84
85
86/**
87 * Do the stencil test comparison (compare FB stencil values against ref value).
88 * This will be used twice when generating two-sided stencil code.
89 * \param stencil  the front/back stencil state
90 * \param stencilRef  the stencil reference value, replicated as a vector
91 * \param stencilVals  vector of stencil values from framebuffer
92 * \return vector mask of pass/fail values (~0 or 0)
93 */
94static LLVMValueRef
95lp_build_stencil_test_single(struct lp_build_context *bld,
96                             const struct pipe_stencil_state *stencil,
97                             LLVMValueRef stencilRef,
98                             LLVMValueRef stencilVals)
99{
100   LLVMBuilderRef builder = bld->gallivm->builder;
101   const unsigned stencilMax = 255; /* XXX fix */
102   struct lp_type type = bld->type;
103   LLVMValueRef res;
104
105   assert(type.sign);
106
107   assert(stencil->enabled);
108
109   if (stencil->valuemask != stencilMax) {
110      /* compute stencilRef = stencilRef & valuemask */
111      LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
112      stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
113      /* compute stencilVals = stencilVals & valuemask */
114      stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
115   }
116
117   res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
118
119   return res;
120}
121
122
123/**
124 * Do the one or two-sided stencil test comparison.
125 * \sa lp_build_stencil_test_single
126 * \param front_facing  an integer vector mask, indicating front (~0) or back
127 *                      (0) facing polygon. If NULL, assume front-facing.
128 */
129static LLVMValueRef
130lp_build_stencil_test(struct lp_build_context *bld,
131                      const struct pipe_stencil_state stencil[2],
132                      LLVMValueRef stencilRefs[2],
133                      LLVMValueRef stencilVals,
134                      LLVMValueRef front_facing)
135{
136   LLVMValueRef res;
137
138   assert(stencil[0].enabled);
139
140   /* do front face test */
141   res = lp_build_stencil_test_single(bld, &stencil[0],
142                                      stencilRefs[0], stencilVals);
143
144   if (stencil[1].enabled && front_facing != NULL) {
145      /* do back face test */
146      LLVMValueRef back_res;
147
148      back_res = lp_build_stencil_test_single(bld, &stencil[1],
149                                              stencilRefs[1], stencilVals);
150
151      res = lp_build_select(bld, front_facing, res, back_res);
152   }
153
154   return res;
155}
156
157
158/**
159 * Apply the stencil operator (add/sub/keep/etc) to the given vector
160 * of stencil values.
161 * \return  new stencil values vector
162 */
163static LLVMValueRef
164lp_build_stencil_op_single(struct lp_build_context *bld,
165                           const struct pipe_stencil_state *stencil,
166                           enum stencil_op op,
167                           LLVMValueRef stencilRef,
168                           LLVMValueRef stencilVals)
169
170{
171   LLVMBuilderRef builder = bld->gallivm->builder;
172   struct lp_type type = bld->type;
173   LLVMValueRef res;
174   LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
175   unsigned stencil_op;
176
177   assert(type.sign);
178
179   switch (op) {
180   case S_FAIL_OP:
181      stencil_op = stencil->fail_op;
182      break;
183   case Z_FAIL_OP:
184      stencil_op = stencil->zfail_op;
185      break;
186   case Z_PASS_OP:
187      stencil_op = stencil->zpass_op;
188      break;
189   default:
190      assert(0 && "Invalid stencil_op mode");
191      stencil_op = PIPE_STENCIL_OP_KEEP;
192   }
193
194   switch (stencil_op) {
195   case PIPE_STENCIL_OP_KEEP:
196      res = stencilVals;
197      /* we can return early for this case */
198      return res;
199   case PIPE_STENCIL_OP_ZERO:
200      res = bld->zero;
201      break;
202   case PIPE_STENCIL_OP_REPLACE:
203      res = stencilRef;
204      break;
205   case PIPE_STENCIL_OP_INCR:
206      res = lp_build_add(bld, stencilVals, bld->one);
207      res = lp_build_min(bld, res, max);
208      break;
209   case PIPE_STENCIL_OP_DECR:
210      res = lp_build_sub(bld, stencilVals, bld->one);
211      res = lp_build_max(bld, res, bld->zero);
212      break;
213   case PIPE_STENCIL_OP_INCR_WRAP:
214      res = lp_build_add(bld, stencilVals, bld->one);
215      res = LLVMBuildAnd(builder, res, max, "");
216      break;
217   case PIPE_STENCIL_OP_DECR_WRAP:
218      res = lp_build_sub(bld, stencilVals, bld->one);
219      res = LLVMBuildAnd(builder, res, max, "");
220      break;
221   case PIPE_STENCIL_OP_INVERT:
222      res = LLVMBuildNot(builder, stencilVals, "");
223      res = LLVMBuildAnd(builder, res, max, "");
224      break;
225   default:
226      assert(0 && "bad stencil op mode");
227      res = bld->undef;
228   }
229
230   return res;
231}
232
233
234/**
235 * Do the one or two-sided stencil test op/update.
236 */
237static LLVMValueRef
238lp_build_stencil_op(struct lp_build_context *bld,
239                    const struct pipe_stencil_state stencil[2],
240                    enum stencil_op op,
241                    LLVMValueRef stencilRefs[2],
242                    LLVMValueRef stencilVals,
243                    LLVMValueRef mask,
244                    LLVMValueRef front_facing)
245
246{
247   LLVMBuilderRef builder = bld->gallivm->builder;
248   LLVMValueRef res;
249
250   assert(stencil[0].enabled);
251
252   /* do front face op */
253   res = lp_build_stencil_op_single(bld, &stencil[0], op,
254                                     stencilRefs[0], stencilVals);
255
256   if (stencil[1].enabled && front_facing != NULL) {
257      /* do back face op */
258      LLVMValueRef back_res;
259
260      back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
261                                            stencilRefs[1], stencilVals);
262
263      res = lp_build_select(bld, front_facing, res, back_res);
264   }
265
266   /* XXX what about the back-face writemask? */
267   if (stencil[0].writemask != 0xff) {
268      /* mask &= stencil[0].writemask */
269      LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
270                                                      stencil[0].writemask);
271      mask = LLVMBuildAnd(builder, mask, writemask, "");
272      /* res = (res & mask) | (stencilVals & ~mask) */
273      res = lp_build_select_bitwise(bld, mask, res, stencilVals);
274   }
275   else {
276      /* res = mask ? res : stencilVals */
277      res = lp_build_select(bld, mask, res, stencilVals);
278   }
279
280   return res;
281}
282
283
284
285/**
286 * Return a type appropriate for depth/stencil testing.
287 */
288struct lp_type
289lp_depth_type(const struct util_format_description *format_desc,
290              unsigned length)
291{
292   struct lp_type type;
293   unsigned swizzle;
294
295   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
296   assert(format_desc->block.width == 1);
297   assert(format_desc->block.height == 1);
298
299   swizzle = format_desc->swizzle[0];
300   assert(swizzle < 4);
301
302   memset(&type, 0, sizeof type);
303   type.width = format_desc->block.bits;
304
305   if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
306      type.floating = TRUE;
307      assert(swizzle == 0);
308      assert(format_desc->channel[swizzle].size == format_desc->block.bits);
309   }
310   else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
311      assert(format_desc->block.bits <= 32);
312      assert(format_desc->channel[swizzle].normalized);
313      if (format_desc->channel[swizzle].size < format_desc->block.bits) {
314         /* Prefer signed integers when possible, as SSE has less support
315          * for unsigned comparison;
316          */
317         type.sign = TRUE;
318      }
319   }
320   else
321      assert(0);
322
323   assert(type.width <= length);
324   type.length = length / type.width;
325
326   return type;
327}
328
329
330/**
331 * Compute bitmask and bit shift to apply to the incoming fragment Z values
332 * and the Z buffer values needed before doing the Z comparison.
333 *
334 * Note that we leave the Z bits in the position that we find them
335 * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
336 * get by with fewer bit twiddling steps.
337 */
338static boolean
339get_z_shift_and_mask(const struct util_format_description *format_desc,
340                     unsigned *shift, unsigned *width, unsigned *mask)
341{
342   const unsigned total_bits = format_desc->block.bits;
343   unsigned z_swizzle;
344   unsigned chan;
345   unsigned padding_left, padding_right;
346
347   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
348   assert(format_desc->block.width == 1);
349   assert(format_desc->block.height == 1);
350
351   z_swizzle = format_desc->swizzle[0];
352
353   if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
354      return FALSE;
355
356   *width = format_desc->channel[z_swizzle].size;
357
358   padding_right = 0;
359   for (chan = 0; chan < z_swizzle; ++chan)
360      padding_right += format_desc->channel[chan].size;
361
362   padding_left =
363      total_bits - (padding_right + *width);
364
365   if (padding_left || padding_right) {
366      unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
367      unsigned long long mask_right = (1ULL << (padding_right)) - 1;
368      *mask = mask_left ^ mask_right;
369   }
370   else {
371      *mask = 0xffffffff;
372   }
373
374   *shift = padding_right;
375
376   return TRUE;
377}
378
379
380/**
381 * Compute bitmask and bit shift to apply to the framebuffer pixel values
382 * to put the stencil bits in the least significant position.
383 * (i.e. 0x000000ff)
384 */
385static boolean
386get_s_shift_and_mask(const struct util_format_description *format_desc,
387                     unsigned *shift, unsigned *mask)
388{
389   unsigned s_swizzle;
390   unsigned chan, sz;
391
392   s_swizzle = format_desc->swizzle[1];
393
394   if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
395      return FALSE;
396
397   *shift = 0;
398   for (chan = 0; chan < s_swizzle; chan++)
399      *shift += format_desc->channel[chan].size;
400
401   sz = format_desc->channel[s_swizzle].size;
402   *mask = (1U << sz) - 1U;
403
404   return TRUE;
405}
406
407
408/**
409 * Perform the occlusion test and increase the counter.
410 * Test the depth mask. Add the number of channel which has none zero mask
411 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
412 * The counter will add 4.
413 *
414 * \param type holds element type of the mask vector.
415 * \param maskvalue is the depth test mask.
416 * \param counter is a pointer of the uint32 counter.
417 */
418void
419lp_build_occlusion_count(struct gallivm_state *gallivm,
420                         struct lp_type type,
421                         LLVMValueRef maskvalue,
422                         LLVMValueRef counter)
423{
424   LLVMBuilderRef builder = gallivm->builder;
425   LLVMContextRef context = gallivm->context;
426   LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
427   LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
428   LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8TypeInContext(context), 16);
429   LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
430   LLVMValueRef maskarray[4] = {
431      lp_build_const_int32(gallivm, 0),
432      lp_build_const_int32(gallivm, 4),
433      lp_build_const_int32(gallivm, 8),
434      lp_build_const_int32(gallivm, 12)
435   };
436   LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
437   LLVMValueRef shufflev =  LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
438   LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32TypeInContext(context), "shuffle");
439   LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32TypeInContext(context), shuffle);
440   LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
441   LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
442   LLVMBuildStore(builder, incr, counter);
443}
444
445
446
447/**
448 * Generate code for performing depth and/or stencil tests.
449 * We operate on a vector of values (typically a 2x2 quad).
450 *
451 * \param depth  the depth test state
452 * \param stencil  the front/back stencil state
453 * \param type  the data type of the fragment depth/stencil values
454 * \param format_desc  description of the depth/stencil surface
455 * \param mask  the alive/dead pixel mask for the quad (vector)
456 * \param stencil_refs  the front/back stencil ref values (scalar)
457 * \param z_src  the incoming depth/stencil values (a 2x2 quad, float32)
458 * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
459 * \param facing  contains boolean value indicating front/back facing polygon
460 */
461void
462lp_build_depth_stencil_test(struct gallivm_state *gallivm,
463                            const struct pipe_depth_state *depth,
464                            const struct pipe_stencil_state stencil[2],
465                            struct lp_type z_src_type,
466                            const struct util_format_description *format_desc,
467                            struct lp_build_mask_context *mask,
468                            LLVMValueRef stencil_refs[2],
469                            LLVMValueRef z_src,
470                            LLVMValueRef zs_dst_ptr,
471                            LLVMValueRef face,
472                            LLVMValueRef *zs_value,
473                            boolean do_branch)
474{
475   LLVMBuilderRef builder = gallivm->builder;
476   struct lp_type z_type;
477   struct lp_build_context z_bld;
478   struct lp_build_context s_bld;
479   struct lp_type s_type;
480   unsigned z_shift = 0, z_width = 0, z_mask = 0;
481   LLVMValueRef zs_dst, z_dst = NULL;
482   LLVMValueRef stencil_vals = NULL;
483   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
484   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
485   LLVMValueRef orig_mask = lp_build_mask_value(mask);
486   LLVMValueRef front_facing = NULL;
487
488
489   /*
490    * Depths are expected to be between 0 and 1, even if they are stored in
491    * floats. Setting these bits here will ensure that the lp_build_conv() call
492    * below won't try to unnecessarily clamp the incoming values.
493    */
494   if(z_src_type.floating) {
495      z_src_type.sign = FALSE;
496      z_src_type.norm = TRUE;
497   }
498   else {
499      assert(!z_src_type.sign);
500      assert(z_src_type.norm);
501   }
502
503   /* Pick the depth type. */
504   z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
505
506   /* FIXME: Cope with a depth test type with a different bit width. */
507   assert(z_type.width == z_src_type.width);
508   assert(z_type.length == z_src_type.length);
509
510   /* Sanity checking */
511   {
512      const unsigned z_swizzle = format_desc->swizzle[0];
513      const unsigned s_swizzle = format_desc->swizzle[1];
514
515      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
516             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
517
518      assert(depth->enabled || stencil[0].enabled);
519
520      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
521      assert(format_desc->block.width == 1);
522      assert(format_desc->block.height == 1);
523
524      if (stencil[0].enabled) {
525         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
526                format_desc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM);
527      }
528
529      assert(z_swizzle < 4);
530      assert(format_desc->block.bits == z_type.width);
531      if (z_type.floating) {
532         assert(z_swizzle == 0);
533         assert(format_desc->channel[z_swizzle].type ==
534                UTIL_FORMAT_TYPE_FLOAT);
535         assert(format_desc->channel[z_swizzle].size ==
536                format_desc->block.bits);
537      }
538      else {
539         assert(format_desc->channel[z_swizzle].type ==
540                UTIL_FORMAT_TYPE_UNSIGNED);
541         assert(format_desc->channel[z_swizzle].normalized);
542         assert(!z_type.fixed);
543      }
544   }
545
546
547   /* Setup build context for Z vals */
548   lp_build_context_init(&z_bld, gallivm, z_type);
549
550   /* Setup build context for stencil vals */
551   s_type = lp_type_int_vec(z_type.width);
552   lp_build_context_init(&s_bld, gallivm, s_type);
553
554   /* Load current z/stencil value from z/stencil buffer */
555   zs_dst_ptr = LLVMBuildBitCast(builder,
556                                 zs_dst_ptr,
557                                 LLVMPointerType(z_bld.vec_type, 0), "");
558   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
559
560   lp_build_name(zs_dst, "zs_dst");
561
562
563   /* Compute and apply the Z/stencil bitmasks and shifts.
564    */
565   {
566      unsigned s_shift, s_mask;
567
568      if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
569         if (z_mask != 0xffffffff) {
570            z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
571         }
572
573         /*
574          * Align the framebuffer Z 's LSB to the right.
575          */
576         if (z_shift) {
577            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
578            z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
579         } else if (z_bitmask) {
580	    /* TODO: Instead of loading a mask from memory and ANDing, it's
581	     * probably faster to just shake the bits with two shifts. */
582            z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
583         } else {
584            z_dst = zs_dst;
585            lp_build_name(z_dst, "z_dst");
586         }
587      }
588
589      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
590         if (s_shift) {
591            LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
592            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
593            stencil_shift = shift;  /* used below */
594         }
595         else {
596            stencil_vals = zs_dst;
597         }
598
599         if (s_mask != 0xffffffff) {
600            LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
601            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
602         }
603
604         lp_build_name(stencil_vals, "s_dst");
605      }
606   }
607
608   if (stencil[0].enabled) {
609
610      if (face) {
611         LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
612
613         /* front_facing = face != 0 ? ~0 : 0 */
614         front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
615         front_facing = LLVMBuildSExt(builder, front_facing,
616                                      LLVMIntTypeInContext(gallivm->context,
617                                             s_bld.type.length*s_bld.type.width),
618                                      "");
619         front_facing = LLVMBuildBitCast(builder, front_facing,
620                                         s_bld.int_vec_type, "");
621      }
622
623      /* convert scalar stencil refs into vectors */
624      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
625      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
626
627      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
628                                          stencil_refs, stencil_vals,
629                                          front_facing);
630
631      /* apply stencil-fail operator */
632      {
633         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
634         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
635                                            stencil_refs, stencil_vals,
636                                            s_fail_mask, front_facing);
637      }
638   }
639
640   if (depth->enabled) {
641      /*
642       * Convert fragment Z to the desired type, aligning the LSB to the right.
643       */
644
645      assert(z_type.width == z_src_type.width);
646      assert(z_type.length == z_src_type.length);
647      assert(lp_check_value(z_src_type, z_src));
648      if (z_src_type.floating) {
649         /*
650          * Convert from floating point values
651          */
652
653         if (!z_type.floating) {
654            z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
655                                                            z_src_type,
656                                                            z_width,
657                                                            z_src);
658         }
659      } else {
660         /*
661          * Convert from unsigned normalized values.
662          */
663
664         assert(!z_src_type.sign);
665         assert(!z_src_type.fixed);
666         assert(z_src_type.norm);
667         assert(!z_type.floating);
668         if (z_src_type.width > z_width) {
669            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
670                                                        z_src_type.width - z_width);
671            z_src = LLVMBuildLShr(builder, z_src, shift, "");
672         }
673      }
674      assert(lp_check_value(z_type, z_src));
675
676      lp_build_name(z_src, "z_src");
677
678      /* compare src Z to dst Z, returning 'pass' mask */
679      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
680
681      if (!stencil[0].enabled) {
682         /* We can potentially skip all remaining operations here, but only
683          * if stencil is disabled because we still need to update the stencil
684          * buffer values.  Don't need to update Z buffer values.
685          */
686         lp_build_mask_update(mask, z_pass);
687
688         if (do_branch) {
689            lp_build_mask_check(mask);
690            do_branch = FALSE;
691         }
692      }
693
694      if (depth->writemask) {
695         LLVMValueRef zselectmask;
696
697         /* mask off bits that failed Z test */
698         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
699
700         /* mask off bits that failed stencil test */
701         if (s_pass_mask) {
702            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
703         }
704
705         /* Mix the old and new Z buffer values.
706          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
707          */
708         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
709      }
710
711      if (stencil[0].enabled) {
712         /* update stencil buffer values according to z pass/fail result */
713         LLVMValueRef z_fail_mask, z_pass_mask;
714
715         /* apply Z-fail operator */
716         z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
717         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
718                                            stencil_refs, stencil_vals,
719                                            z_fail_mask, front_facing);
720
721         /* apply Z-pass operator */
722         z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
723         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
724                                            stencil_refs, stencil_vals,
725                                            z_pass_mask, front_facing);
726      }
727   }
728   else {
729      /* No depth test: apply Z-pass operator to stencil buffer values which
730       * passed the stencil test.
731       */
732      s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
733      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
734                                         stencil_refs, stencil_vals,
735                                         s_pass_mask, front_facing);
736   }
737
738   /* Put Z and ztencil bits in the right place */
739   if (z_dst && z_shift) {
740      LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
741      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
742   }
743   if (stencil_vals && stencil_shift)
744      stencil_vals = LLVMBuildShl(builder, stencil_vals,
745                                  stencil_shift, "");
746
747   /* Finally, merge/store the z/stencil values */
748   if ((depth->enabled && depth->writemask) ||
749       (stencil[0].enabled && stencil[0].writemask)) {
750
751      if (z_dst && stencil_vals)
752         zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
753      else if (z_dst)
754         zs_dst = z_dst;
755      else
756         zs_dst = stencil_vals;
757
758      *zs_value = zs_dst;
759   }
760
761   if (s_pass_mask)
762      lp_build_mask_update(mask, s_pass_mask);
763
764   if (depth->enabled && stencil[0].enabled)
765      lp_build_mask_update(mask, z_pass);
766
767   if (do_branch)
768      lp_build_mask_check(mask);
769
770}
771
772
773void
774lp_build_depth_write(LLVMBuilderRef builder,
775                     const struct util_format_description *format_desc,
776                     LLVMValueRef zs_dst_ptr,
777                     LLVMValueRef zs_value)
778{
779   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
780                                 LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
781
782   LLVMBuildStore(builder, zs_value, zs_dst_ptr);
783}
784
785
786void
787lp_build_deferred_depth_write(struct gallivm_state *gallivm,
788                              struct lp_type z_src_type,
789                              const struct util_format_description *format_desc,
790                              struct lp_build_mask_context *mask,
791                              LLVMValueRef zs_dst_ptr,
792                              LLVMValueRef zs_value)
793{
794   struct lp_type z_type;
795   struct lp_build_context z_bld;
796   LLVMValueRef z_dst;
797   LLVMBuilderRef builder = gallivm->builder;
798
799   /* XXX: pointlessly redo type logic:
800    */
801   z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
802   lp_build_context_init(&z_bld, gallivm, z_type);
803
804   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
805                                 LLVMPointerType(z_bld.vec_type, 0), "");
806
807   z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
808   z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
809
810   LLVMBuildStore(builder, z_dst, zs_dst_ptr);
811}
812