lp_bld_depth.c revision 0a1c9001037a13b69b157994e7983aa3dee158d3
1/**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * @file
30 * Depth/stencil testing to LLVM IR translation.
31 *
32 * To be done accurately/efficiently the depth/stencil test must be done with
33 * the same type/format of the depth/stencil buffer, which implies massaging
34 * the incoming depths to fit into place. Using a more straightforward
35 * type/format for depth/stencil values internally and only convert when
36 * flushing would avoid this, but it would most likely result in depth fighting
37 * artifacts.
38 *
39 * We are free to use a different pixel layout though. Since our basic
40 * processing unit is a quad (2x2 pixel block) we store the depth/stencil
41 * values tiled, a quad at a time. That is, a depth buffer containing
42 *
43 *  Z11 Z12 Z13 Z14 ...
44 *  Z21 Z22 Z23 Z24 ...
45 *  Z31 Z32 Z33 Z34 ...
46 *  Z41 Z42 Z43 Z44 ...
47 *  ... ... ... ... ...
48 *
49 * will actually be stored in memory as
50 *
51 *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52 *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53 *  ... ... ... ... ... ... ... ... ...
54 *
55 *
56 * @author Jose Fonseca <jfonseca@vmware.com>
57 * @author Brian Paul <jfonseca@vmware.com>
58 */
59
60#include "pipe/p_state.h"
61#include "util/u_format.h"
62
63#include "gallivm/lp_bld_type.h"
64#include "gallivm/lp_bld_arit.h"
65#include "gallivm/lp_bld_bitarit.h"
66#include "gallivm/lp_bld_const.h"
67#include "gallivm/lp_bld_conv.h"
68#include "gallivm/lp_bld_logic.h"
69#include "gallivm/lp_bld_flow.h"
70#include "gallivm/lp_bld_intr.h"
71#include "gallivm/lp_bld_debug.h"
72#include "gallivm/lp_bld_swizzle.h"
73
74#include "lp_bld_depth.h"
75
76
/**
 * Selector for one of the three operator fields of pipe_stencil_state:
 * fail_op, zfail_op or zpass_op, in that order.
 */
enum stencil_op {
   S_FAIL_OP = 0,
   Z_FAIL_OP = 1,
   Z_PASS_OP = 2
};
83
84
85
86/**
87 * Do the stencil test comparison (compare FB stencil values against ref value).
88 * This will be used twice when generating two-sided stencil code.
89 * \param stencil  the front/back stencil state
90 * \param stencilRef  the stencil reference value, replicated as a vector
91 * \param stencilVals  vector of stencil values from framebuffer
92 * \return vector mask of pass/fail values (~0 or 0)
93 */
94static LLVMValueRef
95lp_build_stencil_test_single(struct lp_build_context *bld,
96                             const struct pipe_stencil_state *stencil,
97                             LLVMValueRef stencilRef,
98                             LLVMValueRef stencilVals)
99{
100   const unsigned stencilMax = 255; /* XXX fix */
101   struct lp_type type = bld->type;
102   LLVMValueRef res;
103
104   assert(type.sign);
105
106   assert(stencil->enabled);
107
108   if (stencil->valuemask != stencilMax) {
109      /* compute stencilRef = stencilRef & valuemask */
110      LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
111      stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
112      /* compute stencilVals = stencilVals & valuemask */
113      stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
114   }
115
116   res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
117
118   return res;
119}
120
121
122/**
123 * Do the one or two-sided stencil test comparison.
124 * \sa lp_build_stencil_test_single
125 * \param front_facing  an integer vector mask, indicating front (~0) or back
126 *                      (0) facing polygon. If NULL, assume front-facing.
127 */
128static LLVMValueRef
129lp_build_stencil_test(struct lp_build_context *bld,
130                      const struct pipe_stencil_state stencil[2],
131                      LLVMValueRef stencilRefs[2],
132                      LLVMValueRef stencilVals,
133                      LLVMValueRef front_facing)
134{
135   LLVMValueRef res;
136
137   assert(stencil[0].enabled);
138
139   /* do front face test */
140   res = lp_build_stencil_test_single(bld, &stencil[0],
141                                      stencilRefs[0], stencilVals);
142
143   if (stencil[1].enabled && front_facing) {
144      /* do back face test */
145      LLVMValueRef back_res;
146
147      back_res = lp_build_stencil_test_single(bld, &stencil[1],
148                                              stencilRefs[1], stencilVals);
149
150      res = lp_build_select(bld, front_facing, res, back_res);
151   }
152
153   return res;
154}
155
156
157/**
158 * Apply the stencil operator (add/sub/keep/etc) to the given vector
159 * of stencil values.
160 * \return  new stencil values vector
161 */
162static LLVMValueRef
163lp_build_stencil_op_single(struct lp_build_context *bld,
164                           const struct pipe_stencil_state *stencil,
165                           enum stencil_op op,
166                           LLVMValueRef stencilRef,
167                           LLVMValueRef stencilVals)
168
169{
170   struct lp_type type = bld->type;
171   LLVMValueRef res;
172   LLVMValueRef max = lp_build_const_int_vec(type, 0xff);
173   unsigned stencil_op;
174
175   assert(type.sign);
176
177   switch (op) {
178   case S_FAIL_OP:
179      stencil_op = stencil->fail_op;
180      break;
181   case Z_FAIL_OP:
182      stencil_op = stencil->zfail_op;
183      break;
184   case Z_PASS_OP:
185      stencil_op = stencil->zpass_op;
186      break;
187   default:
188      assert(0 && "Invalid stencil_op mode");
189      stencil_op = PIPE_STENCIL_OP_KEEP;
190   }
191
192   switch (stencil_op) {
193   case PIPE_STENCIL_OP_KEEP:
194      res = stencilVals;
195      /* we can return early for this case */
196      return res;
197   case PIPE_STENCIL_OP_ZERO:
198      res = bld->zero;
199      break;
200   case PIPE_STENCIL_OP_REPLACE:
201      res = stencilRef;
202      break;
203   case PIPE_STENCIL_OP_INCR:
204      res = lp_build_add(bld, stencilVals, bld->one);
205      res = lp_build_min(bld, res, max);
206      break;
207   case PIPE_STENCIL_OP_DECR:
208      res = lp_build_sub(bld, stencilVals, bld->one);
209      res = lp_build_max(bld, res, bld->zero);
210      break;
211   case PIPE_STENCIL_OP_INCR_WRAP:
212      res = lp_build_add(bld, stencilVals, bld->one);
213      res = LLVMBuildAnd(bld->builder, res, max, "");
214      break;
215   case PIPE_STENCIL_OP_DECR_WRAP:
216      res = lp_build_sub(bld, stencilVals, bld->one);
217      res = LLVMBuildAnd(bld->builder, res, max, "");
218      break;
219   case PIPE_STENCIL_OP_INVERT:
220      res = LLVMBuildNot(bld->builder, stencilVals, "");
221      res = LLVMBuildAnd(bld->builder, res, max, "");
222      break;
223   default:
224      assert(0 && "bad stencil op mode");
225      res = bld->undef;
226   }
227
228   return res;
229}
230
231
232/**
233 * Do the one or two-sided stencil test op/update.
234 */
235static LLVMValueRef
236lp_build_stencil_op(struct lp_build_context *bld,
237                    const struct pipe_stencil_state stencil[2],
238                    enum stencil_op op,
239                    LLVMValueRef stencilRefs[2],
240                    LLVMValueRef stencilVals,
241                    LLVMValueRef mask,
242                    LLVMValueRef front_facing)
243
244{
245   LLVMValueRef res;
246
247   assert(stencil[0].enabled);
248
249   /* do front face op */
250   res = lp_build_stencil_op_single(bld, &stencil[0], op,
251                                     stencilRefs[0], stencilVals);
252
253   if (stencil[1].enabled && front_facing) {
254      /* do back face op */
255      LLVMValueRef back_res;
256
257      back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
258                                            stencilRefs[1], stencilVals);
259
260      res = lp_build_select(bld, front_facing, res, back_res);
261   }
262
263   if (stencil->writemask != 0xff) {
264      /* mask &= stencil->writemask */
265      LLVMValueRef writemask = lp_build_const_int_vec(bld->type, stencil->writemask);
266      mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
267      /* res = (res & mask) | (stencilVals & ~mask) */
268      res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
269   }
270   else {
271      /* res = mask ? res : stencilVals */
272      res = lp_build_select(bld, mask, res, stencilVals);
273   }
274
275   return res;
276}
277
278
279
280/**
281 * Return a type appropriate for depth/stencil testing.
282 */
283struct lp_type
284lp_depth_type(const struct util_format_description *format_desc,
285              unsigned length)
286{
287   struct lp_type type;
288   unsigned swizzle;
289
290   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
291   assert(format_desc->block.width == 1);
292   assert(format_desc->block.height == 1);
293
294   swizzle = format_desc->swizzle[0];
295   assert(swizzle < 4);
296
297   memset(&type, 0, sizeof type);
298   type.width = format_desc->block.bits;
299
300   if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
301      type.floating = TRUE;
302      assert(swizzle == 0);
303      assert(format_desc->channel[swizzle].size == format_desc->block.bits);
304   }
305   else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
306      assert(format_desc->block.bits <= 32);
307      assert(format_desc->channel[swizzle].normalized);
308      if (format_desc->channel[swizzle].size < format_desc->block.bits) {
309         /* Prefer signed integers when possible, as SSE has less support
310          * for unsigned comparison;
311          */
312         type.sign = TRUE;
313      }
314   }
315   else
316      assert(0);
317
318   assert(type.width <= length);
319   type.length = length / type.width;
320
321   return type;
322}
323
324
325/**
326 * Compute bitmask and bit shift to apply to the incoming fragment Z values
327 * and the Z buffer values needed before doing the Z comparison.
328 *
329 * Note that we leave the Z bits in the position that we find them
330 * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
331 * get by with fewer bit twiddling steps.
332 */
333static void
334get_z_shift_and_mask(const struct util_format_description *format_desc,
335                     unsigned *shift, unsigned *width, unsigned *mask)
336{
337   const unsigned total_bits = format_desc->block.bits;
338   unsigned z_swizzle;
339   unsigned chan;
340   unsigned padding_left, padding_right;
341
342   assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
343   assert(format_desc->block.width == 1);
344   assert(format_desc->block.height == 1);
345
346   z_swizzle = format_desc->swizzle[0];
347
348   assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
349
350   *width = format_desc->channel[z_swizzle].size;
351
352   padding_right = 0;
353   for (chan = 0; chan < z_swizzle; ++chan)
354      padding_right += format_desc->channel[chan].size;
355
356   padding_left =
357      total_bits - (padding_right + *width);
358
359   if (padding_left || padding_right) {
360      unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
361      unsigned long long mask_right = (1ULL << (padding_right)) - 1;
362      *mask = mask_left ^ mask_right;
363   }
364   else {
365      *mask = 0xffffffff;
366   }
367
368   *shift = padding_right;
369}
370
371
372/**
373 * Compute bitmask and bit shift to apply to the framebuffer pixel values
374 * to put the stencil bits in the least significant position.
375 * (i.e. 0x000000ff)
376 */
377static boolean
378get_s_shift_and_mask(const struct util_format_description *format_desc,
379                     unsigned *shift, unsigned *mask)
380{
381   unsigned s_swizzle;
382   unsigned chan, sz;
383
384   s_swizzle = format_desc->swizzle[1];
385
386   if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
387      return FALSE;
388
389   *shift = 0;
390   for (chan = 0; chan < s_swizzle; chan++)
391      *shift += format_desc->channel[chan].size;
392
393   sz = format_desc->channel[s_swizzle].size;
394   *mask = (1U << sz) - 1U;
395
396   return TRUE;
397}
398
399
400/**
401 * Perform the occlusion test and increase the counter.
402 * Test the depth mask. Add the number of channel which has none zero mask
403 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
404 * The counter will add 4.
405 *
406 * \param type holds element type of the mask vector.
407 * \param maskvalue is the depth test mask.
408 * \param counter is a pointer of the uint32 counter.
409 */
410void
411lp_build_occlusion_count(LLVMBuilderRef builder,
412                         struct lp_type type,
413                         LLVMValueRef maskvalue,
414                         LLVMValueRef counter)
415{
416   LLVMValueRef countmask = lp_build_const_int_vec(type, 1);
417   LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
418   LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16);
419   LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
420   LLVMValueRef maskarray[4] = {
421      LLVMConstInt(LLVMInt32Type(), 0, 0),
422      LLVMConstInt(LLVMInt32Type(), 4, 0),
423      LLVMConstInt(LLVMInt32Type(), 8, 0),
424      LLVMConstInt(LLVMInt32Type(), 12, 0),
425   };
426   LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
427   LLVMValueRef shufflev =  LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
428   LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle");
429   LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle);
430   LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
431   LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
432   LLVMBuildStore(builder, incr, counter);
433}
434
435
436
437/**
438 * Generate code for performing depth and/or stencil tests.
439 * We operate on a vector of values (typically a 2x2 quad).
440 *
441 * \param depth  the depth test state
442 * \param stencil  the front/back stencil state
443 * \param type  the data type of the fragment depth/stencil values
444 * \param format_desc  description of the depth/stencil surface
445 * \param mask  the alive/dead pixel mask for the quad (vector)
446 * \param stencil_refs  the front/back stencil ref values (scalar)
447 * \param z_src  the incoming depth/stencil values (a 2x2 quad, float32)
448 * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
449 * \param facing  contains boolean value indicating front/back facing polygon
450 */
451void
452lp_build_depth_stencil_test(LLVMBuilderRef builder,
453                            const struct pipe_depth_state *depth,
454                            const struct pipe_stencil_state stencil[2],
455                            struct lp_type z_src_type,
456                            const struct util_format_description *format_desc,
457                            struct lp_build_mask_context *mask,
458                            LLVMValueRef stencil_refs[2],
459                            LLVMValueRef z_src,
460                            LLVMValueRef zs_dst_ptr,
461                            LLVMValueRef face,
462                            LLVMValueRef *zs_value,
463                            boolean do_branch)
464{
465   struct lp_type z_type;
466   struct lp_build_context z_bld;
467   struct lp_build_context s_bld;
468   struct lp_type s_type;
469   unsigned z_shift, z_width, z_mask;
470   LLVMValueRef zs_dst, z_dst = NULL;
471   LLVMValueRef stencil_vals = NULL;
472   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
473   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
474   LLVMValueRef orig_mask = lp_build_mask_value(mask);
475   LLVMValueRef front_facing = NULL;
476
477
478   /*
479    * Depths are expected to be between 0 and 1, even if they are stored in
480    * floats. Setting these bits here will ensure that the lp_build_conv() call
481    * below won't try to unnecessarily clamp the incoming values.
482    */
483   if(z_src_type.floating) {
484      z_src_type.sign = FALSE;
485      z_src_type.norm = TRUE;
486   }
487   else {
488      assert(!z_src_type.sign);
489      assert(z_src_type.norm);
490   }
491
492   /* Pick the depth type. */
493   z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
494
495   /* FIXME: Cope with a depth test type with a different bit width. */
496   assert(z_type.width == z_src_type.width);
497   assert(z_type.length == z_src_type.length);
498
499   /* Sanity checking */
500   {
501      const unsigned z_swizzle = format_desc->swizzle[0];
502      const unsigned s_swizzle = format_desc->swizzle[1];
503
504      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
505             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
506
507      assert(depth->enabled || stencil[0].enabled);
508
509      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
510      assert(format_desc->block.width == 1);
511      assert(format_desc->block.height == 1);
512
513      if (stencil[0].enabled) {
514         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
515                format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
516      }
517
518      assert(z_swizzle < 4);
519      assert(format_desc->block.bits == z_type.width);
520      if (z_type.floating) {
521         assert(z_swizzle == 0);
522         assert(format_desc->channel[z_swizzle].type ==
523                UTIL_FORMAT_TYPE_FLOAT);
524         assert(format_desc->channel[z_swizzle].size ==
525                format_desc->block.bits);
526      }
527      else {
528         assert(format_desc->channel[z_swizzle].type ==
529                UTIL_FORMAT_TYPE_UNSIGNED);
530         assert(format_desc->channel[z_swizzle].normalized);
531         assert(!z_type.fixed);
532      }
533   }
534
535
536   /* Setup build context for Z vals */
537   lp_build_context_init(&z_bld, builder, z_type);
538
539   /* Setup build context for stencil vals */
540   s_type = lp_type_int_vec(z_type.width);
541   lp_build_context_init(&s_bld, builder, s_type);
542
543   /* Load current z/stencil value from z/stencil buffer */
544   zs_dst_ptr = LLVMBuildBitCast(builder,
545                                 zs_dst_ptr,
546                                 LLVMPointerType(z_bld.vec_type, 0), "");
547   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
548
549   lp_build_name(zs_dst, "zs_dst");
550
551
552   /* Compute and apply the Z/stencil bitmasks and shifts.
553    */
554   {
555      unsigned s_shift, s_mask;
556
557      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
558         if (s_shift) {
559            LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift);
560            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
561            stencil_shift = shift;  /* used below */
562         }
563         else {
564            stencil_vals = zs_dst;
565         }
566
567         if (s_mask != 0xffffffff) {
568            LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask);
569            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
570         }
571
572         lp_build_name(stencil_vals, "s_dst");
573      }
574   }
575
576   if (stencil[0].enabled) {
577
578      if (face) {
579         LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
580
581         /* front_facing = face != 0 ? ~0 : 0 */
582         front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
583         front_facing = LLVMBuildSExt(builder, front_facing,
584                                      LLVMIntType(s_bld.type.length*s_bld.type.width),
585                                      "");
586         front_facing = LLVMBuildBitCast(builder, front_facing,
587                                         s_bld.int_vec_type, "");
588      }
589
590      /* convert scalar stencil refs into vectors */
591      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
592      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
593
594      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
595                                          stencil_refs, stencil_vals,
596                                          front_facing);
597
598      /* apply stencil-fail operator */
599      {
600         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
601         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
602                                            stencil_refs, stencil_vals,
603                                            s_fail_mask, front_facing);
604      }
605   }
606
607   if (depth->enabled) {
608      get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
609
610      /*
611       * Convert fragment Z to the desired type, aligning the LSB to the right.
612       */
613
614      assert(z_type.width == z_src_type.width);
615      assert(z_type.length == z_src_type.length);
616      assert(lp_check_value(z_src_type, z_src));
617      if (z_src_type.floating) {
618         /*
619          * Convert from floating point values
620          */
621
622         if (!z_type.floating) {
623            z_src = lp_build_clamped_float_to_unsigned_norm(builder,
624                                                            z_src_type,
625                                                            z_width,
626                                                            z_src);
627         }
628      } else {
629         /*
630          * Convert from unsigned normalized values.
631          */
632
633         assert(!z_src_type.sign);
634         assert(!z_src_type.fixed);
635         assert(z_src_type.norm);
636         assert(!z_type.floating);
637         if (z_src_type.width > z_width) {
638            LLVMValueRef shift = lp_build_const_int_vec(z_src_type,
639                                                        z_src_type.width - z_width);
640            z_src = LLVMBuildLShr(builder, z_src, shift, "");
641         }
642      }
643      assert(lp_check_value(z_type, z_src));
644
645      lp_build_name(z_src, "z_src");
646
647      if (z_mask != 0xffffffff) {
648         z_bitmask = lp_build_const_int_vec(z_type, z_mask);
649      }
650
651      /*
652       * Align the framebuffer Z 's LSB to the right.
653       */
654      if (z_shift) {
655         LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
656         z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
657      } else if (z_bitmask) {
658         z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
659      } else {
660         z_dst = zs_dst;
661         lp_build_name(z_dst, "z_dst");
662      }
663
664      /* compare src Z to dst Z, returning 'pass' mask */
665      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
666
667      if (!stencil[0].enabled) {
668         /* We can potentially skip all remaining operations here, but only
669          * if stencil is disabled because we still need to update the stencil
670          * buffer values.  Don't need to update Z buffer values.
671          */
672         lp_build_mask_update(mask, z_pass);
673
674         if (do_branch) {
675            lp_build_mask_check(mask);
676            do_branch = FALSE;
677         }
678      }
679
680      if (depth->writemask) {
681         LLVMValueRef zselectmask;
682
683         /* mask off bits that failed Z test */
684         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
685
686         /* mask off bits that failed stencil test */
687         if (s_pass_mask) {
688            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
689         }
690
691         /* Mix the old and new Z buffer values.
692          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
693          */
694         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
695      }
696
697      if (stencil[0].enabled) {
698         /* update stencil buffer values according to z pass/fail result */
699         LLVMValueRef z_fail_mask, z_pass_mask;
700
701         /* apply Z-fail operator */
702         z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
703         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
704                                            stencil_refs, stencil_vals,
705                                            z_fail_mask, front_facing);
706
707         /* apply Z-pass operator */
708         z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, "");
709         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
710                                            stencil_refs, stencil_vals,
711                                            z_pass_mask, front_facing);
712      }
713   }
714   else {
715      /* No depth test: apply Z-pass operator to stencil buffer values which
716       * passed the stencil test.
717       */
718      s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, "");
719      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
720                                         stencil_refs, stencil_vals,
721                                         s_pass_mask, front_facing);
722   }
723
724   /* Put Z and ztencil bits in the right place */
725   if (z_dst && z_shift) {
726      LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift);
727      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
728   }
729   if (stencil_vals && stencil_shift)
730      stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals,
731                                  stencil_shift, "");
732
733   /* Finally, merge/store the z/stencil values */
734   if ((depth->enabled && depth->writemask) ||
735       (stencil[0].enabled && stencil[0].writemask)) {
736
737      if (z_dst && stencil_vals)
738         zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, "");
739      else if (z_dst)
740         zs_dst = z_dst;
741      else
742         zs_dst = stencil_vals;
743
744      *zs_value = zs_dst;
745   }
746
747   if (s_pass_mask)
748      lp_build_mask_update(mask, s_pass_mask);
749
750   if (depth->enabled && stencil[0].enabled)
751      lp_build_mask_update(mask, z_pass);
752
753   if (do_branch)
754      lp_build_mask_check(mask);
755
756}
757
758
759void
760lp_build_depth_write(LLVMBuilderRef builder,
761                     const struct util_format_description *format_desc,
762                     LLVMValueRef zs_dst_ptr,
763                     LLVMValueRef zs_value)
764{
765   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
766                                 LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
767
768   LLVMBuildStore(builder, zs_value, zs_dst_ptr);
769}
770
771
772void
773lp_build_deferred_depth_write(LLVMBuilderRef builder,
774                              struct lp_type z_src_type,
775                              const struct util_format_description *format_desc,
776                              struct lp_build_mask_context *mask,
777                              LLVMValueRef zs_dst_ptr,
778                              LLVMValueRef zs_value)
779{
780   struct lp_type z_type;
781   struct lp_build_context z_bld;
782   LLVMValueRef z_dst;
783
784   /* XXX: pointlessly redo type logic:
785    */
786   z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
787   lp_build_context_init(&z_bld, builder, z_type);
788
789   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
790                                 LLVMPointerType(z_bld.vec_type, 0), "");
791
792   z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
793   z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
794
795   LLVMBuildStore(builder, z_dst, zs_dst_ptr);
796}
797