/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
28
/**
 * \brief  Quad depth / stencil testing
 */
32
33#include "pipe/p_defines.h"
34#include "util/u_format.h"
35#include "util/u_math.h"
36#include "util/u_memory.h"
37#include "tgsi/tgsi_scan.h"
38#include "sp_context.h"
39#include "sp_quad.h"
40#include "sp_quad_pipe.h"
41#include "sp_tile_cache.h"
42#include "sp_state.h"           /* for sp_fragment_shader */
43
44
45struct depth_data {
46   struct pipe_surface *ps;
47   enum pipe_format format;
48   unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
49   unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
50   ubyte stencilVals[TGSI_QUAD_SIZE];
51   boolean use_shader_stencil_refs;
52   ubyte shader_stencil_refs[TGSI_QUAD_SIZE];
53   struct softpipe_cached_tile *tile;
54};
55
56
57
58static void
59get_depth_stencil_values( struct depth_data *data,
60                          const struct quad_header *quad )
61{
62   unsigned j;
63   const struct softpipe_cached_tile *tile = data->tile;
64
65   switch (data->format) {
66   case PIPE_FORMAT_Z16_UNORM:
67      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
68         int x = quad->input.x0 % TILE_SIZE + (j & 1);
69         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
70         data->bzzzz[j] = tile->data.depth16[y][x];
71      }
72      break;
73   case PIPE_FORMAT_Z32_UNORM:
74      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
75         int x = quad->input.x0 % TILE_SIZE + (j & 1);
76         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
77         data->bzzzz[j] = tile->data.depth32[y][x];
78      }
79      break;
80   case PIPE_FORMAT_Z24X8_UNORM:
81   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
82      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
83         int x = quad->input.x0 % TILE_SIZE + (j & 1);
84         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
85         data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
86         data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
87      }
88      break;
89   case PIPE_FORMAT_X8Z24_UNORM:
90   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
91      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
92         int x = quad->input.x0 % TILE_SIZE + (j & 1);
93         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
94         data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
95         data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
96      }
97      break;
98   case PIPE_FORMAT_S8_UINT:
99      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
100         int x = quad->input.x0 % TILE_SIZE + (j & 1);
101         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
102         data->bzzzz[j] = 0;
103         data->stencilVals[j] = tile->data.stencil8[y][x];
104      }
105      break;
106   case PIPE_FORMAT_Z32_FLOAT:
107      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
108         int x = quad->input.x0 % TILE_SIZE + (j & 1);
109         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
110         data->bzzzz[j] = tile->data.depth32[y][x];
111      }
112      break;
113   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
114      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
115         int x = quad->input.x0 % TILE_SIZE + (j & 1);
116         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
117         data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
118         data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
119      }
120      break;
121   default:
122      assert(0);
123   }
124}
125
126
127/**
128 * If the shader has not been run, interpolate the depth values
129 * ourselves.
130 */
131static void
132interpolate_quad_depth( struct quad_header *quad )
133{
134   const float fx = (float) quad->input.x0;
135   const float fy = (float) quad->input.y0;
136   const float dzdx = quad->posCoef->dadx[2];
137   const float dzdy = quad->posCoef->dady[2];
138   const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
139
140   quad->output.depth[0] = z0;
141   quad->output.depth[1] = z0 + dzdx;
142   quad->output.depth[2] = z0 + dzdy;
143   quad->output.depth[3] = z0 + dzdx + dzdy;
144}
145
146
147/**
148 * Compute the depth_data::qzzzz[] values from the float fragment Z values.
149 */
150static void
151convert_quad_depth( struct depth_data *data,
152                    const struct quad_header *quad )
153{
154   unsigned j;
155
156   /* Convert quad's float depth values to int depth values (qzzzz).
157    * If the Z buffer stores integer values, we _have_ to do the depth
158    * compares with integers (not floats).  Otherwise, the float->int->float
159    * conversion of Z values (which isn't an identity function) will cause
160    * Z-fighting errors.
161    */
162   switch (data->format) {
163   case PIPE_FORMAT_Z16_UNORM:
164      {
165         float scale = 65535.0;
166
167         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
168            data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
169         }
170      }
171      break;
172   case PIPE_FORMAT_Z32_UNORM:
173      {
174         double scale = (double) (uint) ~0UL;
175
176         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
177            data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
178         }
179      }
180      break;
181   case PIPE_FORMAT_Z24X8_UNORM:
182   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
183      {
184         float scale = (float) ((1 << 24) - 1);
185
186         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
187            data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
188         }
189      }
190      break;
191   case PIPE_FORMAT_X8Z24_UNORM:
192   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
193      {
194         float scale = (float) ((1 << 24) - 1);
195
196         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
197            data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
198         }
199      }
200      break;
201   case PIPE_FORMAT_Z32_FLOAT:
202   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
203      {
204         union fi fui;
205
206         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
207            fui.f = quad->output.depth[j];
208            data->qzzzz[j] = fui.ui;
209         }
210      }
211      break;
212   default:
213      assert(0);
214   }
215}
216
217
218/**
219 * Compute the depth_data::shader_stencil_refs[] values from the float
220 * fragment stencil values.
221 */
222static void
223convert_quad_stencil( struct depth_data *data,
224                      const struct quad_header *quad )
225{
226   unsigned j;
227
228   data->use_shader_stencil_refs = TRUE;
229   /* Copy quads stencil values
230    */
231   switch (data->format) {
232   case PIPE_FORMAT_Z24X8_UNORM:
233   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
234   case PIPE_FORMAT_X8Z24_UNORM:
235   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
236   case PIPE_FORMAT_S8_UINT:
237   case PIPE_FORMAT_Z32_FLOAT:
238   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
239      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
240         data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
241      }
242      break;
243   default:
244      assert(0);
245   }
246}
247
248
249/**
250 * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
251 */
252static void
253write_depth_stencil_values( struct depth_data *data,
254                            struct quad_header *quad )
255{
256   struct softpipe_cached_tile *tile = data->tile;
257   unsigned j;
258
259   /* put updated Z values back into cached tile */
260   switch (data->format) {
261   case PIPE_FORMAT_Z16_UNORM:
262      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
263         int x = quad->input.x0 % TILE_SIZE + (j & 1);
264         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
265         tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
266      }
267      break;
268   case PIPE_FORMAT_Z24X8_UNORM:
269   case PIPE_FORMAT_Z32_UNORM:
270      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
271         int x = quad->input.x0 % TILE_SIZE + (j & 1);
272         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
273         tile->data.depth32[y][x] = data->bzzzz[j];
274      }
275      break;
276   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
277      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
278         int x = quad->input.x0 % TILE_SIZE + (j & 1);
279         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
280         tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
281      }
282      break;
283   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
284      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
285         int x = quad->input.x0 % TILE_SIZE + (j & 1);
286         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
287         tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
288      }
289      break;
290   case PIPE_FORMAT_X8Z24_UNORM:
291      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
292         int x = quad->input.x0 % TILE_SIZE + (j & 1);
293         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
294         tile->data.depth32[y][x] = data->bzzzz[j] << 8;
295      }
296      break;
297   case PIPE_FORMAT_S8_UINT:
298      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
299         int x = quad->input.x0 % TILE_SIZE + (j & 1);
300         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
301         tile->data.stencil8[y][x] = data->stencilVals[j];
302      }
303      break;
304   case PIPE_FORMAT_Z32_FLOAT:
305      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
306         int x = quad->input.x0 % TILE_SIZE + (j & 1);
307         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
308         tile->data.depth32[y][x] = data->bzzzz[j];
309      }
310      break;
311   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
312      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
313         int x = quad->input.x0 % TILE_SIZE + (j & 1);
314         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
315         tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
316      }
317      break;
318   default:
319      assert(0);
320   }
321}
322
323
324
325/** Only 8-bit stencil supported */
326#define STENCIL_MAX 0xff
327
328
329/**
330 * Do the basic stencil test (compare stencil buffer values against the
331 * reference value.
332 *
333 * \param data->stencilVals  the stencil values from the stencil buffer
334 * \param func  the stencil func (PIPE_FUNC_x)
335 * \param ref  the stencil reference value
336 * \param valMask  the stencil value mask indicating which bits of the stencil
337 *                 values and ref value are to be used.
338 * \return mask indicating which pixels passed the stencil test
339 */
340static unsigned
341do_stencil_test(struct depth_data *data,
342                unsigned func,
343                unsigned ref, unsigned valMask)
344{
345   unsigned passMask = 0x0;
346   unsigned j;
347   ubyte refs[TGSI_QUAD_SIZE];
348
349   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
350      if (data->use_shader_stencil_refs)
351         refs[j] = data->shader_stencil_refs[j] & valMask;
352      else
353         refs[j] = ref & valMask;
354   }
355
356   switch (func) {
357   case PIPE_FUNC_NEVER:
358      /* passMask = 0x0 */
359      break;
360   case PIPE_FUNC_LESS:
361      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
362         if (refs[j] < (data->stencilVals[j] & valMask)) {
363            passMask |= (1 << j);
364         }
365      }
366      break;
367   case PIPE_FUNC_EQUAL:
368      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
369         if (refs[j] == (data->stencilVals[j] & valMask)) {
370            passMask |= (1 << j);
371         }
372      }
373      break;
374   case PIPE_FUNC_LEQUAL:
375      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
376         if (refs[j] <= (data->stencilVals[j] & valMask)) {
377            passMask |= (1 << j);
378         }
379      }
380      break;
381   case PIPE_FUNC_GREATER:
382      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
383         if (refs[j] > (data->stencilVals[j] & valMask)) {
384            passMask |= (1 << j);
385         }
386      }
387      break;
388   case PIPE_FUNC_NOTEQUAL:
389      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
390         if (refs[j] != (data->stencilVals[j] & valMask)) {
391            passMask |= (1 << j);
392         }
393      }
394      break;
395   case PIPE_FUNC_GEQUAL:
396      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
397         if (refs[j] >= (data->stencilVals[j] & valMask)) {
398            passMask |= (1 << j);
399         }
400      }
401      break;
402   case PIPE_FUNC_ALWAYS:
403      passMask = MASK_ALL;
404      break;
405   default:
406      assert(0);
407   }
408
409   return passMask;
410}
411
412
413/**
414 * Apply the stencil operator to stencil values.
415 *
416 * \param data->stencilVals  the stencil buffer values (read and written)
417 * \param mask  indicates which pixels to update
418 * \param op  the stencil operator (PIPE_STENCIL_OP_x)
419 * \param ref  the stencil reference value
420 * \param wrtMask  writemask controlling which bits are changed in the
421 *                 stencil values
422 */
423static void
424apply_stencil_op(struct depth_data *data,
425                 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
426{
427   unsigned j;
428   ubyte newstencil[TGSI_QUAD_SIZE];
429   ubyte refs[TGSI_QUAD_SIZE];
430
431   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
432      newstencil[j] = data->stencilVals[j];
433      if (data->use_shader_stencil_refs)
434         refs[j] = data->shader_stencil_refs[j];
435      else
436         refs[j] = ref;
437   }
438
439   switch (op) {
440   case PIPE_STENCIL_OP_KEEP:
441      /* no-op */
442      break;
443   case PIPE_STENCIL_OP_ZERO:
444      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
445         if (mask & (1 << j)) {
446            newstencil[j] = 0;
447         }
448      }
449      break;
450   case PIPE_STENCIL_OP_REPLACE:
451      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
452         if (mask & (1 << j)) {
453            newstencil[j] = refs[j];
454         }
455      }
456      break;
457   case PIPE_STENCIL_OP_INCR:
458      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
459         if (mask & (1 << j)) {
460            if (data->stencilVals[j] < STENCIL_MAX) {
461               newstencil[j] = data->stencilVals[j] + 1;
462            }
463         }
464      }
465      break;
466   case PIPE_STENCIL_OP_DECR:
467      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
468         if (mask & (1 << j)) {
469            if (data->stencilVals[j] > 0) {
470               newstencil[j] = data->stencilVals[j] - 1;
471            }
472         }
473      }
474      break;
475   case PIPE_STENCIL_OP_INCR_WRAP:
476      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
477         if (mask & (1 << j)) {
478            newstencil[j] = data->stencilVals[j] + 1;
479         }
480      }
481      break;
482   case PIPE_STENCIL_OP_DECR_WRAP:
483      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
484         if (mask & (1 << j)) {
485            newstencil[j] = data->stencilVals[j] - 1;
486         }
487      }
488      break;
489   case PIPE_STENCIL_OP_INVERT:
490      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
491         if (mask & (1 << j)) {
492            newstencil[j] = ~data->stencilVals[j];
493         }
494      }
495      break;
496   default:
497      assert(0);
498   }
499
500   /*
501    * update the stencil values
502    */
503   if (wrtMask != STENCIL_MAX) {
504      /* apply bit-wise stencil buffer writemask */
505      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
506         data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
507      }
508   }
509   else {
510      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
511         data->stencilVals[j] = newstencil[j];
512      }
513   }
514}
515
516
517
518/**
519 * To increase efficiency, we should probably have multiple versions
520 * of this function that are specifically for Z16, Z32 and FP Z buffers.
521 * Try to effectively do that with codegen...
522 */
523static boolean
524depth_test_quad(struct quad_stage *qs,
525                struct depth_data *data,
526                struct quad_header *quad)
527{
528   struct softpipe_context *softpipe = qs->softpipe;
529   unsigned zmask = 0;
530   unsigned j;
531
532   switch (softpipe->depth_stencil->depth.func) {
533   case PIPE_FUNC_NEVER:
534      /* zmask = 0 */
535      break;
536   case PIPE_FUNC_LESS:
537      /* Note this is pretty much a single sse or cell instruction.
538       * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
539       */
540      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
541	 if (data->qzzzz[j] < data->bzzzz[j])
542	    zmask |= 1 << j;
543      }
544      break;
545   case PIPE_FUNC_EQUAL:
546      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
547	 if (data->qzzzz[j] == data->bzzzz[j])
548	    zmask |= 1 << j;
549      }
550      break;
551   case PIPE_FUNC_LEQUAL:
552      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
553	 if (data->qzzzz[j] <= data->bzzzz[j])
554	    zmask |= (1 << j);
555      }
556      break;
557   case PIPE_FUNC_GREATER:
558      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
559	 if (data->qzzzz[j] > data->bzzzz[j])
560	    zmask |= (1 << j);
561      }
562      break;
563   case PIPE_FUNC_NOTEQUAL:
564      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
565	 if (data->qzzzz[j] != data->bzzzz[j])
566	    zmask |= (1 << j);
567      }
568      break;
569   case PIPE_FUNC_GEQUAL:
570      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
571	 if (data->qzzzz[j] >= data->bzzzz[j])
572	    zmask |= (1 << j);
573      }
574      break;
575   case PIPE_FUNC_ALWAYS:
576      zmask = MASK_ALL;
577      break;
578   default:
579      assert(0);
580   }
581
582   quad->inout.mask &= zmask;
583   if (quad->inout.mask == 0)
584      return FALSE;
585
586   /* Update our internal copy only if writemask set.  Even if
587    * depth.writemask is FALSE, may still need to write out buffer
588    * data due to stencil changes.
589    */
590   if (softpipe->depth_stencil->depth.writemask) {
591      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
592         if (quad->inout.mask & (1 << j)) {
593            data->bzzzz[j] = data->qzzzz[j];
594         }
595      }
596   }
597
598   return TRUE;
599}
600
601
602
603/**
604 * Do stencil (and depth) testing.  Stenciling depends on the outcome of
605 * depth testing.
606 */
607static void
608depth_stencil_test_quad(struct quad_stage *qs,
609                        struct depth_data *data,
610                        struct quad_header *quad)
611{
612   struct softpipe_context *softpipe = qs->softpipe;
613   unsigned func, zFailOp, zPassOp, failOp;
614   ubyte ref, wrtMask, valMask;
615   uint face = quad->input.facing;
616
617   if (!softpipe->depth_stencil->stencil[1].enabled) {
618      /* single-sided stencil test, use front (face=0) state */
619      face = 0;
620   }
621
622   /* 0 = front-face, 1 = back-face */
623   assert(face == 0 || face == 1);
624
625   /* choose front or back face function, operator, etc */
626   /* XXX we could do these initializations once per primitive */
627   func    = softpipe->depth_stencil->stencil[face].func;
628   failOp  = softpipe->depth_stencil->stencil[face].fail_op;
629   zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
630   zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
631   ref     = softpipe->stencil_ref.ref_value[face];
632   wrtMask = softpipe->depth_stencil->stencil[face].writemask;
633   valMask = softpipe->depth_stencil->stencil[face].valuemask;
634
635   /* do the stencil test first */
636   {
637      unsigned passMask, failMask;
638      passMask = do_stencil_test(data, func, ref, valMask);
639      failMask = quad->inout.mask & ~passMask;
640      quad->inout.mask &= passMask;
641
642      if (failOp != PIPE_STENCIL_OP_KEEP) {
643         apply_stencil_op(data, failMask, failOp, ref, wrtMask);
644      }
645   }
646
647   if (quad->inout.mask) {
648      /* now the pixels that passed the stencil test are depth tested */
649      if (softpipe->depth_stencil->depth.enabled) {
650         const unsigned origMask = quad->inout.mask;
651
652         depth_test_quad(qs, data, quad);  /* quad->mask is updated */
653
654         /* update stencil buffer values according to z pass/fail result */
655         if (zFailOp != PIPE_STENCIL_OP_KEEP) {
656            const unsigned zFailMask = origMask & ~quad->inout.mask;
657            apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
658         }
659
660         if (zPassOp != PIPE_STENCIL_OP_KEEP) {
661            const unsigned zPassMask = origMask & quad->inout.mask;
662            apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
663         }
664      }
665      else {
666         /* no depth test, apply Zpass operator to stencil buffer values */
667         apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
668      }
669   }
670}
671
672
673#define ALPHATEST( FUNC, COMP )                                         \
674   static unsigned                                                      \
675   alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
676                           struct quad_header *quads[],                 \
677                           unsigned nr )                                \
678   {                                                                    \
679      const float ref = qs->softpipe->depth_stencil->alpha.ref_value;   \
680      const uint cbuf = 0; /* only output[0].alpha is tested */         \
681      unsigned pass_nr = 0;                                             \
682      unsigned i;                                                       \
683                                                                        \
684      for (i = 0; i < nr; i++) {                                        \
685         const float *aaaa = quads[i]->output.color[cbuf][3];           \
686         unsigned passMask = 0;                                         \
687                                                                        \
688         if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
689         if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
690         if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
691         if (aaaa[3] COMP ref) passMask |= (1 << 3);                    \
692                                                                        \
693         quads[i]->inout.mask &= passMask;                              \
694                                                                        \
695         if (quads[i]->inout.mask)                                      \
696            quads[pass_nr++] = quads[i];                                \
697      }                                                                 \
698                                                                        \
699      return pass_nr;                                                   \
700   }
701
702
703ALPHATEST( LESS,     < )
704ALPHATEST( EQUAL,    == )
705ALPHATEST( LEQUAL,   <= )
706ALPHATEST( GREATER,  > )
707ALPHATEST( NOTEQUAL, != )
708ALPHATEST( GEQUAL,   >= )
709
710
711/* XXX: Incorporate into shader using KILP.
712 */
713static unsigned
714alpha_test_quads(struct quad_stage *qs,
715                 struct quad_header *quads[],
716                 unsigned nr)
717{
718   switch (qs->softpipe->depth_stencil->alpha.func) {
719   case PIPE_FUNC_LESS:
720      return alpha_test_quads_LESS( qs, quads, nr );
721   case PIPE_FUNC_EQUAL:
722      return alpha_test_quads_EQUAL( qs, quads, nr );
723   case PIPE_FUNC_LEQUAL:
724      return alpha_test_quads_LEQUAL( qs, quads, nr );
725   case PIPE_FUNC_GREATER:
726      return alpha_test_quads_GREATER( qs, quads, nr );
727   case PIPE_FUNC_NOTEQUAL:
728      return alpha_test_quads_NOTEQUAL( qs, quads, nr );
729   case PIPE_FUNC_GEQUAL:
730      return alpha_test_quads_GEQUAL( qs, quads, nr );
731   case PIPE_FUNC_ALWAYS:
732      return nr;
733   case PIPE_FUNC_NEVER:
734   default:
735      return 0;
736   }
737}
738
739
/**
 * Number of bits set in a 4-bit quad coverage mask, indexed by the mask
 * value.  Read-only lookup table, hence const.
 */
static const unsigned mask_count[16] =
{
   0,                           /* 0x0 */
   1,                           /* 0x1 */
   1,                           /* 0x2 */
   2,                           /* 0x3 */
   1,                           /* 0x4 */
   2,                           /* 0x5 */
   2,                           /* 0x6 */
   3,                           /* 0x7 */
   1,                           /* 0x8 */
   2,                           /* 0x9 */
   2,                           /* 0xa */
   3,                           /* 0xb */
   2,                           /* 0xc */
   3,                           /* 0xd */
   3,                           /* 0xe */
   4,                           /* 0xf */
};
759
760
761
762/**
763 * General depth/stencil test function.  Used when there's no fast-path.
764 */
765static void
766depth_test_quads_fallback(struct quad_stage *qs,
767                          struct quad_header *quads[],
768                          unsigned nr)
769{
770   unsigned i, pass = 0;
771   const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
772   boolean interp_depth = !fsInfo->writes_z;
773   boolean shader_stencil_ref = fsInfo->writes_stencil;
774   struct depth_data data;
775
776   data.use_shader_stencil_refs = FALSE;
777
778   if (qs->softpipe->depth_stencil->alpha.enabled) {
779      nr = alpha_test_quads(qs, quads, nr);
780   }
781
782   if (qs->softpipe->framebuffer.zsbuf &&
783         (qs->softpipe->depth_stencil->depth.enabled ||
784          qs->softpipe->depth_stencil->stencil[0].enabled)) {
785
786      data.ps = qs->softpipe->framebuffer.zsbuf;
787      data.format = data.ps->format;
788      data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
789                                     quads[0]->input.x0,
790                                     quads[0]->input.y0);
791
792      for (i = 0; i < nr; i++) {
793         get_depth_stencil_values(&data, quads[i]);
794
795         if (qs->softpipe->depth_stencil->depth.enabled) {
796            if (interp_depth)
797               interpolate_quad_depth(quads[i]);
798
799            convert_quad_depth(&data, quads[i]);
800         }
801
802         if (qs->softpipe->depth_stencil->stencil[0].enabled) {
803            if (shader_stencil_ref)
804               convert_quad_stencil(&data, quads[i]);
805
806            depth_stencil_test_quad(qs, &data, quads[i]);
807            write_depth_stencil_values(&data, quads[i]);
808         }
809         else {
810            if (!depth_test_quad(qs, &data, quads[i]))
811               continue;
812
813            if (qs->softpipe->depth_stencil->depth.writemask)
814               write_depth_stencil_values(&data, quads[i]);
815         }
816
817         quads[pass++] = quads[i];
818      }
819
820      nr = pass;
821   }
822
823   if (qs->softpipe->active_query_count) {
824      for (i = 0; i < nr; i++)
825         qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
826   }
827
828   if (nr)
829      qs->next->run(qs->next, quads, nr);
830}
831
832
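/**
 * Special-case Z testing for a 16-bit Z buffer with interpolated Z and
 * Z buffer writes enabled.
 *
 * sp_quad_depth_test_tmp.h is included once per depth function below; NAME
 * (plus OPERATOR, or ALWAYS) is defined before each inclusion, and
 * choose_depth_test() installs the resulting depth_interp_z16_*_write
 * functions.
 */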
836
837#define NAME depth_interp_z16_less_write
838#define OPERATOR <
839#include "sp_quad_depth_test_tmp.h"
840
841#define NAME depth_interp_z16_equal_write
842#define OPERATOR ==
843#include "sp_quad_depth_test_tmp.h"
844
845#define NAME depth_interp_z16_lequal_write
846#define OPERATOR <=
847#include "sp_quad_depth_test_tmp.h"
848
849#define NAME depth_interp_z16_greater_write
850#define OPERATOR >
851#include "sp_quad_depth_test_tmp.h"
852
853#define NAME depth_interp_z16_notequal_write
854#define OPERATOR !=
855#include "sp_quad_depth_test_tmp.h"
856
857#define NAME depth_interp_z16_gequal_write
858#define OPERATOR >=
859#include "sp_quad_depth_test_tmp.h"
860
861#define NAME depth_interp_z16_always_write
862#define ALWAYS 1
863#include "sp_quad_depth_test_tmp.h"
864
865
866
867static void
868depth_noop(struct quad_stage *qs,
869           struct quad_header *quads[],
870           unsigned nr)
871{
872   qs->next->run(qs->next, quads, nr);
873}
874
875
876
877static void
878choose_depth_test(struct quad_stage *qs,
879                  struct quad_header *quads[],
880                  unsigned nr)
881{
882   const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
883
884   boolean interp_depth = !fsInfo->writes_z;
885
886   boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
887
888   boolean depth = qs->softpipe->depth_stencil->depth.enabled;
889
890   unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
891
892   boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
893
894   boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask;
895
896   boolean occlusion = qs->softpipe->active_query_count;
897
898   if(!qs->softpipe->framebuffer.zsbuf)
899      depth = depthwrite = stencil = FALSE;
900
901   /* default */
902   qs->run = depth_test_quads_fallback;
903
904   /* look for special cases */
905   if (!alpha &&
906       !depth &&
907       !occlusion &&
908       !stencil) {
909      qs->run = depth_noop;
910   }
911   else if (!alpha &&
912            interp_depth &&
913            depth &&
914            depthwrite &&
915            !occlusion &&
916            !stencil)
917   {
918      if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
919         switch (depthfunc) {
920         case PIPE_FUNC_NEVER:
921            qs->run = depth_test_quads_fallback;
922            break;
923         case PIPE_FUNC_LESS:
924            qs->run = depth_interp_z16_less_write;
925            break;
926         case PIPE_FUNC_EQUAL:
927            qs->run = depth_interp_z16_equal_write;
928            break;
929         case PIPE_FUNC_LEQUAL:
930            qs->run = depth_interp_z16_lequal_write;
931            break;
932         case PIPE_FUNC_GREATER:
933            qs->run = depth_interp_z16_greater_write;
934            break;
935         case PIPE_FUNC_NOTEQUAL:
936            qs->run = depth_interp_z16_notequal_write;
937            break;
938         case PIPE_FUNC_GEQUAL:
939            qs->run = depth_interp_z16_gequal_write;
940            break;
941         case PIPE_FUNC_ALWAYS:
942            qs->run = depth_interp_z16_always_write;
943            break;
944         default:
945            qs->run = depth_test_quads_fallback;
946            break;
947         }
948      }
949   }
950
951   /* next quad/fragment stage */
952   qs->run( qs, quads, nr );
953}
954
955
956
957static void
958depth_test_begin(struct quad_stage *qs)
959{
960   qs->run = choose_depth_test;
961   qs->next->begin(qs->next);
962}
963
964
/**
 * destroy() hook: free the stage structure itself.
 */
static void
depth_test_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
970
971
972struct quad_stage *
973sp_quad_depth_test_stage(struct softpipe_context *softpipe)
974{
975   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
976
977   stage->softpipe = softpipe;
978   stage->begin = depth_test_begin;
979   stage->run = choose_depth_test;
980   stage->destroy = depth_test_destroy;
981
982   return stage;
983}
984