1/**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * \brief  Primitive rasterization/rendering (points, lines, triangles)
30 *
31 * \author  Keith Whitwell <keithw@vmware.com>
32 * \author  Brian Paul
33 */
34
35#include "sp_context.h"
36#include "sp_quad.h"
37#include "sp_quad_pipe.h"
38#include "sp_setup.h"
39#include "sp_state.h"
40#include "draw/draw_context.h"
41#include "pipe/p_shader_tokens.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44
45
/* Compile-time debug switches (0 = off, 1 = on):
 *  - DEBUG_VERTS enables print_vertex() and the vertex dump at the start
 *    of triangle setup.
 *  - DEBUG_FRAGS enables the per-primitive fragment counters.
 */
#define DEBUG_VERTS 0
#define DEBUG_FRAGS 0
48
49
/**
 * Per-edge state used while rasterizing a triangle.
 *
 * dx/dy describe the edge vector, dxdy its inverse slope, and (sx, sy)
 * the first sample position on the edge.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0), used only during setup */
   float dy;     /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;   /**< dx/dy */
   float sx;     /**< X coordinate of the first sample point */
   float sy;     /**< Y coordinate of the first sample point */
   int lines;    /**< number of lines on this edge */
};
60
61
/**
 * Upper bound on the number of quads (2x2 pixel blocks) handled in one
 * batch; it sizes the quad[] and quad_ptrs[] arrays of setup_context.
 * It cannot be raised freely because span flushing relies on 32-bit
 * bitmasks that spend two bits per quad.
 */
#define MAX_QUADS 16
68
69
70/**
71 * Triangle setup info.
72 * Also used for line drawing (taking some liberties).
73 */
74struct setup_context {
75   struct softpipe_context *softpipe;
76
77   /* Vertices are just an array of floats making up each attribute in
78    * turn.  Currently fixed at 4 floats, but should change in time.
79    * Codegen will help cope with this.
80    */
81   const float (*vmax)[4];
82   const float (*vmid)[4];
83   const float (*vmin)[4];
84   const float (*vprovoke)[4];
85
86   struct edge ebot;
87   struct edge etop;
88   struct edge emaj;
89
90   float oneoverarea;
91   int facing;
92
93   float pixel_offset;
94   unsigned max_layer;
95
96   struct quad_header quad[MAX_QUADS];
97   struct quad_header *quad_ptrs[MAX_QUADS];
98   unsigned count;
99
100   struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
101   struct tgsi_interp_coef posCoef;  /* For Z, W */
102
103   struct {
104      int left[2];   /**< [0] = row0, [1] = row1 */
105      int right[2];
106      int y;
107   } span;
108
109#if DEBUG_FRAGS
110   uint numFragsEmitted;  /**< per primitive */
111   uint numFragsWritten;  /**< per primitive */
112#endif
113
114   unsigned cull_face;		/* which faces cull */
115   unsigned nr_vertex_attrs;
116};
117
118
119
120
121
122
123
124/**
125 * Clip setup->quad against the scissor/surface bounds.
126 */
127static inline void
128quad_clip(struct setup_context *setup, struct quad_header *quad)
129{
130   unsigned viewport_index = quad[0].input.viewport_index;
131   const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect[viewport_index];
132   const int minx = (int) cliprect->minx;
133   const int maxx = (int) cliprect->maxx;
134   const int miny = (int) cliprect->miny;
135   const int maxy = (int) cliprect->maxy;
136
137   if (quad->input.x0 >= maxx ||
138       quad->input.y0 >= maxy ||
139       quad->input.x0 + 1 < minx ||
140       quad->input.y0 + 1 < miny) {
141      /* totally clipped */
142      quad->inout.mask = 0x0;
143      return;
144   }
145   if (quad->input.x0 < minx)
146      quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
147   if (quad->input.y0 < miny)
148      quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
149   if (quad->input.x0 == maxx - 1)
150      quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
151   if (quad->input.y0 == maxy - 1)
152      quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
153}
154
155
156/**
157 * Emit a quad (pass to next stage) with clipping.
158 */
159static inline void
160clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
161{
162   quad_clip(setup, quad);
163
164   if (quad->inout.mask) {
165      struct softpipe_context *sp = setup->softpipe;
166
167#if DEBUG_FRAGS
168      setup->numFragsEmitted += util_bitcount(quad->inout.mask);
169#endif
170
171      sp->quad.first->run( sp->quad.first, &quad, 1 );
172   }
173}
174
175
176
/**
 * Round an X or Y pixel coordinate down to the origin of the 2x2 quad
 * that contains it (i.e. clear the lowest bit).
 */
static inline int
block(int x)
{
   const int quad_size = 2;

   return x & ~(quad_size - 1);
}
186
187
/**
 * Round an X pixel coordinate down to a multiple of 16, the width of
 * the horizontal chunks processed when spans are flushed.
 */
static inline int
block_x(int x)
{
   const int chunk_width = 16;

   return x & ~(chunk_width - 1);
}
193
194
195/**
196 * Render a horizontal span of quads
197 */
198static void
199flush_spans(struct setup_context *setup)
200{
201   const int step = MAX_QUADS;
202   const int xleft0 = setup->span.left[0];
203   const int xleft1 = setup->span.left[1];
204   const int xright0 = setup->span.right[0];
205   const int xright1 = setup->span.right[1];
206   struct quad_stage *pipe = setup->softpipe->quad.first;
207
208   const int minleft = block_x(MIN2(xleft0, xleft1));
209   const int maxright = MAX2(xright0, xright1);
210   int x;
211
212   /* process quads in horizontal chunks of 16 */
213   for (x = minleft; x < maxright; x += step) {
214      unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
215      unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
216      unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
217      unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
218      unsigned lx = x;
219      unsigned q = 0;
220
221      unsigned skipmask_left0 = (1U << skip_left0) - 1U;
222      unsigned skipmask_left1 = (1U << skip_left1) - 1U;
223
224      /* These calculations fail when step == 32 and skip_right == 0.
225       */
226      unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
227      unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);
228
229      unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
230      unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;
231
232      if (mask0 | mask1) {
233         do {
234            unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
235            if (quadmask) {
236               setup->quad[q].input.x0 = lx;
237               setup->quad[q].input.y0 = setup->span.y;
238               setup->quad[q].input.facing = setup->facing;
239               setup->quad[q].inout.mask = quadmask;
240               setup->quad_ptrs[q] = &setup->quad[q];
241               q++;
242#if DEBUG_FRAGS
243               setup->numFragsEmitted += util_bitcount(quadmask);
244#endif
245            }
246            mask0 >>= 2;
247            mask1 >>= 2;
248            lx += 2;
249         } while (mask0 | mask1);
250
251         pipe->run( pipe, setup->quad_ptrs, q );
252      }
253   }
254
255
256   setup->span.y = 0;
257   setup->span.right[0] = 0;
258   setup->span.right[1] = 0;
259   setup->span.left[0] = 1000000;     /* greater than right[0] */
260   setup->span.left[1] = 1000000;     /* greater than right[1] */
261}
262
263
#if DEBUG_VERTS
/**
 * Debug helper: print every attribute of a vertex (four floats each) and
 * flag attributes containing an inf or NaN component.
 *
 * \param setup  supplies nr_vertex_attrs, the number of attributes to dump
 * \param v      the vertex, as an array of 4-float attributes
 */
static void
print_vertex(const struct setup_context *setup,
             const float (*v)[4])
{
   unsigned i, c;

   debug_printf("   Vertex: (%p)\n", (void *) v);
   for (i = 0; i < setup->nr_vertex_attrs; i++) {
      debug_printf("     %u: %f %f %f %f\n",  i,
              v[i][0], v[i][1], v[i][2], v[i][3]);

      /* Check all four components, not just X; report once per attribute. */
      for (c = 0; c < 4; c++) {
         if (util_is_inf_or_nan(v[i][c])) {
            debug_printf("   NaN!\n");
            break;
         }
      }
   }
}
#endif
280
281
282/**
283 * Sort the vertices from top to bottom order, setting up the triangle
284 * edge fields (ebot, emaj, etop).
285 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
286 */
287static boolean
288setup_sort_vertices(struct setup_context *setup,
289                    float det,
290                    const float (*v0)[4],
291                    const float (*v1)[4],
292                    const float (*v2)[4])
293{
294   if (setup->softpipe->rasterizer->flatshade_first)
295      setup->vprovoke = v0;
296   else
297      setup->vprovoke = v2;
298
299   /* determine bottom to top order of vertices */
300   {
301      float y0 = v0[0][1];
302      float y1 = v1[0][1];
303      float y2 = v2[0][1];
304      if (y0 <= y1) {
305	 if (y1 <= y2) {
306	    /* y0<=y1<=y2 */
307	    setup->vmin = v0;
308	    setup->vmid = v1;
309	    setup->vmax = v2;
310	 }
311	 else if (y2 <= y0) {
312	    /* y2<=y0<=y1 */
313	    setup->vmin = v2;
314	    setup->vmid = v0;
315	    setup->vmax = v1;
316	 }
317	 else {
318	    /* y0<=y2<=y1 */
319	    setup->vmin = v0;
320	    setup->vmid = v2;
321	    setup->vmax = v1;
322	 }
323      }
324      else {
325	 if (y0 <= y2) {
326	    /* y1<=y0<=y2 */
327	    setup->vmin = v1;
328	    setup->vmid = v0;
329	    setup->vmax = v2;
330	 }
331	 else if (y2 <= y1) {
332	    /* y2<=y1<=y0 */
333	    setup->vmin = v2;
334	    setup->vmid = v1;
335	    setup->vmax = v0;
336	 }
337	 else {
338	    /* y1<=y2<=y0 */
339	    setup->vmin = v1;
340	    setup->vmid = v2;
341	    setup->vmax = v0;
342	 }
343      }
344   }
345
346   setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
347   setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
348   setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
349   setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
350   setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
351   setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
352
353   /*
354    * Compute triangle's area.  Use 1/area to compute partial
355    * derivatives of attributes later.
356    *
357    * The area will be the same as prim->det, but the sign may be
358    * different depending on how the vertices get sorted above.
359    *
360    * To determine whether the primitive is front or back facing we
361    * use the prim->det value because its sign is correct.
362    */
363   {
364      const float area = (setup->emaj.dx * setup->ebot.dy -
365			    setup->ebot.dx * setup->emaj.dy);
366
367      setup->oneoverarea = 1.0f / area;
368
369      /*
370      debug_printf("%s one-over-area %f  area %f  det %f\n",
371                   __FUNCTION__, setup->oneoverarea, area, det );
372      */
373      if (util_is_inf_or_nan(setup->oneoverarea))
374         return FALSE;
375   }
376
377   /* We need to know if this is a front or back-facing triangle for:
378    *  - the GLSL gl_FrontFacing fragment attribute (bool)
379    *  - two-sided stencil test
380    * 0 = front-facing, 1 = back-facing
381    */
382   setup->facing =
383      ((det < 0.0) ^
384       (setup->softpipe->rasterizer->front_ccw));
385
386   {
387      unsigned face = setup->facing == 0 ? PIPE_FACE_FRONT : PIPE_FACE_BACK;
388
389      if (face & setup->cull_face)
390	 return FALSE;
391   }
392
393
394   /* Prepare pixel offset for rasterisation:
395    *  - pixel center (0.5, 0.5) for GL, or
396    *  - assume (0.0, 0.0) for other APIs.
397    */
398   if (setup->softpipe->rasterizer->half_pixel_center) {
399      setup->pixel_offset = 0.5f;
400   } else {
401      setup->pixel_offset = 0.0f;
402   }
403
404   return TRUE;
405}
406
407
/* Copy v0, v1, v2 into output[], applying cylindrical wrapping first
 * when cylindrical_wrap is non-zero.
 *
 * The pairs (v0,v1), (v1,v2) and (v2,v0) are visited in that order; when
 * the second value of a pair exceeds the first by more than 0.5 the
 * first is raised by 1, and when it is lower by more than 0.5 the second
 * is raised by 1.
 *
 * Input coordinates must be in [0, 1] range, otherwise results are
 * undefined.  Some combinations of coordinates produce invalid results,
 * but this behaviour is acceptable.
 */
static void
tri_apply_cylindrical_wrap(float v0,
                           float v1,
                           float v2,
                           uint cylindrical_wrap,
                           float output[3])
{
   int k;

   output[0] = v0;
   output[1] = v1;
   output[2] = v2;

   if (!cylindrical_wrap)
      return;

   for (k = 0; k < 3; k++) {
      float *from = &output[k];
      float *to = &output[(k + 1) % 3];
      const float delta = *to - *from;

      if (delta > 0.5f) {
         *from += 1.0f;
      }
      else if (delta < -0.5f) {
         *to += 1.0f;
      }
   }
}
452
453
454/**
455 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
456 * The value value comes from vertex[slot][i].
457 * The result will be put into setup->coef[slot].a0[i].
458 * \param slot  which attribute slot
459 * \param i  which component of the slot (0..3)
460 */
461static void
462const_coeff(struct setup_context *setup,
463            struct tgsi_interp_coef *coef,
464            uint vertSlot, uint i)
465{
466   assert(i <= 3);
467
468   coef->dadx[i] = 0;
469   coef->dady[i] = 0;
470
471   /* need provoking vertex info!
472    */
473   coef->a0[i] = setup->vprovoke[vertSlot][i];
474}
475
476
477/**
478 * Compute a0, dadx and dady for a linearly interpolated coefficient,
479 * for a triangle.
480 * v[0], v[1] and v[2] are vmin, vmid and vmax, respectively.
481 */
482static void
483tri_linear_coeff(struct setup_context *setup,
484                 struct tgsi_interp_coef *coef,
485                 uint i,
486                 const float v[3])
487{
488   float botda = v[1] - v[0];
489   float majda = v[2] - v[0];
490   float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
491   float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
492   float dadx = a * setup->oneoverarea;
493   float dady = b * setup->oneoverarea;
494
495   assert(i <= 3);
496
497   coef->dadx[i] = dadx;
498   coef->dady[i] = dady;
499
500   /* calculate a0 as the value which would be sampled for the
501    * fragment at (0,0), taking into account that we want to sample at
502    * pixel centers, in other words (pixel_offset, pixel_offset).
503    *
504    * this is neat but unfortunately not a good way to do things for
505    * triangles with very large values of dadx or dady as it will
506    * result in the subtraction and re-addition from a0 of a very
507    * large number, which means we'll end up loosing a lot of the
508    * fractional bits and precision from a0.  the way to fix this is
509    * to define a0 as the sample at a pixel center somewhere near vmin
510    * instead - i'll switch to this later.
511    */
512   coef->a0[i] = (v[0] -
513                  (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
514                   dady * (setup->vmin[0][1] - setup->pixel_offset)));
515}
516
517
518/**
519 * Compute a0, dadx and dady for a perspective-corrected interpolant,
520 * for a triangle.
521 * We basically multiply the vertex value by 1/w before computing
522 * the plane coefficients (a0, dadx, dady).
523 * Later, when we compute the value at a particular fragment position we'll
524 * divide the interpolated value by the interpolated W at that fragment.
525 * v[0], v[1] and v[2] are vmin, vmid and vmax, respectively.
526 */
527static void
528tri_persp_coeff(struct setup_context *setup,
529                struct tgsi_interp_coef *coef,
530                uint i,
531                const float v[3])
532{
533   /* premultiply by 1/w  (v[0][3] is always W):
534    */
535   float mina = v[0] * setup->vmin[0][3];
536   float mida = v[1] * setup->vmid[0][3];
537   float maxa = v[2] * setup->vmax[0][3];
538   float botda = mida - mina;
539   float majda = maxa - mina;
540   float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
541   float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
542   float dadx = a * setup->oneoverarea;
543   float dady = b * setup->oneoverarea;
544
545   assert(i <= 3);
546
547   coef->dadx[i] = dadx;
548   coef->dady[i] = dady;
549   coef->a0[i] = (mina -
550                  (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
551                   dady * (setup->vmin[0][1] - setup->pixel_offset)));
552}
553
554
555/**
556 * Special coefficient setup for gl_FragCoord.
557 * X and Y are trivial, though Y may have to be inverted for OpenGL.
558 * Z and W are copied from posCoef which should have already been computed.
559 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
560 */
561static void
562setup_fragcoord_coeff(struct setup_context *setup, uint slot)
563{
564   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
565   boolean origin_lower_left =
566         fsInfo->properties[TGSI_PROPERTY_FS_COORD_ORIGIN];
567   boolean pixel_center_integer =
568         fsInfo->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER];
569
570   /*X*/
571   setup->coef[slot].a0[0] = pixel_center_integer ? 0.0f : 0.5f;
572   setup->coef[slot].dadx[0] = 1.0f;
573   setup->coef[slot].dady[0] = 0.0f;
574   /*Y*/
575   setup->coef[slot].a0[1] =
576		   (origin_lower_left ? setup->softpipe->framebuffer.height-1 : 0)
577		   + (pixel_center_integer ? 0.0f : 0.5f);
578   setup->coef[slot].dadx[1] = 0.0f;
579   setup->coef[slot].dady[1] = origin_lower_left ? -1.0f : 1.0f;
580   /*Z*/
581   setup->coef[slot].a0[2] = setup->posCoef.a0[2];
582   setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
583   setup->coef[slot].dady[2] = setup->posCoef.dady[2];
584   /*W*/
585   setup->coef[slot].a0[3] = setup->posCoef.a0[3];
586   setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
587   setup->coef[slot].dady[3] = setup->posCoef.dady[3];
588}
589
590
591
592/**
593 * Compute the setup->coef[] array dadx, dady, a0 values.
594 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
595 */
596static void
597setup_tri_coefficients(struct setup_context *setup)
598{
599   struct softpipe_context *softpipe = setup->softpipe;
600   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
601   const struct sp_setup_info *sinfo = &softpipe->setup_info;
602   uint fragSlot;
603   float v[3];
604
605   assert(sinfo->valid);
606
607   /* z and w are done by linear interpolation:
608    */
609   v[0] = setup->vmin[0][2];
610   v[1] = setup->vmid[0][2];
611   v[2] = setup->vmax[0][2];
612   tri_linear_coeff(setup, &setup->posCoef, 2, v);
613
614   v[0] = setup->vmin[0][3];
615   v[1] = setup->vmid[0][3];
616   v[2] = setup->vmax[0][3];
617   tri_linear_coeff(setup, &setup->posCoef, 3, v);
618
619   /* setup interpolation for all the remaining attributes:
620    */
621   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
622      const uint vertSlot = sinfo->attrib[fragSlot].src_index;
623      uint j;
624
625      switch (sinfo->attrib[fragSlot].interp) {
626      case SP_INTERP_CONSTANT:
627         for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
628            const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
629         }
630         break;
631      case SP_INTERP_LINEAR:
632         for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
633            tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
634                                       setup->vmid[vertSlot][j],
635                                       setup->vmax[vertSlot][j],
636                                       fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
637                                       v);
638            tri_linear_coeff(setup, &setup->coef[fragSlot], j, v);
639         }
640         break;
641      case SP_INTERP_PERSPECTIVE:
642         for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
643            tri_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
644                                       setup->vmid[vertSlot][j],
645                                       setup->vmax[vertSlot][j],
646                                       fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
647                                       v);
648            tri_persp_coeff(setup, &setup->coef[fragSlot], j, v);
649         }
650         break;
651      case SP_INTERP_POS:
652         setup_fragcoord_coeff(setup, fragSlot);
653         break;
654      default:
655         assert(0);
656      }
657
658      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
659         /* convert 0 to 1.0 and 1 to -1.0 */
660         setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
661         setup->coef[fragSlot].dadx[0] = 0.0;
662         setup->coef[fragSlot].dady[0] = 0.0;
663      }
664
665      if (0) {
666         for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
667            debug_printf("attr[%d].%c: a0:%f dx:%f dy:%f\n",
668                         fragSlot, "xyzw"[j],
669                         setup->coef[fragSlot].a0[j],
670                         setup->coef[fragSlot].dadx[j],
671                         setup->coef[fragSlot].dady[j]);
672         }
673      }
674   }
675}
676
677
678static void
679setup_tri_edges(struct setup_context *setup)
680{
681   float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
682   float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
683
684   float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
685   float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
686   float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
687
688   setup->emaj.sy = ceilf(vmin_y);
689   setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
690   setup->emaj.dxdy = setup->emaj.dy ? setup->emaj.dx / setup->emaj.dy : .0f;
691   setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
692
693   setup->etop.sy = ceilf(vmid_y);
694   setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
695   setup->etop.dxdy = setup->etop.dy ? setup->etop.dx / setup->etop.dy : .0f;
696   setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
697
698   setup->ebot.sy = ceilf(vmin_y);
699   setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
700   setup->ebot.dxdy = setup->ebot.dy ? setup->ebot.dx / setup->ebot.dy : .0f;
701   setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
702}
703
704
705/**
706 * Render the upper or lower half of a triangle.
707 * Scissoring/cliprect is applied here too.
708 */
709static void
710subtriangle(struct setup_context *setup,
711            struct edge *eleft,
712            struct edge *eright,
713            int lines,
714            unsigned viewport_index)
715{
716   const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect[viewport_index];
717   const int minx = (int) cliprect->minx;
718   const int maxx = (int) cliprect->maxx;
719   const int miny = (int) cliprect->miny;
720   const int maxy = (int) cliprect->maxy;
721   int y, start_y, finish_y;
722   int sy = (int)eleft->sy;
723
724   assert((int)eleft->sy == (int) eright->sy);
725   assert(lines >= 0);
726
727   /* clip top/bottom */
728   start_y = sy;
729   if (start_y < miny)
730      start_y = miny;
731
732   finish_y = sy + lines;
733   if (finish_y > maxy)
734      finish_y = maxy;
735
736   start_y -= sy;
737   finish_y -= sy;
738
739   /*
740   debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
741   */
742
743   for (y = start_y; y < finish_y; y++) {
744
745      /* avoid accumulating adds as floats don't have the precision to
746       * accurately iterate large triangle edges that way.  luckily we
747       * can just multiply these days.
748       *
749       * this is all drowned out by the attribute interpolation anyway.
750       */
751      int left = (int)(eleft->sx + y * eleft->dxdy);
752      int right = (int)(eright->sx + y * eright->dxdy);
753
754      /* clip left/right */
755      if (left < minx)
756         left = minx;
757      if (right > maxx)
758         right = maxx;
759
760      if (left < right) {
761         int _y = sy + y;
762         if (block(_y) != setup->span.y) {
763            flush_spans(setup);
764            setup->span.y = block(_y);
765         }
766
767         setup->span.left[_y&1] = left;
768         setup->span.right[_y&1] = right;
769      }
770   }
771
772
773   /* save the values so that emaj can be restarted:
774    */
775   eleft->sx += lines * eleft->dxdy;
776   eright->sx += lines * eright->dxdy;
777   eleft->sy += lines;
778   eright->sy += lines;
779}
780
781
/**
 * Compute the determinant of a triangle from the X/Y of its three
 * vertex positions (attribute 0).  It is recomputed here because the
 * vbuf_render interface does not pass it along.
 *
 * \return z component of cross(v0 - v2, v1 - v2)
 */
static float
calc_det(const float (*v0)[4],
         const float (*v1)[4],
         const float (*v2)[4])
{
   const float *p0 = v0[0];
   const float *p1 = v1[0];
   const float *p2 = v2[0];

   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float ex = p0[0] - p2[0];
   const float ey = p0[1] - p2[1];
   const float fx = p1[0] - p2[0];
   const float fy = p1[1] - p2[1];

   /* det = cross(e,f).z */
   return ex * fy - ey * fx;
}
801
802
803/**
804 * Do setup for triangle rasterization, then render the triangle.
805 */
806void
807sp_setup_tri(struct setup_context *setup,
808             const float (*v0)[4],
809             const float (*v1)[4],
810             const float (*v2)[4])
811{
812   float det;
813   uint layer = 0;
814   unsigned viewport_index = 0;
815#if DEBUG_VERTS
816   debug_printf("Setup triangle:\n");
817   print_vertex(setup, v0);
818   print_vertex(setup, v1);
819   print_vertex(setup, v2);
820#endif
821
822   if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
823      return;
824
825   det = calc_det(v0, v1, v2);
826   /*
827   debug_printf("%s\n", __FUNCTION__ );
828   */
829
830#if DEBUG_FRAGS
831   setup->numFragsEmitted = 0;
832   setup->numFragsWritten = 0;
833#endif
834
835   if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
836      return;
837
838   setup_tri_coefficients( setup );
839   setup_tri_edges( setup );
840
841   assert(setup->softpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
842
843   setup->span.y = 0;
844   setup->span.right[0] = 0;
845   setup->span.right[1] = 0;
846   /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
847   if (setup->softpipe->layer_slot > 0) {
848      layer = *(unsigned *)setup->vprovoke[setup->softpipe->layer_slot];
849      layer = MIN2(layer, setup->max_layer);
850   }
851   setup->quad[0].input.layer = layer;
852
853   if (setup->softpipe->viewport_index_slot > 0) {
854      unsigned *udata = (unsigned*)v0[setup->softpipe->viewport_index_slot];
855      viewport_index = sp_clamp_viewport_idx(*udata);
856   }
857   setup->quad[0].input.viewport_index = viewport_index;
858
859   /*   init_constant_attribs( setup ); */
860
861   if (setup->oneoverarea < 0.0) {
862      /* emaj on left:
863       */
864      subtriangle(setup, &setup->emaj, &setup->ebot, setup->ebot.lines, viewport_index);
865      subtriangle(setup, &setup->emaj, &setup->etop, setup->etop.lines, viewport_index);
866   }
867   else {
868      /* emaj on right:
869       */
870      subtriangle(setup, &setup->ebot, &setup->emaj, setup->ebot.lines, viewport_index);
871      subtriangle(setup, &setup->etop, &setup->emaj, setup->etop.lines, viewport_index);
872   }
873
874   flush_spans( setup );
875
876   if (setup->softpipe->active_statistics_queries) {
877      setup->softpipe->pipeline_statistics.c_primitives++;
878   }
879
880#if DEBUG_FRAGS
881   printf("Tri: %u frags emitted, %u written\n",
882          setup->numFragsEmitted,
883          setup->numFragsWritten);
884#endif
885}
886
887
/* Copy v0, v1 into output[], applying cylindrical wrapping first when
 * cylindrical_wrap is non-zero: if v1 exceeds v0 by more than 0.5, v0 is
 * raised by 1; if it is lower by more than 0.5, v1 is raised by 1.
 * Input coordinates must be in [0, 1] range, otherwise results are undefined.
 */
static void
line_apply_cylindrical_wrap(float v0,
                            float v1,
                            uint cylindrical_wrap,
                            float output[2])
{
   const float delta = v1 - v0;

   output[0] = v0;
   output[1] = v1;

   if (!cylindrical_wrap)
      return;

   if (delta > 0.5f) {
      output[0] = v0 + 1.0f;
   }
   else if (delta < -0.5f) {
      output[1] = v1 + 1.0f;
   }
}
912
913
914/**
915 * Compute a0, dadx and dady for a linearly interpolated coefficient,
916 * for a line.
917 * v[0] and v[1] are vmin and vmax, respectively.
918 */
919static void
920line_linear_coeff(const struct setup_context *setup,
921                  struct tgsi_interp_coef *coef,
922                  uint i,
923                  const float v[2])
924{
925   const float da = v[1] - v[0];
926   const float dadx = da * setup->emaj.dx * setup->oneoverarea;
927   const float dady = da * setup->emaj.dy * setup->oneoverarea;
928   coef->dadx[i] = dadx;
929   coef->dady[i] = dady;
930   coef->a0[i] = (v[0] -
931                  (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
932                   dady * (setup->vmin[0][1] - setup->pixel_offset)));
933}
934
935
936/**
937 * Compute a0, dadx and dady for a perspective-corrected interpolant,
938 * for a line.
939 * v[0] and v[1] are vmin and vmax, respectively.
940 */
941static void
942line_persp_coeff(const struct setup_context *setup,
943                 struct tgsi_interp_coef *coef,
944                 uint i,
945                 const float v[2])
946{
947   const float a0 = v[0] * setup->vmin[0][3];
948   const float a1 = v[1] * setup->vmax[0][3];
949   const float da = a1 - a0;
950   const float dadx = da * setup->emaj.dx * setup->oneoverarea;
951   const float dady = da * setup->emaj.dy * setup->oneoverarea;
952   coef->dadx[i] = dadx;
953   coef->dady[i] = dady;
954   coef->a0[i] = (a0 -
955                  (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
956                   dady * (setup->vmin[0][1] - setup->pixel_offset)));
957}
958
959
960/**
961 * Compute the setup->coef[] array dadx, dady, a0 values.
962 * Must be called after setup->vmin,vmax are initialized.
963 */
964static boolean
965setup_line_coefficients(struct setup_context *setup,
966                        const float (*v0)[4],
967                        const float (*v1)[4])
968{
969   struct softpipe_context *softpipe = setup->softpipe;
970   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
971   const struct sp_setup_info *sinfo = &softpipe->setup_info;
972   uint fragSlot;
973   float area;
974   float v[2];
975
976   assert(sinfo->valid);
977
978   /* use setup->vmin, vmax to point to vertices */
979   if (softpipe->rasterizer->flatshade_first)
980      setup->vprovoke = v0;
981   else
982      setup->vprovoke = v1;
983   setup->vmin = v0;
984   setup->vmax = v1;
985
986   setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
987   setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
988
989   /* NOTE: this is not really area but something proportional to it */
990   area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
991   if (area == 0.0f || util_is_inf_or_nan(area))
992      return FALSE;
993   setup->oneoverarea = 1.0f / area;
994
995   /* z and w are done by linear interpolation:
996    */
997   v[0] = setup->vmin[0][2];
998   v[1] = setup->vmax[0][2];
999   line_linear_coeff(setup, &setup->posCoef, 2, v);
1000
1001   v[0] = setup->vmin[0][3];
1002   v[1] = setup->vmax[0][3];
1003   line_linear_coeff(setup, &setup->posCoef, 3, v);
1004
1005   /* setup interpolation for all the remaining attributes:
1006    */
1007   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
1008      const uint vertSlot = sinfo->attrib[fragSlot].src_index;
1009      uint j;
1010
1011      switch (sinfo->attrib[fragSlot].interp) {
1012      case SP_INTERP_CONSTANT:
1013         for (j = 0; j < TGSI_NUM_CHANNELS; j++)
1014            const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1015         break;
1016      case SP_INTERP_LINEAR:
1017         for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
1018            line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
1019                                        setup->vmax[vertSlot][j],
1020                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
1021                                        v);
1022            line_linear_coeff(setup, &setup->coef[fragSlot], j, v);
1023         }
1024         break;
1025      case SP_INTERP_PERSPECTIVE:
1026         for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
1027            line_apply_cylindrical_wrap(setup->vmin[vertSlot][j],
1028                                        setup->vmax[vertSlot][j],
1029                                        fsInfo->input_cylindrical_wrap[fragSlot] & (1 << j),
1030                                        v);
1031            line_persp_coeff(setup, &setup->coef[fragSlot], j, v);
1032         }
1033         break;
1034      case SP_INTERP_POS:
1035         setup_fragcoord_coeff(setup, fragSlot);
1036         break;
1037      default:
1038         assert(0);
1039      }
1040
1041      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1042         /* convert 0 to 1.0 and 1 to -1.0 */
1043         setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
1044         setup->coef[fragSlot].dadx[0] = 0.0;
1045         setup->coef[fragSlot].dady[0] = 0.0;
1046      }
1047   }
1048   return TRUE;
1049}
1050
1051
1052/**
1053 * Plot a pixel in a line segment.
1054 */
1055static inline void
1056plot(struct setup_context *setup, int x, int y)
1057{
1058   const int iy = y & 1;
1059   const int ix = x & 1;
1060   const int quadX = x - ix;
1061   const int quadY = y - iy;
1062   const int mask = (1 << ix) << (2 * iy);
1063
1064   if (quadX != setup->quad[0].input.x0 ||
1065       quadY != setup->quad[0].input.y0)
1066   {
1067      /* flush prev quad, start new quad */
1068
1069      if (setup->quad[0].input.x0 != -1)
1070         clip_emit_quad(setup, &setup->quad[0]);
1071
1072      setup->quad[0].input.x0 = quadX;
1073      setup->quad[0].input.y0 = quadY;
1074      setup->quad[0].inout.mask = 0x0;
1075   }
1076
1077   setup->quad[0].inout.mask |= mask;
1078}
1079
1080
1081/**
1082 * Do setup for line rasterization, then render the line.
1083 * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
1084 * to handle stippling and wide lines.
1085 */
1086void
1087sp_setup_line(struct setup_context *setup,
1088              const float (*v0)[4],
1089              const float (*v1)[4])
1090{
1091   int x0 = (int) v0[0][0];
1092   int x1 = (int) v1[0][0];
1093   int y0 = (int) v0[0][1];
1094   int y1 = (int) v1[0][1];
1095   int dx = x1 - x0;
1096   int dy = y1 - y0;
1097   int xstep, ystep;
1098   uint layer = 0;
1099   unsigned viewport_index = 0;
1100
1101#if DEBUG_VERTS
1102   debug_printf("Setup line:\n");
1103   print_vertex(setup, v0);
1104   print_vertex(setup, v1);
1105#endif
1106
1107   if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
1108      return;
1109
1110   if (dx == 0 && dy == 0)
1111      return;
1112
1113   if (!setup_line_coefficients(setup, v0, v1))
1114      return;
1115
1116   assert(v0[0][0] < 1.0e9);
1117   assert(v0[0][1] < 1.0e9);
1118   assert(v1[0][0] < 1.0e9);
1119   assert(v1[0][1] < 1.0e9);
1120
1121   if (dx < 0) {
1122      dx = -dx;   /* make positive */
1123      xstep = -1;
1124   }
1125   else {
1126      xstep = 1;
1127   }
1128
1129   if (dy < 0) {
1130      dy = -dy;   /* make positive */
1131      ystep = -1;
1132   }
1133   else {
1134      ystep = 1;
1135   }
1136
1137   assert(dx >= 0);
1138   assert(dy >= 0);
1139   assert(setup->softpipe->reduced_prim == PIPE_PRIM_LINES);
1140
1141   setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
1142   setup->quad[0].inout.mask = 0x0;
1143   if (setup->softpipe->layer_slot > 0) {
1144      layer = *(unsigned *)setup->vprovoke[setup->softpipe->layer_slot];
1145      layer = MIN2(layer, setup->max_layer);
1146   }
1147   setup->quad[0].input.layer = layer;
1148
1149   if (setup->softpipe->viewport_index_slot > 0) {
1150      unsigned *udata = (unsigned*)setup->vprovoke[setup->softpipe->viewport_index_slot];
1151      viewport_index = sp_clamp_viewport_idx(*udata);
1152   }
1153   setup->quad[0].input.viewport_index = viewport_index;
1154
1155   /* XXX temporary: set coverage to 1.0 so the line appears
1156    * if AA mode happens to be enabled.
1157    */
1158   setup->quad[0].input.coverage[0] =
1159   setup->quad[0].input.coverage[1] =
1160   setup->quad[0].input.coverage[2] =
1161   setup->quad[0].input.coverage[3] = 1.0;
1162
1163   if (dx > dy) {
1164      /*** X-major line ***/
1165      int i;
1166      const int errorInc = dy + dy;
1167      int error = errorInc - dx;
1168      const int errorDec = error - dx;
1169
1170      for (i = 0; i < dx; i++) {
1171         plot(setup, x0, y0);
1172
1173         x0 += xstep;
1174         if (error < 0) {
1175            error += errorInc;
1176         }
1177         else {
1178            error += errorDec;
1179            y0 += ystep;
1180         }
1181      }
1182   }
1183   else {
1184      /*** Y-major line ***/
1185      int i;
1186      const int errorInc = dx + dx;
1187      int error = errorInc - dy;
1188      const int errorDec = error - dy;
1189
1190      for (i = 0; i < dy; i++) {
1191         plot(setup, x0, y0);
1192
1193         y0 += ystep;
1194         if (error < 0) {
1195            error += errorInc;
1196         }
1197         else {
1198            error += errorDec;
1199            x0 += xstep;
1200         }
1201      }
1202   }
1203
1204   /* draw final quad */
1205   if (setup->quad[0].inout.mask) {
1206      clip_emit_quad(setup, &setup->quad[0]);
1207   }
1208}
1209
1210
1211static void
1212point_persp_coeff(const struct setup_context *setup,
1213                  const float (*vert)[4],
1214                  struct tgsi_interp_coef *coef,
1215                  uint vertSlot, uint i)
1216{
1217   assert(i <= 3);
1218   coef->dadx[i] = 0.0F;
1219   coef->dady[i] = 0.0F;
1220   coef->a0[i] = vert[vertSlot][i] * vert[0][3];
1221}
1222
1223
1224/**
1225 * Do setup for point rasterization, then render the point.
1226 * Round or square points...
1227 * XXX could optimize a lot for 1-pixel points.
1228 */
1229void
1230sp_setup_point(struct setup_context *setup,
1231               const float (*v0)[4])
1232{
1233   struct softpipe_context *softpipe = setup->softpipe;
1234   const struct tgsi_shader_info *fsInfo = &setup->softpipe->fs_variant->info;
1235   const int sizeAttr = setup->softpipe->psize_slot;
1236   const float size
1237      = sizeAttr > 0 ? v0[sizeAttr][0]
1238      : setup->softpipe->rasterizer->point_size;
1239   const float halfSize = 0.5F * size;
1240   const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth;
1241   const float x = v0[0][0];  /* Note: data[0] is always position */
1242   const float y = v0[0][1];
1243   const struct sp_setup_info *sinfo = &softpipe->setup_info;
1244   uint fragSlot;
1245   uint layer = 0;
1246   unsigned viewport_index = 0;
1247#if DEBUG_VERTS
1248   debug_printf("Setup point:\n");
1249   print_vertex(setup, v0);
1250#endif
1251
1252   assert(sinfo->valid);
1253
1254   if (setup->softpipe->no_rast || setup->softpipe->rasterizer->rasterizer_discard)
1255      return;
1256
1257   assert(setup->softpipe->reduced_prim == PIPE_PRIM_POINTS);
1258
1259   if (setup->softpipe->layer_slot > 0) {
1260      layer = *(unsigned *)v0[setup->softpipe->layer_slot];
1261      layer = MIN2(layer, setup->max_layer);
1262   }
1263   setup->quad[0].input.layer = layer;
1264
1265   if (setup->softpipe->viewport_index_slot > 0) {
1266      unsigned *udata = (unsigned*)v0[setup->softpipe->viewport_index_slot];
1267      viewport_index = sp_clamp_viewport_idx(*udata);
1268   }
1269   setup->quad[0].input.viewport_index = viewport_index;
1270
1271   /* For points, all interpolants are constant-valued.
1272    * However, for point sprites, we'll need to setup texcoords appropriately.
1273    * XXX: which coefficients are the texcoords???
1274    * We may do point sprites as textured quads...
1275    *
1276    * KW: We don't know which coefficients are texcoords - ultimately
1277    * the choice of what interpolation mode to use for each attribute
1278    * should be determined by the fragment program, using
1279    * per-attribute declaration statements that include interpolation
1280    * mode as a parameter.  So either the fragment program will have
1281    * to be adjusted for pointsprite vs normal point behaviour, or
1282    * otherwise a special interpolation mode will have to be defined
1283    * which matches the required behaviour for point sprites.  But -
1284    * the latter is not a feature of normal hardware, and as such
1285    * probably should be ruled out on that basis.
1286    */
1287   setup->vprovoke = v0;
1288
1289   /* setup Z, W */
1290   const_coeff(setup, &setup->posCoef, 0, 2);
1291   const_coeff(setup, &setup->posCoef, 0, 3);
1292
1293   for (fragSlot = 0; fragSlot < fsInfo->num_inputs; fragSlot++) {
1294      const uint vertSlot = sinfo->attrib[fragSlot].src_index;
1295      uint j;
1296
1297      switch (sinfo->attrib[fragSlot].interp) {
1298      case SP_INTERP_CONSTANT:
1299         /* fall-through */
1300      case SP_INTERP_LINEAR:
1301         for (j = 0; j < TGSI_NUM_CHANNELS; j++)
1302            const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
1303         break;
1304      case SP_INTERP_PERSPECTIVE:
1305         for (j = 0; j < TGSI_NUM_CHANNELS; j++)
1306            point_persp_coeff(setup, setup->vprovoke,
1307                              &setup->coef[fragSlot], vertSlot, j);
1308         break;
1309      case SP_INTERP_POS:
1310         setup_fragcoord_coeff(setup, fragSlot);
1311         break;
1312      default:
1313         assert(0);
1314      }
1315
1316      if (fsInfo->input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1317         /* convert 0 to 1.0 and 1 to -1.0 */
1318         setup->coef[fragSlot].a0[0] = setup->facing * -2.0f + 1.0f;
1319         setup->coef[fragSlot].dadx[0] = 0.0;
1320         setup->coef[fragSlot].dady[0] = 0.0;
1321      }
1322   }
1323
1324
1325   if (halfSize <= 0.5 && !round) {
1326      /* special case for 1-pixel points */
1327      const int ix = ((int) x) & 1;
1328      const int iy = ((int) y) & 1;
1329      setup->quad[0].input.x0 = (int) x - ix;
1330      setup->quad[0].input.y0 = (int) y - iy;
1331      setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
1332      clip_emit_quad(setup, &setup->quad[0]);
1333   }
1334   else {
1335      if (round) {
1336         /* rounded points */
1337         const int ixmin = block((int) (x - halfSize));
1338         const int ixmax = block((int) (x + halfSize));
1339         const int iymin = block((int) (y - halfSize));
1340         const int iymax = block((int) (y + halfSize));
1341         const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
1342         const float rmax = halfSize + 0.7071F;
1343         const float rmin2 = MAX2(0.0F, rmin * rmin);
1344         const float rmax2 = rmax * rmax;
1345         const float cscale = 1.0F / (rmax2 - rmin2);
1346         int ix, iy;
1347
1348         for (iy = iymin; iy <= iymax; iy += 2) {
1349            for (ix = ixmin; ix <= ixmax; ix += 2) {
1350               float dx, dy, dist2, cover;
1351
1352               setup->quad[0].inout.mask = 0x0;
1353
1354               dx = (ix + 0.5f) - x;
1355               dy = (iy + 0.5f) - y;
1356               dist2 = dx * dx + dy * dy;
1357               if (dist2 <= rmax2) {
1358                  cover = 1.0F - (dist2 - rmin2) * cscale;
1359                  setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1360                  setup->quad[0].inout.mask |= MASK_TOP_LEFT;
1361               }
1362
1363               dx = (ix + 1.5f) - x;
1364               dy = (iy + 0.5f) - y;
1365               dist2 = dx * dx + dy * dy;
1366               if (dist2 <= rmax2) {
1367                  cover = 1.0F - (dist2 - rmin2) * cscale;
1368                  setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1369                  setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
1370               }
1371
1372               dx = (ix + 0.5f) - x;
1373               dy = (iy + 1.5f) - y;
1374               dist2 = dx * dx + dy * dy;
1375               if (dist2 <= rmax2) {
1376                  cover = 1.0F - (dist2 - rmin2) * cscale;
1377                  setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1378                  setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
1379               }
1380
1381               dx = (ix + 1.5f) - x;
1382               dy = (iy + 1.5f) - y;
1383               dist2 = dx * dx + dy * dy;
1384               if (dist2 <= rmax2) {
1385                  cover = 1.0F - (dist2 - rmin2) * cscale;
1386                  setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1387                  setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
1388               }
1389
1390               if (setup->quad[0].inout.mask) {
1391                  setup->quad[0].input.x0 = ix;
1392                  setup->quad[0].input.y0 = iy;
1393                  clip_emit_quad(setup, &setup->quad[0]);
1394               }
1395            }
1396         }
1397      }
1398      else {
1399         /* square points */
1400         const int xmin = (int) (x + 0.75 - halfSize);
1401         const int ymin = (int) (y + 0.25 - halfSize);
1402         const int xmax = xmin + (int) size;
1403         const int ymax = ymin + (int) size;
1404         /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1405         const int ixmin = block(xmin);
1406         const int ixmax = block(xmax - 1);
1407         const int iymin = block(ymin);
1408         const int iymax = block(ymax - 1);
1409         int ix, iy;
1410
1411         /*
1412         debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1413         */
1414         for (iy = iymin; iy <= iymax; iy += 2) {
1415            uint rowMask = 0xf;
1416            if (iy < ymin) {
1417               /* above the top edge */
1418               rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1419            }
1420            if (iy + 1 >= ymax) {
1421               /* below the bottom edge */
1422               rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1423            }
1424
1425            for (ix = ixmin; ix <= ixmax; ix += 2) {
1426               uint mask = rowMask;
1427
1428               if (ix < xmin) {
1429                  /* fragment is past left edge of point, turn off left bits */
1430                  mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1431               }
1432               if (ix + 1 >= xmax) {
1433                  /* past the right edge */
1434                  mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1435               }
1436
1437               setup->quad[0].inout.mask = mask;
1438               setup->quad[0].input.x0 = ix;
1439               setup->quad[0].input.y0 = iy;
1440               clip_emit_quad(setup, &setup->quad[0]);
1441            }
1442         }
1443      }
1444   }
1445}
1446
1447
1448/**
1449 * Called by vbuf code just before we start buffering primitives.
1450 */
1451void
1452sp_setup_prepare(struct setup_context *setup)
1453{
1454   struct softpipe_context *sp = setup->softpipe;
1455   int i;
1456   unsigned max_layer = ~0;
1457   if (sp->dirty) {
1458      softpipe_update_derived(sp, sp->reduced_api_prim);
1459   }
1460
1461   /* Note: nr_attrs is only used for debugging (vertex printing) */
1462   setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw);
1463
1464   /*
1465    * Determine how many layers the fb has (used for clamping layer value).
1466    * OpenGL (but not d3d10) permits different amount of layers per rt, however
1467    * results are undefined if layer exceeds the amount of layers of ANY
1468    * attachment hence don't need separate per cbuf and zsbuf max.
1469    */
1470   for (i = 0; i < setup->softpipe->framebuffer.nr_cbufs; i++) {
1471      struct pipe_surface *cbuf = setup->softpipe->framebuffer.cbufs[i];
1472      if (cbuf) {
1473         max_layer = MIN2(max_layer,
1474                          cbuf->u.tex.last_layer - cbuf->u.tex.first_layer);
1475
1476      }
1477   }
1478
1479   setup->max_layer = max_layer;
1480
1481   sp->quad.first->begin( sp->quad.first );
1482
1483   if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
1484       sp->rasterizer->fill_front == PIPE_POLYGON_MODE_FILL &&
1485       sp->rasterizer->fill_back == PIPE_POLYGON_MODE_FILL) {
1486      /* we'll do culling */
1487      setup->cull_face = sp->rasterizer->cull_face;
1488   }
1489   else {
1490      /* 'draw' will do culling */
1491      setup->cull_face = PIPE_FACE_NONE;
1492   }
1493}
1494
1495
/**
 * Destroy a setup context by releasing the memory of the setup_context
 * structure itself.
 *
 * \param setup  the context to free
 */
void
sp_setup_destroy_context(struct setup_context *setup)
{
   FREE(setup);
}
1501
1502
1503/**
1504 * Create a new primitive setup/render stage.
1505 */
1506struct setup_context *
1507sp_setup_create_context(struct softpipe_context *softpipe)
1508{
1509   struct setup_context *setup = CALLOC_STRUCT(setup_context);
1510   unsigned i;
1511
1512   setup->softpipe = softpipe;
1513
1514   for (i = 0; i < MAX_QUADS; i++) {
1515      setup->quad[i].coef = setup->coef;
1516      setup->quad[i].posCoef = &setup->posCoef;
1517   }
1518
1519   setup->span.left[0] = 1000000;     /* greater than right[0] */
1520   setup->span.left[1] = 1000000;     /* greater than right[1] */
1521
1522   return setup;
1523}
1524