lp_setup.c revision 26c78a4968a3c10ca006699d240150e6aa4b4250
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/**
29 * \brief  Primitive rasterization/rendering (points, lines, triangles)
30 *
31 * \author  Keith Whitwell <keith@tungstengraphics.com>
32 * \author  Brian Paul
33 */
34
35#include "lp_context.h"
36#include "lp_quad.h"
37#include "lp_setup.h"
38#include "lp_state.h"
39#include "draw/draw_context.h"
40#include "draw/draw_private.h"
41#include "draw/draw_vertex.h"
42#include "pipe/p_shader_tokens.h"
43#include "pipe/p_thread.h"
44#include "util/u_format.h"
45#include "util/u_math.h"
46#include "util/u_memory.h"
47#include "lp_bld_debug.h"
48#include "lp_tile_cache.h"
49#include "lp_tile_soa.h"
50
51
/* Set to 1 to compile in print_vertex() and dump each triangle's vertices. */
#define DEBUG_VERTS 0
/* Set to 1 to compile in the per-primitive fragment counters and their report. */
#define DEBUG_FRAGS 0
54
/**
 * State of one triangle edge during scan conversion.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0), used only during setup */
   float dy;     /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;   /**< dx/dy */
   float sx;     /**< X coordinate of the first sample point */
   float sy;     /**< Y coordinate of the first sample point */
   int lines;    /**< number of lines on this edge */
};
65
66
/** Number of entries in setup_context::quad and setup_context::quad_ptrs. */
#define MAX_QUADS 16


/**
 * Triangle setup info (derived from draw_stage).
 * Also used for line drawing (taking some liberties).
 */
struct setup_context {
   /** Owning context; source of shader, rasterizer and cliprect state. */
   struct llvmpipe_context *llvmpipe;

   /* Vertices are just an array of floats making up each attribute in
    * turn.  Currently fixed at 4 floats, but should change in time.
    * Codegen will help cope with this.
    *
    * vmin/vmid/vmax are the triangle's vertices ordered by window Y
    * (see setup_sort_vertices); vprovoke supplies the values for
    * constant-interpolated attributes.
    */
   const float (*vmax)[4];
   const float (*vmid)[4];
   const float (*vmin)[4];
   const float (*vprovoke)[4];

   struct edge ebot;   /**< edge vmin -> vmid */
   struct edge etop;   /**< edge vmid -> vmax */
   struct edge emaj;   /**< edge vmin -> vmax */

   float oneoverarea;  /**< reciprocal of the area computed from the sorted edges */
   int facing;         /**< (det > 0) XOR (front_winding == PIPE_WINDING_CW) */

   float pixel_offset; /**< 0.5 with gl_rasterization_rules, otherwise 0.0 */

   struct quad_header quad[MAX_QUADS];       /**< quads filled in by flush_spans */
   struct quad_header *quad_ptrs[MAX_QUADS]; /**< pointers to quad[], handed to shade_quads */
   unsigned count;     /* NOTE(review): not referenced in the visible part of this file */

   /** Plane equations: row 0 is position, row 1+n is fragment shader input n. */
   struct quad_interp_coef coef;

   /** Pending horizontal extent for the two rows of the current quad row. */
   struct {
      int left[2];   /**< [0] = row0, [1] = row1 */
      int right[2];
      int y;         /**< even (block-aligned) Y of row0 */
   } span;

#if DEBUG_FRAGS
   uint numFragsEmitted;  /**< per primitive */
   uint numFragsWritten;  /**< per primitive */
#endif

   unsigned winding;		/* which winding to cull */
};
114
115
116
/**
 * Run the current JIT-compiled fragment shader variant over one block of
 * quads.
 *
 * \param llvmpipe  context providing the shader, framebuffer state, colour
 *                  tile cache and depth/stencil mapping
 * \param quads     array of quad pointers; asserted to be horizontally
 *                  adjacent (x0 advancing by 2, identical y0) starting at
 *                  quads[0]
 * \param nr        number of quads; asserted to cover exactly
 *                  TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT pixels
 *
 * Returns without shading when no shader variant is current.
 */
PIPE_ALIGN_STACK
static void
shade_quads(struct llvmpipe_context *llvmpipe,
            struct quad_header *quads[],
            unsigned nr)
{
   struct lp_fragment_shader *fs = llvmpipe->fs;
   struct quad_header *quad = quads[0];
   const unsigned x = quad->input.x0;
   const unsigned y = quad->input.y0;
   uint8_t *tile;
   uint8_t *color;
   void *depth;
   PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS];
   unsigned chan_index;
   unsigned q;

   assert(fs->current);
   if(!fs->current)
      return;

   /* Sanity checks */
   assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH);
   assert(x % TILE_VECTOR_WIDTH == 0);
   assert(y % TILE_VECTOR_HEIGHT == 0);
   for (q = 0; q < nr; ++q) {
      assert(quads[q]->input.x0 == x + q*2);
      assert(quads[q]->input.y0 == y);
   }

   /* mask: expand every bit of each quad's coverage mask into a full word
    * (~0 = live, 0 = dead).  The loop is hard-wired to 4 quads to match the
    * mask[4][...] array, independent of 'nr'.
    */
   for (q = 0; q < 4; ++q)
      for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
         mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;

   /* color buffer: address of pixel (x, y) inside its cached tile, or NULL
    * when no colour buffer 0 is bound.
    */
   if(llvmpipe->framebuffer.nr_cbufs >= 1 &&
      llvmpipe->framebuffer.cbufs[0]) {
      tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
      color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
   }
   else
      color = NULL;

   /* depth buffer: address inside the mapped depth/stencil surface, or NULL
    * when it is not mapped.
    * NOTE(review): the 2*x factor presumably reflects a layout that stores
    * two rows side by side -- confirm against the depth test code generator.
    */
   if(llvmpipe->zsbuf_map) {
      assert((x % 2) == 0);
      assert((y % 2) == 0);
      depth = llvmpipe->zsbuf_map +
              y*llvmpipe->zsbuf_transfer->stride +
              2*x*util_format_get_blocksize(llvmpipe->zsbuf_transfer->texture->format);
   }
   else
      depth = NULL;

   /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
   assert(lp_check_alignment(mask, 16));

   /* All buffers handed to the generated code must be 16-byte aligned. */
   assert(lp_check_alignment(depth, 16));
   assert(lp_check_alignment(color, 16));
   assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16));

   /* run shader: interpolation coefficients are taken from the first quad */
   fs->current->jit_function( &llvmpipe->jit_context,
                              x, y,
                              quad->coef->a0,
                              quad->coef->dadx,
                              quad->coef->dady,
                              &mask[0][0],
                              color,
                              depth);
}
192
193
194
195
196/**
197 * Do triangle cull test using tri determinant (sign indicates orientation)
198 * \return true if triangle is to be culled.
199 */
200static INLINE boolean
201cull_tri(const struct setup_context *setup, float det)
202{
203   if (det != 0) {
204      /* if (det < 0 then Z points toward camera and triangle is
205       * counter-clockwise winding.
206       */
207      unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
208
209      if ((winding & setup->winding) == 0)
210	 return FALSE;
211   }
212
213   /* Culled:
214    */
215   return TRUE;
216}
217
218
219
220/**
221 * Clip setup->quad against the scissor/surface bounds.
222 */
223static INLINE void
224quad_clip( struct setup_context *setup, struct quad_header *quad )
225{
226   const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
227   const int minx = (int) cliprect->minx;
228   const int maxx = (int) cliprect->maxx;
229   const int miny = (int) cliprect->miny;
230   const int maxy = (int) cliprect->maxy;
231
232   if (quad->input.x0 >= maxx ||
233       quad->input.y0 >= maxy ||
234       quad->input.x0 + 1 < minx ||
235       quad->input.y0 + 1 < miny) {
236      /* totally clipped */
237      quad->inout.mask = 0x0;
238      return;
239   }
240   if (quad->input.x0 < minx)
241      quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
242   if (quad->input.y0 < miny)
243      quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
244   if (quad->input.x0 == maxx - 1)
245      quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
246   if (quad->input.y0 == maxy - 1)
247      quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
248}
249
250
251
252/**
253 * Given an X or Y coordinate, return the block/quad coordinate that it
254 * belongs to.
255 */
256static INLINE int block( int x )
257{
258   return x & ~(2-1);
259}
260
261static INLINE int block_x( int x )
262{
263   return x & ~(TILE_VECTOR_WIDTH - 1);
264}
265
266
/**
 * Clip a single quad against the clip rectangle and, if any fragment
 * survives, shade it.
 *
 * shade_quads() asserts that it receives a full block of quads, so the
 * quad is embedded in a stack-allocated run of four quads starting at
 * block_x(x0); the three filler quads have an all-zero mask and share
 * the real quad's y0 and coefficient pointer.
 *
 * \param setup  setup context (provides the clip rectangle and llvmpipe context)
 * \param quad   quad to clip and emit; its inout.mask is modified by clipping
 */
static INLINE void
clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
{
   quad_clip( setup, quad );

   if (quad->inout.mask) {
      struct llvmpipe_context *lp = setup->llvmpipe;

#if 1
      /* XXX: The blender expects 4 quads. This is far from efficient, but
       * until we codegenerate single-quad variants of the fragment pipeline
       * we need this hack. */
      const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
      struct quad_header quads[4];
      struct quad_header *quad_ptrs[4];
      int x0 = block_x(quad->input.x0);
      unsigned i;

      /* the local arrays above are sized for exactly four quads */
      assert(nr_quads == 4);

      for(i = 0; i < nr_quads; ++i) {
         int x = x0 + 2*i;
         if(x == quad->input.x0)
            /* the slot that corresponds to the caller's quad */
            memcpy(&quads[i], quad, sizeof quads[i]);
         else {
            /* filler quad: zeroed, so its mask is empty */
            memset(&quads[i], 0, sizeof quads[i]);
            quads[i].input.x0 = x;
            quads[i].input.y0 = quad->input.y0;
            quads[i].coef = quad->coef;
         }
         quad_ptrs[i] = &quads[i];
      }

      shade_quads( lp, quad_ptrs, nr_quads );
#else
      shade_quads( lp, &quad, 1 );
#endif
   }
}
309
310
/**
 * Shade the pending two-row span and reset the span state.
 *
 * The span recorded in setup->span (left/right X bounds for row 0 and
 * row 1, at Y = span.y) is walked in chunks of TILE_VECTOR_WIDTH pixels.
 * For every chunk with at least one covered pixel, the quads in
 * setup->quad[] are filled in and passed to shade_quads().
 */
static void flush_spans( struct setup_context *setup )
{
   const int step = TILE_VECTOR_WIDTH;
   const int xleft0 = setup->span.left[0];
   const int xleft1 = setup->span.left[1];
   const int xright0 = setup->span.right[0];
   const int xright1 = setup->span.right[1];


   /* start at the chunk boundary at or before the leftmost pixel */
   int minleft = block_x(MIN2(xleft0, xleft1));
   int maxright = MAX2(xright0, xright1);
   int x;

   for (x = minleft; x < maxright; x += step) {
      /* number of pixels to drop at each end of this chunk, per row */
      unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
      unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
      unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
      unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
      unsigned lx = x;
      const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
      unsigned q = 0;

      /* low 'skip_left' bits set */
      unsigned skipmask_left0 = (1U << skip_left0) - 1U;
      unsigned skipmask_left1 = (1U << skip_left1) - 1U;

      /* These calculations fail when step == 32 and skip_right == 0.
       */
      /* all bits from (step - skip_right) upwards set */
      unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
      unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);

      /* bit i set <=> pixel x+i of that row lies inside the span */
      unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
      unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;

      if (mask0 | mask1) {
         for(q = 0; q < nr_quads; ++q) {
            /* two pixels of row 0 in bits 0-1, two pixels of row 1 in bits 2-3 */
            unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
            setup->quad[q].input.x0 = lx;
            setup->quad[q].input.y0 = setup->span.y;
            setup->quad[q].inout.mask = quadmask;
            setup->quad_ptrs[q] = &setup->quad[q];
            mask0 >>= 2;
            mask1 >>= 2;
            lx += 2;
         }
         /* every covered pixel must have been consumed by a quad */
         assert(!(mask0 | mask1));

         shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads );
      }
   }


   /* Reset to an empty span: left > right for both rows. */
   setup->span.y = 0;
   setup->span.right[0] = 0;
   setup->span.right[1] = 0;
   setup->span.left[0] = 1000000;     /* greater than right[0] */
   setup->span.left[1] = 1000000;     /* greater than right[1] */
}
371
372
#if DEBUG_VERTS
/**
 * Debug helper: print all attributes of one vertex and flag any attribute
 * that contains an infinite or NaN component.
 *
 * \param setup  setup context; setup->quad[0].nr_attrs gives the number of
 *               attributes to print
 * \param v      vertex data, one float[4] per attribute
 */
static void print_vertex(const struct setup_context *setup,
                         const float (*v)[4])
{
   int i;
   /* %p takes a pointer to void, so convert explicitly */
   debug_printf("   Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < setup->quad[0].nr_attrs; i++) {
      debug_printf("     %d: %f %f %f %f\n",  i,
              v[i][0], v[i][1], v[i][2], v[i][3]);
      /* check every component, not just the first one */
      if (util_is_inf_or_nan(v[i][0]) ||
          util_is_inf_or_nan(v[i][1]) ||
          util_is_inf_or_nan(v[i][2]) ||
          util_is_inf_or_nan(v[i][3])) {
         debug_printf("   NaN!\n");
      }
   }
}
#endif
388
389/**
390 * Sort the vertices from top to bottom order, setting up the triangle
391 * edge fields (ebot, emaj, etop).
392 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise
393 */
394static boolean setup_sort_vertices( struct setup_context *setup,
395                                    float det,
396                                    const float (*v0)[4],
397                                    const float (*v1)[4],
398                                    const float (*v2)[4] )
399{
400   setup->vprovoke = v2;
401
402   /* determine bottom to top order of vertices */
403   {
404      float y0 = v0[0][1];
405      float y1 = v1[0][1];
406      float y2 = v2[0][1];
407      if (y0 <= y1) {
408	 if (y1 <= y2) {
409	    /* y0<=y1<=y2 */
410	    setup->vmin = v0;
411	    setup->vmid = v1;
412	    setup->vmax = v2;
413	 }
414	 else if (y2 <= y0) {
415	    /* y2<=y0<=y1 */
416	    setup->vmin = v2;
417	    setup->vmid = v0;
418	    setup->vmax = v1;
419	 }
420	 else {
421	    /* y0<=y2<=y1 */
422	    setup->vmin = v0;
423	    setup->vmid = v2;
424	    setup->vmax = v1;
425	 }
426      }
427      else {
428	 if (y0 <= y2) {
429	    /* y1<=y0<=y2 */
430	    setup->vmin = v1;
431	    setup->vmid = v0;
432	    setup->vmax = v2;
433	 }
434	 else if (y2 <= y1) {
435	    /* y2<=y1<=y0 */
436	    setup->vmin = v2;
437	    setup->vmid = v1;
438	    setup->vmax = v0;
439	 }
440	 else {
441	    /* y1<=y2<=y0 */
442	    setup->vmin = v1;
443	    setup->vmid = v2;
444	    setup->vmax = v0;
445	 }
446      }
447   }
448
449   setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0];
450   setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1];
451   setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
452   setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
453   setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0];
454   setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1];
455
456   /*
457    * Compute triangle's area.  Use 1/area to compute partial
458    * derivatives of attributes later.
459    *
460    * The area will be the same as prim->det, but the sign may be
461    * different depending on how the vertices get sorted above.
462    *
463    * To determine whether the primitive is front or back facing we
464    * use the prim->det value because its sign is correct.
465    */
466   {
467      const float area = (setup->emaj.dx * setup->ebot.dy -
468			    setup->ebot.dx * setup->emaj.dy);
469
470      setup->oneoverarea = 1.0f / area;
471
472      /*
473      debug_printf("%s one-over-area %f  area %f  det %f\n",
474                   __FUNCTION__, setup->oneoverarea, area, det );
475      */
476      if (util_is_inf_or_nan(setup->oneoverarea))
477         return FALSE;
478   }
479
480   /* We need to know if this is a front or back-facing triangle for:
481    *  - the GLSL gl_FrontFacing fragment attribute (bool)
482    *  - two-sided stencil test
483    */
484   setup->facing =
485      ((det > 0.0) ^
486       (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW));
487
488   /* Prepare pixel offset for rasterisation:
489    *  - pixel center (0.5, 0.5) for GL, or
490    *  - assume (0.0, 0.0) for other APIs.
491    */
492   if (setup->llvmpipe->rasterizer->gl_rasterization_rules) {
493      setup->pixel_offset = 0.5f;
494   } else {
495      setup->pixel_offset = 0.0f;
496   }
497
498   return TRUE;
499}
500
501
502/**
503 * Compute a0, dadx and dady for a linearly interpolated coefficient,
504 * for a triangle.
505 */
506static void tri_pos_coeff( struct setup_context *setup,
507                           uint vertSlot, unsigned i)
508{
509   float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
510   float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
511   float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
512   float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
513   float dadx = a * setup->oneoverarea;
514   float dady = b * setup->oneoverarea;
515
516   assert(i <= 3);
517
518   setup->coef.dadx[0][i] = dadx;
519   setup->coef.dady[0][i] = dady;
520
521   /* calculate a0 as the value which would be sampled for the
522    * fragment at (0,0), taking into account that we want to sample at
523    * pixel centers, in other words (pixel_offset, pixel_offset).
524    *
525    * this is neat but unfortunately not a good way to do things for
526    * triangles with very large values of dadx or dady as it will
527    * result in the subtraction and re-addition from a0 of a very
528    * large number, which means we'll end up loosing a lot of the
529    * fractional bits and precision from a0.  the way to fix this is
530    * to define a0 as the sample at a pixel center somewhere near vmin
531    * instead - i'll switch to this later.
532    */
533   setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
534                           (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
535                            dady * (setup->vmin[0][1] - setup->pixel_offset)));
536
537   /*
538   debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
539                slot, "xyzw"[i],
540                setup->coef[slot].a0[i],
541                setup->coef[slot].dadx[i],
542                setup->coef[slot].dady[i]);
543   */
544}
545
546
547/**
548 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
549 * The value value comes from vertex[slot][i].
550 * The result will be put into setup->coef[slot].a0[i].
551 * \param slot  which attribute slot
552 * \param i  which component of the slot (0..3)
553 */
554static void const_pos_coeff( struct setup_context *setup,
555                             uint vertSlot, unsigned i)
556{
557   setup->coef.dadx[0][i] = 0;
558   setup->coef.dady[0][i] = 0;
559
560   /* need provoking vertex info!
561    */
562   setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i];
563}
564
565
566/**
567 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
568 * The value value comes from vertex[slot][i].
569 * The result will be put into setup->coef[slot].a0[i].
570 * \param slot  which attribute slot
571 * \param i  which component of the slot (0..3)
572 */
573static void const_coeff( struct setup_context *setup,
574                         unsigned attrib,
575                         uint vertSlot)
576{
577   unsigned i;
578   for (i = 0; i < NUM_CHANNELS; ++i) {
579      setup->coef.dadx[1 + attrib][i] = 0;
580      setup->coef.dady[1 + attrib][i] = 0;
581
582      /* need provoking vertex info!
583       */
584      setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i];
585   }
586}
587
588
589/**
590 * Compute a0, dadx and dady for a linearly interpolated coefficient,
591 * for a triangle.
592 */
593static void tri_linear_coeff( struct setup_context *setup,
594                              unsigned attrib,
595                              uint vertSlot)
596{
597   unsigned i;
598   for (i = 0; i < NUM_CHANNELS; ++i) {
599      float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
600      float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
601      float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
602      float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
603      float dadx = a * setup->oneoverarea;
604      float dady = b * setup->oneoverarea;
605
606      assert(i <= 3);
607
608      setup->coef.dadx[1 + attrib][i] = dadx;
609      setup->coef.dady[1 + attrib][i] = dady;
610
611      /* calculate a0 as the value which would be sampled for the
612       * fragment at (0,0), taking into account that we want to sample at
613       * pixel centers, in other words (0.5, 0.5).
614       *
615       * this is neat but unfortunately not a good way to do things for
616       * triangles with very large values of dadx or dady as it will
617       * result in the subtraction and re-addition from a0 of a very
618       * large number, which means we'll end up loosing a lot of the
619       * fractional bits and precision from a0.  the way to fix this is
620       * to define a0 as the sample at a pixel center somewhere near vmin
621       * instead - i'll switch to this later.
622       */
623      setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
624                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
625                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
626
627      /*
628      debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
629                   slot, "xyzw"[i],
630                   setup->coef[slot].a0[i],
631                   setup->coef[slot].dadx[i],
632                   setup->coef[slot].dady[i]);
633      */
634   }
635}
636
637
638/**
639 * Compute a0, dadx and dady for a perspective-corrected interpolant,
640 * for a triangle.
641 * We basically multiply the vertex value by 1/w before computing
642 * the plane coefficients (a0, dadx, dady).
643 * Later, when we compute the value at a particular fragment position we'll
644 * divide the interpolated value by the interpolated W at that fragment.
645 */
646static void tri_persp_coeff( struct setup_context *setup,
647                             unsigned attrib,
648                             uint vertSlot)
649{
650   unsigned i;
651   for (i = 0; i < NUM_CHANNELS; ++i) {
652      /* premultiply by 1/w  (v[0][3] is always W):
653       */
654      float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
655      float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
656      float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
657      float botda = mida - mina;
658      float majda = maxa - mina;
659      float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
660      float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
661      float dadx = a * setup->oneoverarea;
662      float dady = b * setup->oneoverarea;
663
664      /*
665      debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
666                   setup->vmin[vertSlot][i],
667                   setup->vmid[vertSlot][i],
668                   setup->vmax[vertSlot][i]
669             );
670      */
671      assert(i <= 3);
672
673      setup->coef.dadx[1 + attrib][i] = dadx;
674      setup->coef.dady[1 + attrib][i] = dady;
675      setup->coef.a0[1 + attrib][i] = (mina -
676                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
677                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
678   }
679}
680
681
682/**
683 * Special coefficient setup for gl_FragCoord.
684 * X and Y are trivial, though Y has to be inverted for OpenGL.
685 * Z and W are copied from posCoef which should have already been computed.
686 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
687 */
688static void
689setup_fragcoord_coeff(struct setup_context *setup, uint slot)
690{
691   /*X*/
692   setup->coef.a0[1 + slot][0] = 0;
693   setup->coef.dadx[1 + slot][0] = 1.0;
694   setup->coef.dady[1 + slot][0] = 0.0;
695   /*Y*/
696   setup->coef.a0[1 + slot][1] = 0.0;
697   setup->coef.dadx[1 + slot][1] = 0.0;
698   setup->coef.dady[1 + slot][1] = 1.0;
699   /*Z*/
700   setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2];
701   setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2];
702   setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2];
703   /*W*/
704   setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3];
705   setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3];
706   setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3];
707}
708
709
710
711/**
712 * Compute the setup->coef[] array dadx, dady, a0 values.
713 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
714 */
715static void setup_tri_coefficients( struct setup_context *setup )
716{
717   struct llvmpipe_context *llvmpipe = setup->llvmpipe;
718   const struct lp_fragment_shader *lpfs = llvmpipe->fs;
719   const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
720   uint fragSlot;
721
722   /* z and w are done by linear interpolation:
723    */
724   tri_pos_coeff(setup, 0, 2);
725   tri_pos_coeff(setup, 0, 3);
726
727   /* setup interpolation for all the remaining attributes:
728    */
729   for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
730      const uint vertSlot = vinfo->attrib[fragSlot].src_index;
731
732      switch (vinfo->attrib[fragSlot].interp_mode) {
733      case INTERP_CONSTANT:
734         const_coeff(setup, fragSlot, vertSlot);
735         break;
736      case INTERP_LINEAR:
737         tri_linear_coeff(setup, fragSlot, vertSlot);
738         break;
739      case INTERP_PERSPECTIVE:
740         tri_persp_coeff(setup, fragSlot, vertSlot);
741         break;
742      case INTERP_POS:
743         setup_fragcoord_coeff(setup, fragSlot);
744         break;
745      default:
746         assert(0);
747      }
748
749      if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
750         setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
751         setup->coef.dadx[1 + fragSlot][0] = 0.0;
752         setup->coef.dady[1 + fragSlot][0] = 0.0;
753      }
754   }
755}
756
757
758
759static void setup_tri_edges( struct setup_context *setup )
760{
761   float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
762   float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
763
764   float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
765   float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
766   float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
767
768   setup->emaj.sy = ceilf(vmin_y);
769   setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
770   setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
771   setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
772
773   setup->etop.sy = ceilf(vmid_y);
774   setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy);
775   setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
776   setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
777
778   setup->ebot.sy = ceilf(vmin_y);
779   setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy);
780   setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
781   setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
782}
783
784
/**
 * Render the upper or lower half of a triangle.
 * Scissoring/cliprect is applied here too.
 *
 * Each scanline's [left, right) extent is recorded in setup->span; the
 * pending span is flushed whenever a scanline falls into a different
 * two-row block than the one currently being accumulated.
 *
 * \param setup   setup context
 * \param eleft   edge bounding the half-triangle on the left
 * \param eright  edge bounding the half-triangle on the right
 * \param lines   number of scanlines in this half, before clipping
 *
 * On return both edges' sx/sy have been advanced by \p lines rows.
 */
static void subtriangle( struct setup_context *setup,
			 struct edge *eleft,
			 struct edge *eright,
			 unsigned lines )
{
   const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect;
   const int minx = (int) cliprect->minx;
   const int maxx = (int) cliprect->maxx;
   const int miny = (int) cliprect->miny;
   const int maxy = (int) cliprect->maxy;
   int y, start_y, finish_y;
   int sy = (int)eleft->sy;

   /* both edges must start on the same scanline */
   assert((int)eleft->sy == (int) eright->sy);

   /* clip top/bottom */
   start_y = sy;
   if (start_y < miny)
      start_y = miny;

   finish_y = sy + lines;
   if (finish_y > maxy)
      finish_y = maxy;

   /* from here on start_y/finish_y are row offsets relative to sy */
   start_y -= sy;
   finish_y -= sy;

   /*
   debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
   */

   for (y = start_y; y < finish_y; y++) {

      /* avoid accumulating adds as floats don't have the precision to
       * accurately iterate large triangle edges that way.  luckily we
       * can just multiply these days.
       *
       * this is all drowned out by the attribute interpolation anyway.
       */
      int left = (int)(eleft->sx + y * eleft->dxdy);
      int right = (int)(eright->sx + y * eright->dxdy);

      /* clip left/right */
      if (left < minx)
         left = minx;
      if (right > maxx)
         right = maxx;

      /* rows with an empty extent are not recorded */
      if (left < right) {
         int _y = sy + y;
         /* entering a new two-row block: emit what has been gathered */
         if (block(_y) != setup->span.y) {
            flush_spans(setup);
            setup->span.y = block(_y);
         }

         /* the low bit of the scanline selects row 0 or row 1 of the block */
         setup->span.left[_y&1] = left;
         setup->span.right[_y&1] = right;
      }
   }


   /* save the values so that emaj can be restarted:
    */
   eleft->sx += lines * eleft->dxdy;
   eright->sx += lines * eright->dxdy;
   eleft->sy += lines;
   eright->sy += lines;
}
857
858
/**
 * Recompute the primitive's determinant from its window positions.
 * This is needed because the value is not delivered through the
 * vbuf_render interface and must be calculated here.
 *
 * \param v0  first vertex (attribute 0 holds the position)
 * \param v1  second vertex
 * \param v2  third vertex
 * \return Z component of cross(v0 - v2, v1 - v2)
 */
static float
calc_det( const float (*v0)[4],
          const float (*v1)[4],
          const float (*v2)[4] )
{
   const float *pos0 = v0[0];
   const float *pos1 = v1[0];
   const float *pos2 = v2[0];

   /* edge vectors e = v0 - v2, f = v1 - v2 */
   const float e_x = pos0[0] - pos2[0];
   const float e_y = pos0[1] - pos2[1];
   const float f_x = pos1[0] - pos2[0];
   const float f_y = pos1[1] - pos2[1];

   /* det = cross(e,f).z */
   return e_x * f_y - e_y * f_x;
}
878
879
880/**
881 * Do setup for triangle rasterization, then render the triangle.
882 */
883void llvmpipe_setup_tri( struct setup_context *setup,
884                const float (*v0)[4],
885                const float (*v1)[4],
886                const float (*v2)[4] )
887{
888   float det;
889
890#if DEBUG_VERTS
891   debug_printf("Setup triangle:\n");
892   print_vertex(setup, v0);
893   print_vertex(setup, v1);
894   print_vertex(setup, v2);
895#endif
896
897   if (setup->llvmpipe->no_rast)
898      return;
899
900   det = calc_det(v0, v1, v2);
901   /*
902   debug_printf("%s\n", __FUNCTION__ );
903   */
904
905#if DEBUG_FRAGS
906   setup->numFragsEmitted = 0;
907   setup->numFragsWritten = 0;
908#endif
909
910   if (cull_tri( setup, det ))
911      return;
912
913   if (!setup_sort_vertices( setup, det, v0, v1, v2 ))
914      return;
915   setup_tri_coefficients( setup );
916   setup_tri_edges( setup );
917
918   assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES);
919
920   setup->span.y = 0;
921   setup->span.right[0] = 0;
922   setup->span.right[1] = 0;
923   /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
924
925   /*   init_constant_attribs( setup ); */
926
927   if (setup->oneoverarea < 0.0) {
928      /* emaj on left:
929       */
930      subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
931      subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
932   }
933   else {
934      /* emaj on right:
935       */
936      subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
937      subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
938   }
939
940   flush_spans( setup );
941
942#if DEBUG_FRAGS
943   printf("Tri: %u frags emitted, %u written\n",
944          setup->numFragsEmitted,
945          setup->numFragsWritten);
946#endif
947}
948
949
950
951/**
952 * Compute a0, dadx and dady for a linearly interpolated coefficient,
953 * for a line.
954 */
955static void
956linear_pos_coeff(struct setup_context *setup,
957                 uint vertSlot, uint i)
958{
959   const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
960   const float dadx = da * setup->emaj.dx * setup->oneoverarea;
961   const float dady = da * setup->emaj.dy * setup->oneoverarea;
962   setup->coef.dadx[0][i] = dadx;
963   setup->coef.dady[0][i] = dady;
964   setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
965                           (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
966                            dady * (setup->vmin[0][1] - setup->pixel_offset)));
967}
968
969
970/**
971 * Compute a0, dadx and dady for a linearly interpolated coefficient,
972 * for a line.
973 */
974static void
975line_linear_coeff(struct setup_context *setup,
976                  unsigned attrib,
977                  uint vertSlot)
978{
979   unsigned i;
980   for (i = 0; i < NUM_CHANNELS; ++i) {
981      const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
982      const float dadx = da * setup->emaj.dx * setup->oneoverarea;
983      const float dady = da * setup->emaj.dy * setup->oneoverarea;
984      setup->coef.dadx[1 + attrib][i] = dadx;
985      setup->coef.dady[1 + attrib][i] = dady;
986      setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
987                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
988                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
989   }
990}
991
992
993/**
994 * Compute a0, dadx and dady for a perspective-corrected interpolant,
995 * for a line.
996 */
997static void
998line_persp_coeff(struct setup_context *setup,
999                 unsigned attrib,
1000                 uint vertSlot)
1001{
1002   unsigned i;
1003   for (i = 0; i < NUM_CHANNELS; ++i) {
1004      /* XXX double-check/verify this arithmetic */
1005      const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
1006      const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
1007      const float da = a1 - a0;
1008      const float dadx = da * setup->emaj.dx * setup->oneoverarea;
1009      const float dady = da * setup->emaj.dy * setup->oneoverarea;
1010      setup->coef.dadx[1 + attrib][i] = dadx;
1011      setup->coef.dady[1 + attrib][i] = dady;
1012      setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
1013                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
1014                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
1015   }
1016}
1017
1018
1019/**
1020 * Compute the setup->coef[] array dadx, dady, a0 values.
1021 * Must be called after setup->vmin,vmax are initialized.
1022 */
1023static INLINE boolean
1024setup_line_coefficients(struct setup_context *setup,
1025                        const float (*v0)[4],
1026                        const float (*v1)[4])
1027{
1028   struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1029   const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1030   const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1031   uint fragSlot;
1032   float area;
1033
1034   /* use setup->vmin, vmax to point to vertices */
1035   if (llvmpipe->rasterizer->flatshade_first)
1036      setup->vprovoke = v0;
1037   else
1038      setup->vprovoke = v1;
1039   setup->vmin = v0;
1040   setup->vmax = v1;
1041
1042   setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0];
1043   setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1];
1044
1045   /* NOTE: this is not really area but something proportional to it */
1046   area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy;
1047   if (area == 0.0f || util_is_inf_or_nan(area))
1048      return FALSE;
1049   setup->oneoverarea = 1.0f / area;
1050
1051   /* z and w are done by linear interpolation:
1052    */
1053   linear_pos_coeff(setup, 0, 2);
1054   linear_pos_coeff(setup, 0, 3);
1055
1056   /* setup interpolation for all the remaining attributes:
1057    */
1058   for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1059      const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1060
1061      switch (vinfo->attrib[fragSlot].interp_mode) {
1062      case INTERP_CONSTANT:
1063         const_coeff(setup, fragSlot, vertSlot);
1064         break;
1065      case INTERP_LINEAR:
1066         line_linear_coeff(setup, fragSlot, vertSlot);
1067         break;
1068      case INTERP_PERSPECTIVE:
1069         line_persp_coeff(setup, fragSlot, vertSlot);
1070         break;
1071      case INTERP_POS:
1072         setup_fragcoord_coeff(setup, fragSlot);
1073         break;
1074      default:
1075         assert(0);
1076      }
1077
1078      if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1079         setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
1080         setup->coef.dadx[1 + fragSlot][0] = 0.0;
1081         setup->coef.dady[1 + fragSlot][0] = 0.0;
1082      }
1083   }
1084   return TRUE;
1085}
1086
1087
1088/**
1089 * Plot a pixel in a line segment.
1090 */
1091static INLINE void
1092plot(struct setup_context *setup, int x, int y)
1093{
1094   const int iy = y & 1;
1095   const int ix = x & 1;
1096   const int quadX = x - ix;
1097   const int quadY = y - iy;
1098   const int mask = (1 << ix) << (2 * iy);
1099
1100   if (quadX != setup->quad[0].input.x0 ||
1101       quadY != setup->quad[0].input.y0)
1102   {
1103      /* flush prev quad, start new quad */
1104
1105      if (setup->quad[0].input.x0 != -1)
1106         clip_emit_quad( setup, &setup->quad[0] );
1107
1108      setup->quad[0].input.x0 = quadX;
1109      setup->quad[0].input.y0 = quadY;
1110      setup->quad[0].inout.mask = 0x0;
1111   }
1112
1113   setup->quad[0].inout.mask |= mask;
1114}
1115
1116
1117/**
1118 * Do setup for line rasterization, then render the line.
1119 * Single-pixel width, no stipple, etc.  We rely on the 'draw' module
1120 * to handle stippling and wide lines.
1121 */
1122void
1123llvmpipe_setup_line(struct setup_context *setup,
1124           const float (*v0)[4],
1125           const float (*v1)[4])
1126{
1127   int x0 = (int) v0[0][0];
1128   int x1 = (int) v1[0][0];
1129   int y0 = (int) v0[0][1];
1130   int y1 = (int) v1[0][1];
1131   int dx = x1 - x0;
1132   int dy = y1 - y0;
1133   int xstep, ystep;
1134
1135#if DEBUG_VERTS
1136   debug_printf("Setup line:\n");
1137   print_vertex(setup, v0);
1138   print_vertex(setup, v1);
1139#endif
1140
1141   if (setup->llvmpipe->no_rast)
1142      return;
1143
1144   if (dx == 0 && dy == 0)
1145      return;
1146
1147   if (!setup_line_coefficients(setup, v0, v1))
1148      return;
1149
1150   assert(v0[0][0] < 1.0e9);
1151   assert(v0[0][1] < 1.0e9);
1152   assert(v1[0][0] < 1.0e9);
1153   assert(v1[0][1] < 1.0e9);
1154
1155   if (dx < 0) {
1156      dx = -dx;   /* make positive */
1157      xstep = -1;
1158   }
1159   else {
1160      xstep = 1;
1161   }
1162
1163   if (dy < 0) {
1164      dy = -dy;   /* make positive */
1165      ystep = -1;
1166   }
1167   else {
1168      ystep = 1;
1169   }
1170
1171   assert(dx >= 0);
1172   assert(dy >= 0);
1173   assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES);
1174
1175   setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1;
1176   setup->quad[0].inout.mask = 0x0;
1177
1178   /* XXX temporary: set coverage to 1.0 so the line appears
1179    * if AA mode happens to be enabled.
1180    */
1181   setup->quad[0].input.coverage[0] =
1182   setup->quad[0].input.coverage[1] =
1183   setup->quad[0].input.coverage[2] =
1184   setup->quad[0].input.coverage[3] = 1.0;
1185
1186   if (dx > dy) {
1187      /*** X-major line ***/
1188      int i;
1189      const int errorInc = dy + dy;
1190      int error = errorInc - dx;
1191      const int errorDec = error - dx;
1192
1193      for (i = 0; i < dx; i++) {
1194         plot(setup, x0, y0);
1195
1196         x0 += xstep;
1197         if (error < 0) {
1198            error += errorInc;
1199         }
1200         else {
1201            error += errorDec;
1202            y0 += ystep;
1203         }
1204      }
1205   }
1206   else {
1207      /*** Y-major line ***/
1208      int i;
1209      const int errorInc = dx + dx;
1210      int error = errorInc - dy;
1211      const int errorDec = error - dy;
1212
1213      for (i = 0; i < dy; i++) {
1214         plot(setup, x0, y0);
1215
1216         y0 += ystep;
1217         if (error < 0) {
1218            error += errorInc;
1219         }
1220         else {
1221            error += errorDec;
1222            x0 += xstep;
1223         }
1224      }
1225   }
1226
1227   /* draw final quad */
1228   if (setup->quad[0].inout.mask) {
1229      clip_emit_quad( setup, &setup->quad[0] );
1230   }
1231}
1232
1233
1234static void
1235point_persp_coeff(struct setup_context *setup,
1236                  const float (*vert)[4],
1237                  unsigned attrib,
1238                  uint vertSlot)
1239{
1240   unsigned i;
1241   for(i = 0; i < NUM_CHANNELS; ++i) {
1242      setup->coef.dadx[1 + attrib][i] = 0.0F;
1243      setup->coef.dady[1 + attrib][i] = 0.0F;
1244      setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3];
1245   }
1246}
1247
1248
1249/**
1250 * Do setup for point rasterization, then render the point.
1251 * Round or square points...
1252 * XXX could optimize a lot for 1-pixel points.
1253 */
1254void
1255llvmpipe_setup_point( struct setup_context *setup,
1256             const float (*v0)[4] )
1257{
1258   struct llvmpipe_context *llvmpipe = setup->llvmpipe;
1259   const struct lp_fragment_shader *lpfs = llvmpipe->fs;
1260   const int sizeAttr = setup->llvmpipe->psize_slot;
1261   const float size
1262      = sizeAttr > 0 ? v0[sizeAttr][0]
1263      : setup->llvmpipe->rasterizer->point_size;
1264   const float halfSize = 0.5F * size;
1265   const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth;
1266   const float x = v0[0][0];  /* Note: data[0] is always position */
1267   const float y = v0[0][1];
1268   const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe);
1269   uint fragSlot;
1270
1271#if DEBUG_VERTS
1272   debug_printf("Setup point:\n");
1273   print_vertex(setup, v0);
1274#endif
1275
1276   if (llvmpipe->no_rast)
1277      return;
1278
1279   assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS);
1280
1281   /* For points, all interpolants are constant-valued.
1282    * However, for point sprites, we'll need to setup texcoords appropriately.
1283    * XXX: which coefficients are the texcoords???
1284    * We may do point sprites as textured quads...
1285    *
1286    * KW: We don't know which coefficients are texcoords - ultimately
1287    * the choice of what interpolation mode to use for each attribute
1288    * should be determined by the fragment program, using
1289    * per-attribute declaration statements that include interpolation
1290    * mode as a parameter.  So either the fragment program will have
1291    * to be adjusted for pointsprite vs normal point behaviour, or
1292    * otherwise a special interpolation mode will have to be defined
1293    * which matches the required behaviour for point sprites.  But -
1294    * the latter is not a feature of normal hardware, and as such
1295    * probably should be ruled out on that basis.
1296    */
1297   setup->vprovoke = v0;
1298
1299   /* setup Z, W */
1300   const_pos_coeff(setup, 0, 2);
1301   const_pos_coeff(setup, 0, 3);
1302
1303   for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
1304      const uint vertSlot = vinfo->attrib[fragSlot].src_index;
1305
1306      switch (vinfo->attrib[fragSlot].interp_mode) {
1307      case INTERP_CONSTANT:
1308         /* fall-through */
1309      case INTERP_LINEAR:
1310         const_coeff(setup, fragSlot, vertSlot);
1311         break;
1312      case INTERP_PERSPECTIVE:
1313         point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot);
1314         break;
1315      case INTERP_POS:
1316         setup_fragcoord_coeff(setup, fragSlot);
1317         break;
1318      default:
1319         assert(0);
1320      }
1321
1322      if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
1323         setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing;
1324         setup->coef.dadx[1 + fragSlot][0] = 0.0;
1325         setup->coef.dady[1 + fragSlot][0] = 0.0;
1326      }
1327   }
1328
1329
1330   if (halfSize <= 0.5 && !round) {
1331      /* special case for 1-pixel points */
1332      const int ix = ((int) x) & 1;
1333      const int iy = ((int) y) & 1;
1334      setup->quad[0].input.x0 = (int) x - ix;
1335      setup->quad[0].input.y0 = (int) y - iy;
1336      setup->quad[0].inout.mask = (1 << ix) << (2 * iy);
1337      clip_emit_quad( setup, &setup->quad[0] );
1338   }
1339   else {
1340      if (round) {
1341         /* rounded points */
1342         const int ixmin = block((int) (x - halfSize));
1343         const int ixmax = block((int) (x + halfSize));
1344         const int iymin = block((int) (y - halfSize));
1345         const int iymax = block((int) (y + halfSize));
1346         const float rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
1347         const float rmax = halfSize + 0.7071F;
1348         const float rmin2 = MAX2(0.0F, rmin * rmin);
1349         const float rmax2 = rmax * rmax;
1350         const float cscale = 1.0F / (rmax2 - rmin2);
1351         int ix, iy;
1352
1353         for (iy = iymin; iy <= iymax; iy += 2) {
1354            for (ix = ixmin; ix <= ixmax; ix += 2) {
1355               float dx, dy, dist2, cover;
1356
1357               setup->quad[0].inout.mask = 0x0;
1358
1359               dx = (ix + 0.5f) - x;
1360               dy = (iy + 0.5f) - y;
1361               dist2 = dx * dx + dy * dy;
1362               if (dist2 <= rmax2) {
1363                  cover = 1.0F - (dist2 - rmin2) * cscale;
1364                  setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f);
1365                  setup->quad[0].inout.mask |= MASK_TOP_LEFT;
1366               }
1367
1368               dx = (ix + 1.5f) - x;
1369               dy = (iy + 0.5f) - y;
1370               dist2 = dx * dx + dy * dy;
1371               if (dist2 <= rmax2) {
1372                  cover = 1.0F - (dist2 - rmin2) * cscale;
1373                  setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f);
1374                  setup->quad[0].inout.mask |= MASK_TOP_RIGHT;
1375               }
1376
1377               dx = (ix + 0.5f) - x;
1378               dy = (iy + 1.5f) - y;
1379               dist2 = dx * dx + dy * dy;
1380               if (dist2 <= rmax2) {
1381                  cover = 1.0F - (dist2 - rmin2) * cscale;
1382                  setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f);
1383                  setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT;
1384               }
1385
1386               dx = (ix + 1.5f) - x;
1387               dy = (iy + 1.5f) - y;
1388               dist2 = dx * dx + dy * dy;
1389               if (dist2 <= rmax2) {
1390                  cover = 1.0F - (dist2 - rmin2) * cscale;
1391                  setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f);
1392                  setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT;
1393               }
1394
1395               if (setup->quad[0].inout.mask) {
1396                  setup->quad[0].input.x0 = ix;
1397                  setup->quad[0].input.y0 = iy;
1398                  clip_emit_quad( setup, &setup->quad[0] );
1399               }
1400            }
1401         }
1402      }
1403      else {
1404         /* square points */
1405         const int xmin = (int) (x + 0.75 - halfSize);
1406         const int ymin = (int) (y + 0.25 - halfSize);
1407         const int xmax = xmin + (int) size;
1408         const int ymax = ymin + (int) size;
1409         /* XXX could apply scissor to xmin,ymin,xmax,ymax now */
1410         const int ixmin = block(xmin);
1411         const int ixmax = block(xmax - 1);
1412         const int iymin = block(ymin);
1413         const int iymax = block(ymax - 1);
1414         int ix, iy;
1415
1416         /*
1417         debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
1418         */
1419         for (iy = iymin; iy <= iymax; iy += 2) {
1420            uint rowMask = 0xf;
1421            if (iy < ymin) {
1422               /* above the top edge */
1423               rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
1424            }
1425            if (iy + 1 >= ymax) {
1426               /* below the bottom edge */
1427               rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
1428            }
1429
1430            for (ix = ixmin; ix <= ixmax; ix += 2) {
1431               uint mask = rowMask;
1432
1433               if (ix < xmin) {
1434                  /* fragment is past left edge of point, turn off left bits */
1435                  mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
1436               }
1437               if (ix + 1 >= xmax) {
1438                  /* past the right edge */
1439                  mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
1440               }
1441
1442               setup->quad[0].inout.mask = mask;
1443               setup->quad[0].input.x0 = ix;
1444               setup->quad[0].input.y0 = iy;
1445               clip_emit_quad( setup, &setup->quad[0] );
1446            }
1447         }
1448      }
1449   }
1450}
1451
1452void llvmpipe_setup_prepare( struct setup_context *setup )
1453{
1454   struct llvmpipe_context *lp = setup->llvmpipe;
1455
1456   if (lp->dirty) {
1457      llvmpipe_update_derived(lp);
1458   }
1459
1460   if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
1461       lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL &&
1462       lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) {
1463      /* we'll do culling */
1464      setup->winding = lp->rasterizer->cull_mode;
1465   }
1466   else {
1467      /* 'draw' will do culling */
1468      setup->winding = PIPE_WINDING_NONE;
1469   }
1470}
1471
1472
1473
/**
 * Release a setup context obtained from llvmpipe_setup_create_context().
 * The context was allocated with align_malloc(), so it is returned with
 * align_free().
 */
void
llvmpipe_setup_destroy_context( struct setup_context *setup )
{
   align_free( setup );
}
1478
1479
1480/**
1481 * Create a new primitive setup/render stage.
1482 */
1483struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe )
1484{
1485   struct setup_context *setup;
1486   unsigned i;
1487
1488   setup = align_malloc(sizeof(struct setup_context), 16);
1489   if (!setup)
1490      return NULL;
1491
1492   memset(setup, 0, sizeof *setup);
1493   setup->llvmpipe = llvmpipe;
1494
1495   for (i = 0; i < MAX_QUADS; i++) {
1496      setup->quad[i].coef = &setup->coef;
1497   }
1498
1499   setup->span.left[0] = 1000000;     /* greater than right[0] */
1500   setup->span.left[1] = 1000000;     /* greater than right[1] */
1501
1502   return setup;
1503}
1504
1505