1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * Binning code for triangles
30 */
31
32#include "util/u_math.h"
33#include "util/u_memory.h"
34#include "util/u_rect.h"
35#include "util/u_sse.h"
36#include "lp_perf.h"
37#include "lp_setup_context.h"
38#include "lp_rast.h"
39#include "lp_state_fs.h"
40#include "lp_state_setup.h"
41
42#define NUM_CHANNELS 4
43
44#if defined(PIPE_ARCH_SSE)
45#include <emmintrin.h>
46#endif
47
48static INLINE int
49subpixel_snap(float a)
50{
51   return util_iround(FIXED_ONE * a);
52}
53
54static INLINE float
55fixed_to_float(int a)
56{
57   return a * (1.0 / FIXED_ONE);
58}
59
60
/*
 * Position and area in fixed point coordinates.
 *
 * Filled in by calc_fixed_position() and kept consistent by the
 * rotate_fixed_position_*() helpers when two vertices are exchanged.
 */
struct fixed_position {
   int x[4];   /* snapped x of vertices 0..2; element 3 is zeroed (the SSE path loads all four) */
   int y[4];   /* snapped y of vertices 0..2; element 3 is zeroed (the SSE path loads all four) */
   int area;   /* dx01 * dy20 - dx20 * dy01; its sign gives the winding */
   int dx01;   /* x[0] - x[1] */
   int dy01;   /* y[0] - y[1] */
   int dx20;   /* x[2] - x[0] */
   int dy20;   /* y[2] - y[0] */
};
71
72
73/**
74 * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
75 * immediately after it.
76 * The memory is allocated from the per-scene pool, not per-tile.
77 * \param tri_size  returns number of bytes allocated
78 * \param num_inputs  number of fragment shader inputs
79 * \return pointer to triangle space
80 */
81struct lp_rast_triangle *
82lp_setup_alloc_triangle(struct lp_scene *scene,
83                        unsigned nr_inputs,
84                        unsigned nr_planes,
85                        unsigned *tri_size)
86{
87   unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
88   unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
89   struct lp_rast_triangle *tri;
90
91   *tri_size = (sizeof(struct lp_rast_triangle) +
92                3 * input_array_sz +
93                plane_sz);
94
95   tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
96   if (tri == NULL)
97      return NULL;
98
99   tri->inputs.stride = input_array_sz;
100
101   {
102      char *a = (char *)tri;
103      char *b = (char *)&GET_PLANES(tri)[nr_planes];
104      assert(b - a == *tri_size);
105   }
106
107   return tri;
108}
109
110void
111lp_setup_print_vertex(struct lp_setup_context *setup,
112                      const char *name,
113                      const float (*v)[4])
114{
115   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
116   int i, j;
117
118   debug_printf("   wpos (%s[0]) xyzw %f %f %f %f\n",
119                name,
120                v[0][0], v[0][1], v[0][2], v[0][3]);
121
122   for (i = 0; i < key->num_inputs; i++) {
123      const float *in = v[key->inputs[i].src_index];
124
125      debug_printf("  in[%d] (%s[%d]) %s%s%s%s ",
126                   i,
127                   name, key->inputs[i].src_index,
128                   (key->inputs[i].usage_mask & 0x1) ? "x" : " ",
129                   (key->inputs[i].usage_mask & 0x2) ? "y" : " ",
130                   (key->inputs[i].usage_mask & 0x4) ? "z" : " ",
131                   (key->inputs[i].usage_mask & 0x8) ? "w" : " ");
132
133      for (j = 0; j < 4; j++)
134         if (key->inputs[i].usage_mask & (1<<j))
135            debug_printf("%.5f ", in[j]);
136
137      debug_printf("\n");
138   }
139}
140
141
/**
 * Print a triangle (for debug): its winding as derived from the sign of
 * the floating-point cross product of two edges, followed by the
 * attributes of its three vertices.
 */
void
lp_setup_print_triangle(struct lp_setup_context *setup,
                        const float (*v0)[4],
                        const float (*v1)[4],
                        const float (*v2)[4])
{
   float ex, ey, fx, fy, det;

   debug_printf("triangle\n");

   /* Edge vectors e = v0 - v2 and f = v1 - v2. */
   ex = v0[0][0] - v2[0][0];
   ey = v0[0][1] - v2[0][1];
   fx = v1[0][0] - v2[0][0];
   fy = v1[0][1] - v2[0][1];

   /* det = cross(e,f).z */
   det = ex * fy - ey * fx;

   if (det > 0.0f)
      debug_printf("   - cw\n");
   else if (det < 0.0f)
      debug_printf("   - ccw\n");
   else
      debug_printf("   - zero area\n");

   lp_setup_print_vertex(setup, "v0", v0);
   lp_setup_print_vertex(setup, "v1", v1);
   lp_setup_print_vertex(setup, "v2", v2);
}
173
174
175#define MAX_PLANES 8
176static unsigned
177lp_rast_tri_tab[MAX_PLANES+1] = {
178   0,               /* should be impossible */
179   LP_RAST_OP_TRIANGLE_1,
180   LP_RAST_OP_TRIANGLE_2,
181   LP_RAST_OP_TRIANGLE_3,
182   LP_RAST_OP_TRIANGLE_4,
183   LP_RAST_OP_TRIANGLE_5,
184   LP_RAST_OP_TRIANGLE_6,
185   LP_RAST_OP_TRIANGLE_7,
186   LP_RAST_OP_TRIANGLE_8
187};
188
189
190
191/**
192 * The primitive covers the whole tile- shade whole tile.
193 *
194 * \param tx, ty  the tile position in tiles, not pixels
195 */
196static boolean
197lp_setup_whole_tile(struct lp_setup_context *setup,
198                    const struct lp_rast_shader_inputs *inputs,
199                    int tx, int ty)
200{
201   struct lp_scene *scene = setup->scene;
202
203   LP_COUNT(nr_fully_covered_64);
204
205   /* if variant is opaque and scissor doesn't effect the tile */
206   if (inputs->opaque) {
207      if (!scene->fb.zsbuf) {
208         /*
209          * All previous rendering will be overwritten so reset the bin.
210          */
211         lp_scene_bin_reset( scene, tx, ty );
212      }
213
214      LP_COUNT(nr_shade_opaque_64);
215      return lp_scene_bin_cmd_with_state( scene, tx, ty,
216                                          setup->fs.stored,
217                                          LP_RAST_OP_SHADE_TILE_OPAQUE,
218                                          lp_rast_arg_inputs(inputs) );
219   } else {
220      LP_COUNT(nr_shade_64);
221      return lp_scene_bin_cmd_with_state( scene, tx, ty,
222                                          setup->fs.stored,
223                                          LP_RAST_OP_SHADE_TILE,
224                                          lp_rast_arg_inputs(inputs) );
225   }
226}
227
228
229/**
230 * Do basic setup for triangle rasterization and determine which
231 * framebuffer tiles are touched.  Put the triangle in the scene's
232 * bins for the tiles which we overlap.
233 */
234static boolean
235do_triangle_ccw(struct lp_setup_context *setup,
236                struct fixed_position* position,
237                const float (*v0)[4],
238                const float (*v1)[4],
239                const float (*v2)[4],
240                boolean frontfacing )
241{
242   struct lp_scene *scene = setup->scene;
243   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
244   struct lp_rast_triangle *tri;
245   struct lp_rast_plane *plane;
246   struct u_rect bbox;
247   unsigned tri_bytes;
248   int nr_planes = 3;
249
250   /* Area should always be positive here */
251   assert(position->area > 0);
252
253   if (0)
254      lp_setup_print_triangle(setup, v0, v1, v2);
255
256   if (setup->scissor_test) {
257      nr_planes = 7;
258   }
259   else {
260      nr_planes = 3;
261   }
262
263   /* Bounding rectangle (in pixels) */
264   {
265      /* Yes this is necessary to accurately calculate bounding boxes
266       * with the two fill-conventions we support.  GL (normally) ends
267       * up needing a bottom-left fill convention, which requires
268       * slightly different rounding.
269       */
270      int adj = (setup->pixel_offset != 0) ? 1 : 0;
271
272      /* Inclusive x0, exclusive x1 */
273      bbox.x0 =  MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER;
274      bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER;
275
276      /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */
277      bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER;
278      bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER;
279   }
280
281   if (bbox.x1 < bbox.x0 ||
282       bbox.y1 < bbox.y0) {
283      if (0) debug_printf("empty bounding box\n");
284      LP_COUNT(nr_culled_tris);
285      return TRUE;
286   }
287
288   if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
289      if (0) debug_printf("offscreen\n");
290      LP_COUNT(nr_culled_tris);
291      return TRUE;
292   }
293
294   /* Can safely discard negative regions, but need to keep hold of
295    * information about when the triangle extends past screen
296    * boundaries.  See trimmed_box in lp_setup_bin_triangle().
297    */
298   bbox.x0 = MAX2(bbox.x0, 0);
299   bbox.y0 = MAX2(bbox.y0, 0);
300
301   tri = lp_setup_alloc_triangle(scene,
302                                 key->num_inputs,
303                                 nr_planes,
304                                 &tri_bytes);
305   if (!tri)
306      return FALSE;
307
308#if 0
309   tri->v[0][0] = v0[0][0];
310   tri->v[1][0] = v1[0][0];
311   tri->v[2][0] = v2[0][0];
312   tri->v[0][1] = v0[0][1];
313   tri->v[1][1] = v1[0][1];
314   tri->v[2][1] = v2[0][1];
315#endif
316
317   LP_COUNT(nr_tris);
318
319   /* Setup parameter interpolants:
320    */
321   setup->setup.variant->jit_function( v0,
322				       v1,
323				       v2,
324				       frontfacing,
325				       GET_A0(&tri->inputs),
326				       GET_DADX(&tri->inputs),
327				       GET_DADY(&tri->inputs) );
328
329   tri->inputs.frontfacing = frontfacing;
330   tri->inputs.disable = FALSE;
331   tri->inputs.opaque = setup->fs.current.variant->opaque;
332
333   if (0)
334      lp_dump_setup_coef(&setup->setup.variant->key,
335			 (const float (*)[4])GET_A0(&tri->inputs),
336			 (const float (*)[4])GET_DADX(&tri->inputs),
337			 (const float (*)[4])GET_DADY(&tri->inputs));
338
339   plane = GET_PLANES(tri);
340
341#if defined(PIPE_ARCH_SSE)
342   {
343      __m128i vertx, verty;
344      __m128i shufx, shufy;
345      __m128i dcdx, dcdy, c;
346      __m128i unused;
347      __m128i dcdx_neg_mask;
348      __m128i dcdy_neg_mask;
349      __m128i dcdx_zero_mask;
350      __m128i top_left_flag;
351      __m128i c_inc_mask, c_inc;
352      __m128i eo, p0, p1, p2;
353      __m128i zero = _mm_setzero_si128();
354
355      vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
356      verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
357
358      shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
359      shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
360
361      dcdx = _mm_sub_epi32(verty, shufy);
362      dcdy = _mm_sub_epi32(vertx, shufx);
363
364      dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
365      dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
366      dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
367
368      top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0);
369
370      c_inc_mask = _mm_or_si128(dcdx_neg_mask,
371                                _mm_and_si128(dcdx_zero_mask,
372                                              _mm_xor_si128(dcdy_neg_mask,
373                                                            top_left_flag)));
374
375      c_inc = _mm_srli_epi32(c_inc_mask, 31);
376
377      c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
378                        mm_mullo_epi32(dcdy, verty));
379
380      c = _mm_add_epi32(c, c_inc);
381
382      /* Scale up to match c:
383       */
384      dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
385      dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
386
387      /* Calculate trivial reject values:
388       */
389      eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
390                         _mm_and_si128(dcdx_neg_mask, dcdx));
391
392      /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
393
394      /* Pointless transpose which gets undone immediately in
395       * rasterization:
396       */
397      transpose4_epi32(&c, &dcdx, &dcdy, &eo,
398                       &p0, &p1, &p2, &unused);
399
400      _mm_store_si128((__m128i *)&plane[0], p0);
401      _mm_store_si128((__m128i *)&plane[1], p1);
402      _mm_store_si128((__m128i *)&plane[2], p2);
403   }
404#else
405   {
406      int i;
407      plane[0].dcdy = position->dx01;
408      plane[1].dcdy = position->x[1] - position->x[2];
409      plane[2].dcdy = position->dx20;
410      plane[0].dcdx = position->dy01;
411      plane[1].dcdx = position->y[1] - position->y[2];
412      plane[2].dcdx = position->dy20;
413
414      for (i = 0; i < 3; i++) {
415         /* half-edge constants, will be interated over the whole render
416          * target.
417          */
418         plane[i].c = plane[i].dcdx * position->x[i] - plane[i].dcdy * position->y[i];
419
420         /* correct for top-left vs. bottom-left fill convention.
421          *
422          * note that we're overloading gl_rasterization_rules to mean
423          * both (0.5,0.5) pixel centers *and* bottom-left filling
424          * convention.
425          *
426          * GL actually has a top-left filling convention, but GL's
427          * notion of "top" differs from gallium's...
428          *
429          * Also, sometimes (in FBO cases) GL will render upside down
430          * to its usual method, in which case it will probably want
431          * to use the opposite, top-left convention.
432          */
433         if (plane[i].dcdx < 0) {
434            /* both fill conventions want this - adjust for left edges */
435            plane[i].c++;
436         }
437         else if (plane[i].dcdx == 0) {
438            if (setup->pixel_offset == 0) {
439               /* correct for top-left fill convention:
440                */
441               if (plane[i].dcdy > 0) plane[i].c++;
442            }
443            else {
444               /* correct for bottom-left fill convention:
445                */
446               if (plane[i].dcdy < 0) plane[i].c++;
447            }
448         }
449
450         plane[i].dcdx *= FIXED_ONE;
451         plane[i].dcdy *= FIXED_ONE;
452
453         /* find trivial reject offsets for each edge for a single-pixel
454          * sized block.  These will be scaled up at each recursive level to
455          * match the active blocksize.  Scaling in this way works best if
456          * the blocks are square.
457          */
458         plane[i].eo = 0;
459         if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
460         if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
461      }
462   }
463#endif
464
465   if (0) {
466      debug_printf("p0: %08x/%08x/%08x/%08x\n",
467                   plane[0].c,
468                   plane[0].dcdx,
469                   plane[0].dcdy,
470                   plane[0].eo);
471
472      debug_printf("p1: %08x/%08x/%08x/%08x\n",
473                   plane[1].c,
474                   plane[1].dcdx,
475                   plane[1].dcdy,
476                   plane[1].eo);
477
478      debug_printf("p0: %08x/%08x/%08x/%08x\n",
479                   plane[2].c,
480                   plane[2].dcdx,
481                   plane[2].dcdy,
482                   plane[2].eo);
483   }
484
485
486   /*
487    * When rasterizing scissored tris, use the intersection of the
488    * triangle bounding box and the scissor rect to generate the
489    * scissor planes.
490    *
491    * This permits us to cut off the triangle "tails" that are present
492    * in the intermediate recursive levels caused when two of the
493    * triangles edges don't diverge quickly enough to trivially reject
494    * exterior blocks from the triangle.
495    *
496    * It's not really clear if it's worth worrying about these tails,
497    * but since we generate the planes for each scissored tri, it's
498    * free to trim them in this case.
499    *
500    * Note that otherwise, the scissor planes only vary in 'C' value,
501    * and even then only on state-changes.  Could alternatively store
502    * these planes elsewhere.
503    */
504   if (nr_planes == 7) {
505      const struct u_rect *scissor = &setup->scissor;
506
507      plane[3].dcdx = -1;
508      plane[3].dcdy = 0;
509      plane[3].c = 1-scissor->x0;
510      plane[3].eo = 1;
511
512      plane[4].dcdx = 1;
513      plane[4].dcdy = 0;
514      plane[4].c = scissor->x1+1;
515      plane[4].eo = 0;
516
517      plane[5].dcdx = 0;
518      plane[5].dcdy = 1;
519      plane[5].c = 1-scissor->y0;
520      plane[5].eo = 1;
521
522      plane[6].dcdx = 0;
523      plane[6].dcdy = -1;
524      plane[6].c = scissor->y1+1;
525      plane[6].eo = 0;
526   }
527
528   return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
529}
530
531/*
532 * Round to nearest less or equal power of two of the input.
533 *
534 * Undefined if no bit set exists, so code should check against 0 first.
535 */
536static INLINE uint32_t
537floor_pot(uint32_t n)
538{
539#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
540   if (n == 0)
541      return 0;
542
543   __asm__("bsr %1,%0"
544          : "=r" (n)
545          : "rm" (n));
546   return 1 << n;
547#else
548   n |= (n >>  1);
549   n |= (n >>  2);
550   n |= (n >>  4);
551   n |= (n >>  8);
552   n |= (n >> 16);
553   return n - (n >> 1);
554#endif
555}
556
557
558boolean
559lp_setup_bin_triangle( struct lp_setup_context *setup,
560                       struct lp_rast_triangle *tri,
561                       const struct u_rect *bbox,
562                       int nr_planes )
563{
564   struct lp_scene *scene = setup->scene;
565   struct u_rect trimmed_box = *bbox;
566   int i;
567
568   /* What is the largest power-of-two boundary this triangle crosses:
569    */
570   int dx = floor_pot((bbox->x0 ^ bbox->x1) |
571		      (bbox->y0 ^ bbox->y1));
572
573   /* The largest dimension of the rasterized area of the triangle
574    * (aligned to a 4x4 grid), rounded down to the nearest power of two:
575    */
576   int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) |
577		      (bbox->y1 - (bbox->y0 & ~3)));
578
579   /* Now apply scissor, etc to the bounding box.  Could do this
580    * earlier, but it confuses the logic for tri-16 and would force
581    * the rasterizer to also respect scissor, etc, just for the rare
582    * cases where a small triangle extends beyond the scissor.
583    */
584   u_rect_find_intersection(&setup->draw_region, &trimmed_box);
585
586   /* Determine which tile(s) intersect the triangle's bounding box
587    */
588   if (dx < TILE_SIZE)
589   {
590      int ix0 = bbox->x0 / TILE_SIZE;
591      int iy0 = bbox->y0 / TILE_SIZE;
592      unsigned px = bbox->x0 & 63 & ~3;
593      unsigned py = bbox->y0 & 63 & ~3;
594
595      assert(iy0 == bbox->y1 / TILE_SIZE &&
596	     ix0 == bbox->x1 / TILE_SIZE);
597
598      if (nr_planes == 3) {
599         if (sz < 4)
600         {
601            /* Triangle is contained in a single 4x4 stamp:
602             */
603            assert(px + 4 <= TILE_SIZE);
604            assert(py + 4 <= TILE_SIZE);
605            return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
606                                                setup->fs.stored,
607                                                LP_RAST_OP_TRIANGLE_3_4,
608                                                lp_rast_arg_triangle_contained(tri, px, py) );
609         }
610
611         if (sz < 16)
612         {
613            /* Triangle is contained in a single 16x16 block:
614             */
615
616            /*
617             * The 16x16 block is only 4x4 aligned, and can exceed the tile
618             * dimensions if the triangle is 16 pixels in one dimension but 4
619             * in the other. So budge the 16x16 back inside the tile.
620             */
621            px = MIN2(px, TILE_SIZE - 16);
622            py = MIN2(py, TILE_SIZE - 16);
623
624            assert(px + 16 <= TILE_SIZE);
625            assert(py + 16 <= TILE_SIZE);
626
627            return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
628                                                setup->fs.stored,
629                                                LP_RAST_OP_TRIANGLE_3_16,
630                                                lp_rast_arg_triangle_contained(tri, px, py) );
631         }
632      }
633      else if (nr_planes == 4 && sz < 16)
634      {
635         px = MIN2(px, TILE_SIZE - 16);
636         py = MIN2(py, TILE_SIZE - 16);
637
638         assert(px + 16 <= TILE_SIZE);
639         assert(py + 16 <= TILE_SIZE);
640
641         return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
642                                            setup->fs.stored,
643                                            LP_RAST_OP_TRIANGLE_4_16,
644                                            lp_rast_arg_triangle_contained(tri, px, py));
645      }
646
647
648      /* Triangle is contained in a single tile:
649       */
650      return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored,
651                                          lp_rast_tri_tab[nr_planes],
652                                          lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
653   }
654   else
655   {
656      struct lp_rast_plane *plane = GET_PLANES(tri);
657      int c[MAX_PLANES];
658      int ei[MAX_PLANES];
659
660      int eo[MAX_PLANES];
661      int xstep[MAX_PLANES];
662      int ystep[MAX_PLANES];
663      int x, y;
664
665      int ix0 = trimmed_box.x0 / TILE_SIZE;
666      int iy0 = trimmed_box.y0 / TILE_SIZE;
667      int ix1 = trimmed_box.x1 / TILE_SIZE;
668      int iy1 = trimmed_box.y1 / TILE_SIZE;
669
670      for (i = 0; i < nr_planes; i++) {
671         c[i] = (plane[i].c +
672                 plane[i].dcdy * iy0 * TILE_SIZE -
673                 plane[i].dcdx * ix0 * TILE_SIZE);
674
675         ei[i] = (plane[i].dcdy -
676                  plane[i].dcdx -
677                  plane[i].eo) << TILE_ORDER;
678
679         eo[i] = plane[i].eo << TILE_ORDER;
680         xstep[i] = -(plane[i].dcdx << TILE_ORDER);
681         ystep[i] = plane[i].dcdy << TILE_ORDER;
682      }
683
684
685
686      /* Test tile-sized blocks against the triangle.
687       * Discard blocks fully outside the tri.  If the block is fully
688       * contained inside the tri, bin an lp_rast_shade_tile command.
689       * Else, bin a lp_rast_triangle command.
690       */
691      for (y = iy0; y <= iy1; y++)
692      {
693	 boolean in = FALSE;  /* are we inside the triangle? */
694	 int cx[MAX_PLANES];
695
696         for (i = 0; i < nr_planes; i++)
697            cx[i] = c[i];
698
699	 for (x = ix0; x <= ix1; x++)
700	 {
701            int out = 0;
702            int partial = 0;
703
704            for (i = 0; i < nr_planes; i++) {
705               int planeout = cx[i] + eo[i];
706               int planepartial = cx[i] + ei[i] - 1;
707               out |= (planeout >> 31);
708               partial |= (planepartial >> 31) & (1<<i);
709            }
710
711            if (out) {
712               /* do nothing */
713               if (in)
714                  break;  /* exiting triangle, all done with this row */
715               LP_COUNT(nr_empty_64);
716            }
717            else if (partial) {
718               /* Not trivially accepted by at least one plane -
719                * rasterize/shade partial tile
720                */
721               int count = util_bitcount(partial);
722               in = TRUE;
723
724               if (!lp_scene_bin_cmd_with_state( scene, x, y,
725                                                 setup->fs.stored,
726                                                 lp_rast_tri_tab[count],
727                                                 lp_rast_arg_triangle(tri, partial) ))
728                  goto fail;
729
730               LP_COUNT(nr_partially_covered_64);
731            }
732            else {
733               /* triangle covers the whole tile- shade whole tile */
734               LP_COUNT(nr_fully_covered_64);
735               in = TRUE;
736               if (!lp_setup_whole_tile(setup, &tri->inputs, x, y))
737                  goto fail;
738            }
739
740	    /* Iterate cx values across the region:
741	     */
742            for (i = 0; i < nr_planes; i++)
743               cx[i] += xstep[i];
744	 }
745
746	 /* Iterate c values down the region:
747	  */
748         for (i = 0; i < nr_planes; i++)
749            c[i] += ystep[i];
750      }
751   }
752
753   return TRUE;
754
755fail:
756   /* Need to disable any partially binned triangle.  This is easier
757    * than trying to locate all the triangle, shade-tile, etc,
758    * commands which may have been binned.
759    */
760   tri->inputs.disable = TRUE;
761   return FALSE;
762}
763
764
765/**
766 * Try to draw the triangle, restart the scene on failure.
767 */
768static void retry_triangle_ccw( struct lp_setup_context *setup,
769                                struct fixed_position* position,
770                                const float (*v0)[4],
771                                const float (*v1)[4],
772                                const float (*v2)[4],
773                                boolean front)
774{
775   if (!do_triangle_ccw( setup, position, v0, v1, v2, front ))
776   {
777      if (!lp_setup_flush_and_restart(setup))
778         return;
779
780      if (!do_triangle_ccw( setup, position, v0, v1, v2, front ))
781         return;
782   }
783}
784
785
786/**
787 * Calculate fixed position data for a triangle
788 */
789static INLINE void
790calc_fixed_position( struct lp_setup_context *setup,
791                     struct fixed_position* position,
792                     const float (*v0)[4],
793                     const float (*v1)[4],
794                     const float (*v2)[4])
795{
796   position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
797   position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
798   position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
799   position->x[3] = 0;
800
801   position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
802   position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
803   position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
804   position->y[3] = 0;
805
806   position->dx01 = position->x[0] - position->x[1];
807   position->dy01 = position->y[0] - position->y[1];
808
809   position->dx20 = position->x[2] - position->x[0];
810   position->dy20 = position->y[2] - position->y[0];
811
812   position->area = position->dx01 * position->dy20 - position->dx20 * position->dy01;
813}
814
815
816/**
817 * Rotate a triangle, flipping its clockwise direction,
818 * Swaps values for xy[0] and xy[1]
819 */
820static INLINE void
821rotate_fixed_position_01( struct fixed_position* position )
822{
823   int x, y;
824
825   x = position->x[1];
826   y = position->y[1];
827   position->x[1] = position->x[0];
828   position->y[1] = position->y[0];
829   position->x[0] = x;
830   position->y[0] = y;
831
832   position->dx01 = -position->dx01;
833   position->dy01 = -position->dy01;
834   position->dx20 = position->x[2] - position->x[0];
835   position->dy20 = position->y[2] - position->y[0];
836
837   position->area = -position->area;
838}
839
840
841/**
842 * Rotate a triangle, flipping its clockwise direction,
843 * Swaps values for xy[1] and xy[2]
844 */
845static INLINE void
846rotate_fixed_position_12( struct fixed_position* position )
847{
848   int x, y;
849
850   x = position->x[2];
851   y = position->y[2];
852   position->x[2] = position->x[1];
853   position->y[2] = position->y[1];
854   position->x[1] = x;
855   position->y[1] = y;
856
857   x = position->dx01;
858   y = position->dy01;
859   position->dx01 = -position->dx20;
860   position->dy01 = -position->dy20;
861   position->dx20 = -x;
862   position->dy20 = -y;
863
864   position->area = -position->area;
865}
866
867
868/**
869 * Draw triangle if it's CW, cull otherwise.
870 */
871static void triangle_cw( struct lp_setup_context *setup,
872			 const float (*v0)[4],
873			 const float (*v1)[4],
874			 const float (*v2)[4] )
875{
876   struct fixed_position position;
877   calc_fixed_position(setup, &position, v0, v1, v2);
878
879   if (position.area < 0) {
880      if (setup->flatshade_first) {
881         rotate_fixed_position_12(&position);
882         retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface);
883      } else {
884         rotate_fixed_position_01(&position);
885         retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface);
886      }
887   }
888}
889
890
891static void triangle_ccw( struct lp_setup_context *setup,
892                          const float (*v0)[4],
893                          const float (*v1)[4],
894                          const float (*v2)[4])
895{
896   struct fixed_position position;
897   calc_fixed_position(setup, &position, v0, v1, v2);
898
899   if (position.area > 0)
900      retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface);
901}
902
903/**
904 * Draw triangle whether it's CW or CCW.
905 */
906static void triangle_both( struct lp_setup_context *setup,
907			   const float (*v0)[4],
908			   const float (*v1)[4],
909			   const float (*v2)[4] )
910{
911   struct fixed_position position;
912   calc_fixed_position(setup, &position, v0, v1, v2);
913
914   if (0) {
915      assert(!util_is_inf_or_nan(v0[0][0]));
916      assert(!util_is_inf_or_nan(v0[0][1]));
917      assert(!util_is_inf_or_nan(v1[0][0]));
918      assert(!util_is_inf_or_nan(v1[0][1]));
919      assert(!util_is_inf_or_nan(v2[0][0]));
920      assert(!util_is_inf_or_nan(v2[0][1]));
921   }
922
923   if (position.area > 0)
924      retry_triangle_ccw( setup, &position, v0, v1, v2, setup->ccw_is_frontface );
925   else if (position.area < 0) {
926      if (setup->flatshade_first) {
927         rotate_fixed_position_12( &position );
928         retry_triangle_ccw( setup, &position, v0, v2, v1, !setup->ccw_is_frontface );
929      } else {
930         rotate_fixed_position_01( &position );
931         retry_triangle_ccw( setup, &position, v1, v0, v2, !setup->ccw_is_frontface );
932      }
933   }
934}
935
936
/**
 * Triangle handler that discards every triangle.
 */
static void triangle_nop( struct lp_setup_context *setup,
                          const float (*v0)[4],
                          const float (*v1)[4],
                          const float (*v2)[4] )
{
   /* Intentionally empty; the arguments are unused. */
   (void) setup;
   (void) v0;
   (void) v1;
   (void) v2;
}
943
944
945void
946lp_setup_choose_triangle( struct lp_setup_context *setup )
947{
948   switch (setup->cullmode) {
949   case PIPE_FACE_NONE:
950      setup->triangle = triangle_both;
951      break;
952   case PIPE_FACE_BACK:
953      setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
954      break;
955   case PIPE_FACE_FRONT:
956      setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
957      break;
958   default:
959      setup->triangle = triangle_nop;
960      break;
961   }
962}
963