lp_setup_line.c revision 5286dd701640976ffc328e8e85fb3830746851a1
1/**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28/*
29 * Binning code for lines
30 */
31
32#include "util/u_math.h"
33#include "util/u_memory.h"
34#include "lp_perf.h"
35#include "lp_setup_context.h"
36#include "lp_rast.h"
37#include "lp_state_fs.h"
38
39#define NUM_CHANNELS 4
40
41
/*
 * Step table for the left scissor plane (plane 7 in lp_setup_line(),
 * dcdx = -1, dcdy = 0).  Entry j is the +x offset of the j-th pixel of a
 * 4x4 block, using the same j ordering with which lp_setup_line() fills
 * line->step[i][j].
 */
static const int step_scissor_minx[16] = {
   0, 1, 0, 1,   2, 3, 2, 3,
   0, 1, 0, 1,   2, 3, 2, 3
};
48
/*
 * Step table for the right scissor plane (plane 4 in lp_setup_line(),
 * dcdx = 1, dcdy = 0).  Entry j is the negated x offset of the j-th pixel
 * of a 4x4 block, using the same j ordering with which lp_setup_line()
 * fills line->step[i][j].
 */
static const int step_scissor_maxx[16] = {
    0, -1,  0, -1,   -2, -3, -2, -3,
    0, -1,  0, -1,   -2, -3, -2, -3
};
55
/*
 * Step table for the top scissor plane (plane 5 in lp_setup_line(),
 * dcdx = 0, dcdy = 1).  Entry j is the +y offset of the j-th pixel of a
 * 4x4 block, using the same j ordering with which lp_setup_line() fills
 * line->step[i][j].
 */
static const int step_scissor_miny[16] = {
   0, 0, 1, 1,   0, 0, 1, 1,
   2, 2, 3, 3,   2, 2, 3, 3
};
62
/*
 * Step table for the bottom scissor plane (plane 6 in lp_setup_line(),
 * dcdx = 0, dcdy = -1).  Entry j is the negated y offset of the j-th pixel
 * of a 4x4 block, using the same j ordering with which lp_setup_line()
 * fills line->step[i][j].
 */
static const int step_scissor_maxy[16] = {
    0,  0, -1, -1,    0,  0, -1, -1,
   -2, -2, -3, -3,   -2, -2, -3, -3
};
69
70
71
72/**
73 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
74 */
75static void constant_coef( struct lp_setup_context *setup,
76                           struct lp_rast_triangle *tri,
77                           unsigned slot,
78                           const float value,
79                           unsigned i )
80{
81   tri->inputs.a0[slot][i] = value;
82   tri->inputs.dadx[slot][i] = 0.0f;
83   tri->inputs.dady[slot][i] = 0.0f;
84}
85
86
87/**
88 * Compute a0, dadx and dady for a linearly interpolated coefficient,
89 * for a triangle.
90 */
91static void linear_coef( struct lp_setup_context *setup,
92                         struct lp_rast_triangle *tri,
93                         float oneoverarea,
94                         unsigned slot,
95                         const float (*v1)[4],
96                         const float (*v2)[4],
97                         unsigned vert_attr,
98                         unsigned i)
99{
100   float a1 = v1[vert_attr][i];
101   float a2 = v2[vert_attr][i];
102
103   float da21 = a1 - a2;
104   float dadx = da21 * tri->dx * oneoverarea;
105   float dady = da21 * tri->dy * oneoverarea;
106
107   tri->inputs.dadx[slot][i] = dadx;
108   tri->inputs.dady[slot][i] = dady;
109
110   tri->inputs.a0[slot][i] = (a1 -
111                              (dadx * (v1[0][0] - setup->pixel_offset) +
112                               dady * (v1[0][1] - setup->pixel_offset)));
113}
114
115
116/**
117 * Compute a0, dadx and dady for a perspective-corrected interpolant,
118 * for a triangle.
119 * We basically multiply the vertex value by 1/w before computing
120 * the plane coefficients (a0, dadx, dady).
121 * Later, when we compute the value at a particular fragment position we'll
122 * divide the interpolated value by the interpolated W at that fragment.
123 */
124static void perspective_coef( struct lp_setup_context *setup,
125                              struct lp_rast_triangle *tri,
126                              float oneoverarea,
127                              unsigned slot,
128                              const float (*v1)[4],
129                              const float (*v2)[4],
130                              unsigned vert_attr,
131                              unsigned i)
132{
133   /* premultiply by 1/w  (v[0][3] is always 1/w):
134    */
135   float a1 = v1[vert_attr][i] * v1[0][3];
136   float a2 = v2[vert_attr][i] * v2[0][3];
137
138   float da21 = a1 - a2;
139   float dadx = da21 * tri->dx * oneoverarea;
140   float dady = da21 * tri->dy * oneoverarea;
141
142   tri->inputs.dadx[slot][i] = dadx;
143   tri->inputs.dady[slot][i] = dady;
144
145   tri->inputs.a0[slot][i] = (a1 -
146                              (dadx * (v1[0][0] - setup->pixel_offset) +
147                               dady * (v1[0][1] - setup->pixel_offset)));
148}
149
150/**
151 * Compute the tri->coef[] array dadx, dady, a0 values.
152 */
153static void setup_line_coefficients( struct lp_setup_context *setup,
154                                     struct lp_rast_triangle *tri,
155                                     float oneoverarea,
156                                     const float (*v1)[4],
157                                     const float (*v2)[4])
158{
159   unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
160   unsigned slot;
161
162   /* setup interpolation for all the remaining attributes:
163    */
164   for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
165      unsigned vert_attr = setup->fs.input[slot].src_index;
166      unsigned usage_mask = setup->fs.input[slot].usage_mask;
167      unsigned i;
168
169      switch (setup->fs.input[slot].interp) {
170      case LP_INTERP_CONSTANT:
171         if (setup->flatshade_first) {
172            for (i = 0; i < NUM_CHANNELS; i++)
173               if (usage_mask & (1 << i))
174                  constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
175         }
176         else {
177            for (i = 0; i < NUM_CHANNELS; i++)
178               if (usage_mask & (1 << i))
179                  constant_coef(setup, tri, slot+1, v2[vert_attr][i], i);
180         }
181         break;
182
183      case LP_INTERP_LINEAR:
184         for (i = 0; i < NUM_CHANNELS; i++)
185            if (usage_mask & (1 << i))
186               linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i);
187         break;
188
189      case LP_INTERP_PERSPECTIVE:
190         for (i = 0; i < NUM_CHANNELS; i++)
191            if (usage_mask & (1 << i))
192               perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i);
193         fragcoord_usage_mask |= TGSI_WRITEMASK_W;
194         break;
195
196      case LP_INTERP_POSITION:
197         /*
198          * The generated pixel interpolators will pick up the coeffs from
199          * slot 0, so all need to ensure that the usage mask is covers all
200          * usages.
201          */
202         fragcoord_usage_mask |= usage_mask;
203         break;
204
205      default:
206         assert(0);
207      }
208   }
209
210   /* The internal position input is in slot zero:
211    */
212   lp_setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v2,
213                            fragcoord_usage_mask);
214}
215
216
217
218static INLINE int subpixel_snap( float a )
219{
220   return util_iround(FIXED_ONE * a);
221}
222
223
224/**
225 * Print line vertex attribs (for debug).
226 */
227static void
228print_line(struct lp_setup_context *setup,
229           const float (*v1)[4],
230           const float (*v2)[4])
231{
232   uint i;
233
234   debug_printf("llvmpipe line\n");
235   for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
236      debug_printf("  v1[%d]:  %f %f %f %f\n", i,
237                   v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
238   }
239   for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
240      debug_printf("  v2[%d]:  %f %f %f %f\n", i,
241                   v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
242   }
243}
244
245
246static void
247lp_setup_line( struct lp_setup_context *setup,
248               const float (*v1)[4],
249               const float (*v2)[4])
250{
251   struct lp_scene *scene = lp_setup_get_current_scene(setup);
252   struct lp_rast_triangle *line;
253   float oneoverarea;
254   float half_width = setup->line_width / 2;
255   int minx, maxx, miny, maxy;
256   int ix0, ix1, iy0, iy1;
257   unsigned tri_bytes;
258   int x[4];
259   int y[4];
260   int i;
261   int nr_planes = 4;
262   boolean opaque;
263
264   if (0)
265      print_line(setup, v1, v2);
266
267   if (setup->scissor_test) {
268      nr_planes = 8;
269   }
270   else {
271      nr_planes = 4;
272   }
273
274   line = lp_setup_alloc_triangle(scene,
275                                  setup->fs.nr_inputs,
276                                  nr_planes,
277                                  &tri_bytes);
278   if (!line)
279      return;
280
281#ifndef DEBUG
282   line->v[0][0] = v1[0][0];
283   line->v[1][0] = v2[0][0];
284   line->v[0][1] = v1[0][1];
285   line->v[1][1] = v2[0][1];
286#endif
287
288   /* pre-calculation(based on given vertices) to determine if line is
289    * more horizontal or more vertical
290    */
291   line->dx = v1[0][0] - v2[0][0];
292   line->dy = v1[0][1] - v2[0][1];
293
294   /* x-major line */
295   if (fabsf(line->dx) >= fabsf(line->dy)) {
296      if (line->dx < 0) {
297         /* if v2 is to the right of v1, swap pointers */
298         const float (*temp)[4] = v1;
299         v1 = v2;
300         v2 = temp;
301         line->dx = -line->dx;
302         line->dy = -line->dy;
303      }
304
305      /* x/y positions in fixed point */
306      x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
307      x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
308      x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
309      x[3] = subpixel_snap(v1[0][0] - setup->pixel_offset);
310
311      y[0] = subpixel_snap(v1[0][1] - half_width - setup->pixel_offset);
312      y[1] = subpixel_snap(v2[0][1] - half_width - setup->pixel_offset);
313      y[2] = subpixel_snap(v2[0][1] + half_width - setup->pixel_offset);
314      y[3] = subpixel_snap(v1[0][1] + half_width - setup->pixel_offset);
315   }
316   else{
317      /* y-major line */
318      if (line->dy > 0) {
319         /* if v2 is on top of v1, swap pointers */
320         const float (*temp)[4] = v1;
321         v1 = v2;
322         v2 = temp;
323         line->dx = -line->dx;
324         line->dy = -line->dy;
325      }
326
327      x[0] = subpixel_snap(v1[0][0] - half_width - setup->pixel_offset);
328      x[1] = subpixel_snap(v2[0][0] - half_width - setup->pixel_offset);
329      x[2] = subpixel_snap(v2[0][0] + half_width - setup->pixel_offset);
330      x[3] = subpixel_snap(v1[0][0] + half_width - setup->pixel_offset);
331
332      y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
333      y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
334      y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
335      y[3] = subpixel_snap(v1[0][1] - setup->pixel_offset);
336   }
337
338   /* calculate the deltas */
339   line->plane[0].dcdy = x[0] - x[1];
340   line->plane[1].dcdy = x[1] - x[2];
341   line->plane[2].dcdy = x[2] - x[3];
342   line->plane[3].dcdy = x[3] - x[0];
343
344   line->plane[0].dcdx = y[0] - y[1];
345   line->plane[1].dcdx = y[1] - y[2];
346   line->plane[2].dcdx = y[2] - y[3];
347   line->plane[3].dcdx = y[3] - y[0];
348
349
350   LP_COUNT(nr_tris);
351
352
353   /* Bounding rectangle (in pixels) */
354   {
355      /* Yes this is necessary to accurately calculate bounding boxes
356       * with the two fill-conventions we support.  GL (normally) ends
357       * up needing a bottom-left fill convention, which requires
358       * slightly different rounding.
359       */
360      int adj = (setup->pixel_offset != 0) ? 1 : 0;
361
362      minx = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
363      maxx = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
364      miny = (MIN4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
365      maxy = (MAX4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
366   }
367
368   if (setup->scissor_test) {
369      minx = MAX2(minx, setup->scissor.current.minx);
370      maxx = MIN2(maxx, setup->scissor.current.maxx);
371      miny = MAX2(miny, setup->scissor.current.miny);
372      maxy = MIN2(maxy, setup->scissor.current.maxy);
373   }
374   else {
375      minx = MAX2(minx, 0);
376      miny = MAX2(miny, 0);
377      maxx = MIN2(maxx, scene->fb.width);
378      maxy = MIN2(maxy, scene->fb.height);
379   }
380
381
382   if (miny >= maxy || minx >= maxx) {
383      lp_scene_putback_data( scene, tri_bytes );
384      return;
385   }
386
387   oneoverarea = 1.0f / (line->dx * line->dx  + line->dy * line->dy);
388
389   /* Setup parameter interpolants:
390    */
391   setup_line_coefficients( setup, line, oneoverarea, v1, v2);
392
393   for (i = 0; i < 4; i++) {
394      struct lp_rast_plane *plane = &line->plane[i];
395
396      /* half-edge constants, will be interated over the whole render
397       * target.
398       */
399      plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
400
401
402      /* correct for top-left vs. bottom-left fill convention.
403       *
404       * note that we're overloading gl_rasterization_rules to mean
405       * both (0.5,0.5) pixel centers *and* bottom-left filling
406       * convention.
407       *
408       * GL actually has a top-left filling convention, but GL's
409       * notion of "top" differs from gallium's...
410       *
411       * Also, sometimes (in FBO cases) GL will render upside down
412       * to its usual method, in which case it will probably want
413       * to use the opposite, top-left convention.
414       */
415      if (plane->dcdx < 0) {
416         /* both fill conventions want this - adjust for left edges */
417         plane->c++;
418      }
419      else if (plane->dcdx == 0) {
420         if (setup->pixel_offset == 0) {
421            /* correct for top-left fill convention:
422             */
423            if (plane->dcdy > 0) plane->c++;
424         }
425         else {
426            /* correct for bottom-left fill convention:
427             */
428            if (plane->dcdy < 0) plane->c++;
429         }
430      }
431
432      plane->dcdx *= FIXED_ONE;
433      plane->dcdy *= FIXED_ONE;
434
435      /* find trivial reject offsets for each edge for a single-pixel
436       * sized block.  These will be scaled up at each recursive level to
437       * match the active blocksize.  Scaling in this way works best if
438       * the blocks are square.
439       */
440      plane->eo = 0;
441      if (plane->dcdx < 0) plane->eo -= plane->dcdx;
442      if (plane->dcdy > 0) plane->eo += plane->dcdy;
443
444      /* Calculate trivial accept offsets from the above.
445       */
446      plane->ei = plane->dcdy - plane->dcdx - plane->eo;
447
448      plane->step = line->step[i];
449
450      /* Fill in the inputs.step[][] arrays.
451       * We've manually unrolled some loops here.
452       */
453#define SETUP_STEP(j, x, y) \
454      line->step[i][j] = y * plane->dcdy - x * plane->dcdx
455
456      SETUP_STEP(0, 0, 0);
457      SETUP_STEP(1, 1, 0);
458      SETUP_STEP(2, 0, 1);
459      SETUP_STEP(3, 1, 1);
460
461      SETUP_STEP(4, 2, 0);
462      SETUP_STEP(5, 3, 0);
463      SETUP_STEP(6, 2, 1);
464      SETUP_STEP(7, 3, 1);
465
466      SETUP_STEP(8, 0, 2);
467      SETUP_STEP(9, 1, 2);
468      SETUP_STEP(10, 0, 3);
469      SETUP_STEP(11, 1, 3);
470
471      SETUP_STEP(12, 2, 2);
472      SETUP_STEP(13, 3, 2);
473      SETUP_STEP(14, 2, 3);
474      SETUP_STEP(15, 3, 3);
475#undef STEP
476   }
477
478
479   /*
480    * When rasterizing scissored tris, use the intersection of the
481    * triangle bounding box and the scissor rect to generate the
482    * scissor planes.
483    *
484    * This permits us to cut off the triangle "tails" that are present
485    * in the intermediate recursive levels caused when two of the
486    * triangles edges don't diverge quickly enough to trivially reject
487    * exterior blocks from the triangle.
488    *
489    * It's not really clear if it's worth worrying about these tails,
490    * but since we generate the planes for each scissored tri, it's
491    * free to trim them in this case.
492    *
493    * Note that otherwise, the scissor planes only vary in 'C' value,
494    * and even then only on state-changes.  Could alternatively store
495    * these planes elsewhere.
496    */
497   if (nr_planes == 8) {
498      line->plane[4].step = step_scissor_maxx;
499      line->plane[4].dcdx = 1;
500      line->plane[4].dcdy = 0;
501      line->plane[4].c = maxx;
502      line->plane[4].ei = -1;
503      line->plane[4].eo = 0;
504
505      line->plane[5].step = step_scissor_miny;
506      line->plane[5].dcdx = 0;
507      line->plane[5].dcdy = 1;
508      line->plane[5].c = 1-miny;
509      line->plane[5].ei = 0;
510      line->plane[5].eo = 1;
511
512      line->plane[6].step = step_scissor_maxy;
513      line->plane[6].dcdx = 0;
514      line->plane[6].dcdy = -1;
515      line->plane[6].c = maxy;
516      line->plane[6].ei = -1;
517      line->plane[6].eo = 0;
518
519      line->plane[7].step = step_scissor_minx;
520      line->plane[7].dcdx = -1;
521      line->plane[7].dcdy = 0;
522      line->plane[7].c = 1-minx;
523      line->plane[7].ei = 0;
524      line->plane[7].eo = 1;
525   }
526
527
528   /*
529    * All fields of 'tri' are now set.  The remaining code here is
530    * concerned with binning.
531    */
532
533   /* Convert to tile coordinates, and inclusive ranges:
534    */
535   ix0 = minx / TILE_SIZE;
536   iy0 = miny / TILE_SIZE;
537   ix1 = (maxx-1) / TILE_SIZE;
538   iy1 = (maxy-1) / TILE_SIZE;
539
540   /*
541    * Clamp to framebuffer size
542    */
543   assert(ix0 == MAX2(ix0, 0));
544   assert(iy0 == MAX2(iy0, 0));
545   assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
546   assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
547
548   /* Determine which tile(s) intersect the triangle's bounding box
549    */
550   if (iy0 == iy1 && ix0 == ix1)
551   {
552      /* Triangle is contained in a single tile:
553       */
554      lp_scene_bin_command( scene, ix0, iy0,
555                            lp_rast_tri_tab[nr_planes],
556                            lp_rast_arg_triangle(line, (1<<nr_planes)-1) );
557   }
558   else
559   {
560      int c[8];
561      int ei[8];
562      int eo[8];
563      int xstep[8];
564      int ystep[8];
565      int x, y;
566      int is_blit = -1; /* undetermined */
567
568      for (i = 0; i < nr_planes; i++) {
569         c[i] = (line->plane[i].c +
570                 line->plane[i].dcdy * iy0 * TILE_SIZE -
571                 line->plane[i].dcdx * ix0 * TILE_SIZE);
572
573         ei[i] = line->plane[i].ei << TILE_ORDER;
574         eo[i] = line->plane[i].eo << TILE_ORDER;
575         xstep[i] = -(line->plane[i].dcdx << TILE_ORDER);
576         ystep[i] = line->plane[i].dcdy << TILE_ORDER;
577      }
578
579
580
581      /* Test tile-sized blocks against the triangle.
582       * Discard blocks fully outside the tri.  If the block is fully
583       * contained inside the tri, bin an lp_rast_shade_tile command.
584       * Else, bin a lp_rast_triangle command.
585       */
586      for (y = iy0; y <= iy1; y++)
587      {
588         boolean in = FALSE;  /* are we inside the triangle? */
589         int cx[8];
590
591         for (i = 0; i < nr_planes; i++)
592            cx[i] = c[i];
593
594         for (x = ix0; x <= ix1; x++)
595         {
596            int out = 0;
597            int partial = 0;
598
599            for (i = 0; i < nr_planes; i++) {
600               int planeout = cx[i] + eo[i];
601               int planepartial = cx[i] + ei[i] - 1;
602               out |= (planeout >> 31);
603               partial |= (planepartial >> 31) & (1<<i);
604            }
605            if (out) {
606               /* do nothing */
607               if (in)
608                  break;  /* exiting triangle, all done with this row */
609               LP_COUNT(nr_empty_64);
610            }
611            else if (partial) {
612               /* Not trivially accepted by at least one plane -
613                * rasterize/shade partial tile
614                */
615               int count = util_bitcount(partial);
616               in = TRUE;
617               lp_scene_bin_command( scene, x, y,
618                                     lp_rast_tri_tab[count],
619                                     lp_rast_arg_triangle(line, partial) );
620
621               LP_COUNT(nr_partially_covered_64);
622            }
623            else {
624               /* triangle covers the whole tile- shade whole tile */
625               LP_COUNT(nr_fully_covered_64);
626               in = TRUE;
627               /* leverages on existing code in lp_setup_tri.c */
628               do_triangle_ccw_whole_tile(setup, scene, line, x, y,
629                                          opaque, &is_blit);
630            }
631
632            /* Iterate cx values across the region:
633             */
634            for (i = 0; i < nr_planes; i++)
635               cx[i] += xstep[i];
636         }
637
638         /* Iterate c values down the region:
639          */
640         for (i = 0; i < nr_planes; i++)
641            c[i] += ystep[i];
642      }
643   }
644}
645
646
647void lp_setup_choose_line( struct lp_setup_context *setup )
648{
649   setup->line = lp_setup_line;
650}
651
652
653