/**************************************************************************
 *
 * Copyright 2007-2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
/*
 * Rasterization for binned triangles within a tile
 */
31d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
32d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
33d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
34d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell/**
355286dd701640976ffc328e8e85fb3830746851a1Hui Qi Tay * Prototype for a 8 plane rasterizer function.  Will codegenerate
36d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell * several of these.
37d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell *
38d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell * XXX: Varients for more/fewer planes.
39d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell * XXX: Need ways of dropping planes as we descend.
40d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell * XXX: SIMD
41d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell */
42d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwellstatic void
43d4b64167b56f780d0dea73193c345622888fbc16Keith WhitwellTAG(do_block_4)(struct lp_rasterizer_task *task,
44d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                const struct lp_rast_triangle *tri,
45d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                const struct lp_rast_plane *plane,
46d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                int x, int y,
47d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                const int *c)
48d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell{
49510b03539413552a543e25de6b896eb10baf60aeKeith Whitwell   unsigned mask = 0xffff;
50510b03539413552a543e25de6b896eb10baf60aeKeith Whitwell   int j;
51d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
52510b03539413552a543e25de6b896eb10baf60aeKeith Whitwell   for (j = 0; j < NR_PLANES; j++) {
5367b957781d8195b8f8867e994c03b68f8dc5c807Keith Whitwell      mask &= ~build_mask_linear(c[j] - 1,
5467b957781d8195b8f8867e994c03b68f8dc5c807Keith Whitwell				 -plane[j].dcdx,
5567b957781d8195b8f8867e994c03b68f8dc5c807Keith Whitwell				 plane[j].dcdy);
56d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   }
57d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
58d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Now pass to the shader:
59d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
60d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   if (mask)
61d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
62d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell}
63d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
64d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell/**
65d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
66d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell * of the triangle's bounds.
67d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell */
68d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwellstatic void
69d4b64167b56f780d0dea73193c345622888fbc16Keith WhitwellTAG(do_block_16)(struct lp_rasterizer_task *task,
70d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                 const struct lp_rast_triangle *tri,
71d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                 const struct lp_rast_plane *plane,
72d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                 int x, int y,
73d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                 const int *c)
74d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell{
75d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   unsigned outmask, inmask, partmask, partial_mask;
764b322e71bb169af637864922edfb4108675781bbKeith Whitwell   unsigned j;
77d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
78d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   outmask = 0;                 /* outside one or more trivial reject planes */
79d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   partmask = 0;                /* outside one or more trivial accept planes */
80d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
81d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   for (j = 0; j < NR_PLANES; j++) {
82ee0d1c29eeddfa364a18783507acd4d031029ba2Keith Whitwell      const int dcdx = -plane[j].dcdx * 4;
83ee0d1c29eeddfa364a18783507acd4d031029ba2Keith Whitwell      const int dcdy = plane[j].dcdy * 4;
840aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell      const int cox = plane[j].eo * 4;
858965f042b327ad8697963e757f4607f4bb13a045Keith Whitwell      const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
868965f042b327ad8697963e757f4607f4bb13a045Keith Whitwell      const int cio = ei * 4 - 1;
870aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell
880aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell      build_masks(c[j] + cox,
890aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell		  cio - cox,
900aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell		  dcdx, dcdy,
910aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell		  &outmask,   /* sign bits from c[i][0..15] + cox */
920aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell		  &partmask); /* sign bits from c[i][0..15] + cio */
93d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   }
94d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
95d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   if (outmask == 0xffff)
96d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      return;
97d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
98d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Mask of sub-blocks which are inside all trivial accept planes:
99d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
100d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   inmask = ~partmask & 0xffff;
101d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
102d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Mask of sub-blocks which are inside all trivial reject planes,
103d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    * but outside at least one trivial accept plane:
104d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
105d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   partial_mask = partmask & ~outmask;
106d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
107d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   assert((partial_mask & inmask) == 0);
108d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
10998f3ff8f4a761d579ee9b42ee3090635519213a5Keith Whitwell   LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask)));
11098f3ff8f4a761d579ee9b42ee3090635519213a5Keith Whitwell
111d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Iterate over partials:
112d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
113d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   while (partial_mask) {
114d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      int i = ffs(partial_mask) - 1;
1154c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int ix = (i & 3) * 4;
1164c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int iy = (i >> 2) * 4;
1174c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int px = x + ix;
1184c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int py = y + iy;
119d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      int cx[NR_PLANES];
120d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
121d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      partial_mask &= ~(1 << i);
122d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
12398f3ff8f4a761d579ee9b42ee3090635519213a5Keith Whitwell      LP_COUNT(nr_partially_covered_4);
12498f3ff8f4a761d579ee9b42ee3090635519213a5Keith Whitwell
1254c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      for (j = 0; j < NR_PLANES; j++)
1264c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell         cx[j] = (c[j]
1274c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell		  - plane[j].dcdx * ix
1284c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell		  + plane[j].dcdy * iy);
1294c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell
130d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      TAG(do_block_4)(task, tri, plane, px, py, cx);
131d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   }
132d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
133d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Iterate over fulls:
134d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
135d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   while (inmask) {
136d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      int i = ffs(inmask) - 1;
1374c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int ix = (i & 3) * 4;
1384c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int iy = (i >> 2) * 4;
1394c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int px = x + ix;
1404c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int py = y + iy;
141d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
142d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      inmask &= ~(1 << i);
143d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
14498f3ff8f4a761d579ee9b42ee3090635519213a5Keith Whitwell      LP_COUNT(nr_fully_covered_4);
145d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      block_full_4(task, tri, px, py);
146d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   }
147d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell}
148d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
149d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
150d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell/**
151d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell * Scan the tile in chunks and figure out which pixels to rasterize
152d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell * for this triangle.
153d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell */
154d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwellvoid
155d4b64167b56f780d0dea73193c345622888fbc16Keith WhitwellTAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
156d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell                      const union lp_rast_cmd_arg arg)
157d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell{
158d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   const struct lp_rast_triangle *tri = arg.triangle.tri;
159d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   unsigned plane_mask = arg.triangle.plane_mask;
1609bf8a55c4b29d55320fc2e7875ecf0e9ca164ee8Keith Whitwell   const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
161d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   const int x = task->x, y = task->y;
162d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   struct lp_rast_plane plane[NR_PLANES];
163d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   int c[NR_PLANES];
164d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   unsigned outmask, inmask, partmask, partial_mask;
16585d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell   unsigned j = 0;
166d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
1679f6e8e1d6b8696a3ee96cba01b2466ba7a1a8ef6Keith Whitwell   if (tri->inputs.disable) {
1689f6e8e1d6b8696a3ee96cba01b2466ba7a1a8ef6Keith Whitwell      /* This triangle was partially binned and has been disabled */
1699f6e8e1d6b8696a3ee96cba01b2466ba7a1a8ef6Keith Whitwell      return;
1709f6e8e1d6b8696a3ee96cba01b2466ba7a1a8ef6Keith Whitwell   }
1719f6e8e1d6b8696a3ee96cba01b2466ba7a1a8ef6Keith Whitwell
172d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   outmask = 0;                 /* outside one or more trivial reject planes */
173d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   partmask = 0;                /* outside one or more trivial accept planes */
174d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
17585d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell   while (plane_mask) {
17685d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell      int i = ffs(plane_mask) - 1;
1779bf8a55c4b29d55320fc2e7875ecf0e9ca164ee8Keith Whitwell      plane[j] = tri_plane[i];
17885d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell      plane_mask &= ~(1 << i);
179d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
1804b322e71bb169af637864922edfb4108675781bbKeith Whitwell
18185d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell      {
18285d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell	 const int dcdx = -plane[j].dcdx * 16;
18385d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell	 const int dcdy = plane[j].dcdy * 16;
1840aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell	 const int cox = plane[j].eo * 16;
1858965f042b327ad8697963e757f4607f4bb13a045Keith Whitwell         const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
1868965f042b327ad8697963e757f4607f4bb13a045Keith Whitwell         const int cio = ei * 16 - 1;
1870aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell
1880aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell	 build_masks(c[j] + cox,
1890aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell		     cio - cox,
1900aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell		     dcdx, dcdy,
1910aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell		     &outmask,   /* sign bits from c[i][0..15] + cox */
1920aa3a09ced07e150901cd0f7a7917556a018c252Keith Whitwell		     &partmask); /* sign bits from c[i][0..15] + cio */
19385d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell      }
19485d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell
19585d9bc236d6a8ff8f12cbc2150f8c3740354f573Keith Whitwell      j++;
196d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   }
197d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
198d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   if (outmask == 0xffff)
199d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      return;
200d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
201d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Mask of sub-blocks which are inside all trivial accept planes:
202d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
203d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   inmask = ~partmask & 0xffff;
204d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
205d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Mask of sub-blocks which are inside all trivial reject planes,
206d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    * but outside at least one trivial accept plane:
207d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
208d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   partial_mask = partmask & ~outmask;
209d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
210d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   assert((partial_mask & inmask) == 0);
211d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
21298f3ff8f4a761d579ee9b42ee3090635519213a5Keith Whitwell   LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask)));
21398f3ff8f4a761d579ee9b42ee3090635519213a5Keith Whitwell
214d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Iterate over partials:
215d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
216d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   while (partial_mask) {
217d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      int i = ffs(partial_mask) - 1;
2184c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int ix = (i & 3) * 16;
2194c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int iy = (i >> 2) * 16;
2204c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int px = x + ix;
2214c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int py = y + iy;
222d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      int cx[NR_PLANES];
223d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
224d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      for (j = 0; j < NR_PLANES; j++)
2254c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell         cx[j] = (c[j]
2264c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell		  - plane[j].dcdx * ix
2274c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell		  + plane[j].dcdy * iy);
228d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
229d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      partial_mask &= ~(1 << i);
230d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
231d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      LP_COUNT(nr_partially_covered_16);
232d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      TAG(do_block_16)(task, tri, plane, px, py, cx);
233d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   }
234d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
235d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   /* Iterate over fulls:
236d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell    */
237d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   while (inmask) {
238d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      int i = ffs(inmask) - 1;
2394c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int ix = (i & 3) * 16;
2404c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int iy = (i >> 2) * 16;
2414c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int px = x + ix;
2424c0641454b952f2c240de8c83511703f98e1f72fKeith Whitwell      int py = y + iy;
243d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
244d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      inmask &= ~(1 << i);
245d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
246d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      LP_COUNT(nr_fully_covered_16);
247d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell      block_full_16(task, tri, px, py);
248d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell   }
249d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell}
250d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
#if defined(PIPE_ARCH_SSE) && defined(TRI_16)
/**
 * SSE path: rasterize a triangle within a single 16x16 block.
 *
 * Here arg.triangle.plane_mask does not select planes; its low 8 bits and
 * the bits above them are read as the x and y offsets of the block relative
 * to the task origin.  The first NR_PLANES planes of the triangle are used.
 *
 * Each plane's reject test (value + eo*4) is evaluated on a 4x4 grid of
 * 4-pixel steps with sign_bits4().  Sub-blocks rejected by no plane are then
 * tested per pixel and shaded through lp_rast_shade_quads_mask().  There is
 * no trivial-accept shortcut on this path.
 *
 * XXX: special case this when intersection is not required.
 *      - tile completely within bbox,
 *      - bbox completely within tile.
 */
void
TRI_16(struct lp_rasterizer_task *task,
       const union lp_rast_cmd_arg arg)
{
   const struct lp_rast_triangle *tri = arg.triangle.tri;
   const struct lp_rast_plane *plane = GET_PLANES(tri);
   unsigned packed_pos = arg.triangle.plane_mask;
   unsigned rejected = 0;       /* failed the reject test of some plane */
   unsigned candidates;
   unsigned p;
   __m128i cstep4[NR_PLANES][4];

   int x = (packed_pos & 0xff);
   int y = (packed_pos >> 8);

   x += task->x;
   y += task->y;

   for (p = 0; p < NR_PLANES; p++) {
      const int step_x = -plane[p].dcdx * 4;
      const int step_y = plane[p].dcdy * 4;
      const int c = plane[p].c + plane[p].dcdy * y - plane[p].dcdx * x;
      const int reject_ofs = plane[p].eo * 4;
      __m128i row_step = _mm_set1_epi32(step_y);

      /* Four rows of four x-steps, each row one y-step below the last. */
      cstep4[p][0] = _mm_setr_epi32(0, step_x, step_x*2, step_x*3);
      cstep4[p][1] = _mm_add_epi32(cstep4[p][0], row_step);
      cstep4[p][2] = _mm_add_epi32(cstep4[p][1], row_step);
      cstep4[p][3] = _mm_add_epi32(cstep4[p][2], row_step);

      rejected |= sign_bits4(cstep4[p], c + reject_ofs);
   }

   if (rejected == 0xffff)
      return;

   /* Every sub-block that no plane rejected is tested per pixel. */
   candidates = 0xffff & ~rejected;

   for (; candidates; candidates &= candidates - 1) {
      const int bit = ffs(candidates) - 1;
      const int px = x + (bit & 3) * 4;
      const int py = y + (bit >> 2) * 4;
      unsigned covered = 0xffff;

      for (p = 0; p < NR_PLANES; p++) {
         /* cstep4 holds 4-pixel steps; scaling the biased start value by 4
          * as well lets the same table be reused here, since only the sign
          * of the sum is examined.
          */
         const int cx = (plane[p].c - 1
                         - plane[p].dcdx * px
                         + plane[p].dcdy * py) * 4;

         covered &= ~sign_bits4(cstep4[p], cx);
      }

      if (covered)
         lp_rast_shade_quads_mask(task, &tri->inputs, px, py, covered);
   }
}
#endif
3270ff132e5a633170afaed0aea54d01438c895b8abKeith Whitwell
#if defined(PIPE_ARCH_SSE) && defined(TRI_4)
/**
 * SSE path: rasterize a triangle within a single 4x4 block.
 *
 * Here arg.triangle.plane_mask does not select planes; its low 8 bits and
 * the bits above them are read as the x and y offsets of the block relative
 * to the task origin.  The first NR_PLANES planes of the triangle are used.
 *
 * For each plane the sixteen per-pixel values are computed, narrowed to one
 * byte each, and their sign bits cleared from the coverage mask.  Remaining
 * bits are shaded through lp_rast_shade_quads_mask().
 *
 * The plane value is biased by -1 before the sign test, matching
 * do_block_4() and TRI_16(), so a pixel whose plane value is exactly zero
 * is treated as uncovered on every code path.
 *
 * \param task  rasterizer task supplying the origin
 * \param arg   command argument carrying the triangle and the packed offsets
 */
void
TRI_4(struct lp_rasterizer_task *task,
      const union lp_rast_cmd_arg arg)
{
   const struct lp_rast_triangle *tri = arg.triangle.tri;
   const struct lp_rast_plane *plane = GET_PLANES(tri);
   const unsigned packed_pos = arg.triangle.plane_mask;
   const int x = task->x + (packed_pos & 0xff);
   const int y = task->y + (packed_pos >> 8);
   unsigned covered = 0xffff;
   unsigned j;

   for (j = 0; j < NR_PLANES; j++) {
      const int cx = (plane[j].c - 1
                      - plane[j].dcdx * x
                      + plane[j].dcdy * y);

      const int dcdx = -plane[j].dcdx;
      const int dcdy = plane[j].dcdy;
      __m128i xdcdy = _mm_set1_epi32(dcdy);

      /* One vector per row of the block, four pixels per row. */
      __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
      __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
      __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
      __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);

      /* Signed-saturating packs narrow 32 -> 16 -> 8 bits while keeping
       * each value's sign, leaving sixteen bytes in row-major order.
       */
      __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
      __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
      __m128i result = _mm_packs_epi16(cstep01, cstep23);

      /* One sign bit per pixel: a set bit means the pixel is outside. */
      covered &= ~_mm_movemask_epi8(result);
   }

   if (covered)
      lp_rast_shade_quads_mask(task, &tri->inputs, x, y, covered);
}
#endif
3730ff132e5a633170afaed0aea54d01438c895b8abKeith Whitwell
3740ff132e5a633170afaed0aea54d01438c895b8abKeith Whitwell
3750ff132e5a633170afaed0aea54d01438c895b8abKeith Whitwell
/* Release the template parameters consumed above (presumably so the
 * includer can define them afresh for another instantiation).
 */
#undef NR_PLANES
#undef TRI_16
#undef TRI_4
#undef TAG
380d4b64167b56f780d0dea73193c345622888fbc16Keith Whitwell
381