lp_rast_tri_tmp.h revision ef3407672ed4c2c6d070384ea763e73b3da2240a
1/************************************************************************** 2 * 3 * Copyright 2007-2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * Rasterization for binned triangles within a tile 30 */ 31 32 33 34/** 35 * Prototype for a 8 plane rasterizer function. Will codegenerate 36 * several of these. 37 * 38 * XXX: Varients for more/fewer planes. 39 * XXX: Need ways of dropping planes as we descend. 40 * XXX: SIMD 41 */ 42static void 43TAG(do_block_4)(struct lp_rasterizer_task *task, 44 const struct lp_rast_triangle *tri, 45 const struct lp_rast_plane *plane, 46 int x, int y, 47 const int *c) 48{ 49 unsigned mask = 0xffff; 50 int j; 51 52 for (j = 0; j < NR_PLANES; j++) { 53 mask &= ~build_mask_linear(c[j] - 1, 54 -plane[j].dcdx, 55 plane[j].dcdy); 56 } 57 58 /* Now pass to the shader: 59 */ 60 if (mask) 61 lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); 62} 63 64/** 65 * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out 66 * of the triangle's bounds. 67 */ 68static void 69TAG(do_block_16)(struct lp_rasterizer_task *task, 70 const struct lp_rast_triangle *tri, 71 const struct lp_rast_plane *plane, 72 int x, int y, 73 const int *c) 74{ 75 unsigned outmask, inmask, partmask, partial_mask; 76 unsigned j; 77 78 outmask = 0; /* outside one or more trivial reject planes */ 79 partmask = 0; /* outside one or more trivial accept planes */ 80 81 for (j = 0; j < NR_PLANES; j++) { 82 const int dcdx = -plane[j].dcdx * 4; 83 const int dcdy = plane[j].dcdy * 4; 84 const int cox = plane[j].eo * 4; 85 const int cio = plane[j].ei * 4 - 1; 86 87 build_masks(c[j] + cox, 88 cio - cox, 89 dcdx, dcdy, 90 &outmask, /* sign bits from c[i][0..15] + cox */ 91 &partmask); /* sign bits from c[i][0..15] + cio */ 92 } 93 94 if (outmask == 0xffff) 95 return; 96 97 /* Mask of sub-blocks which are inside all trivial accept planes: 98 */ 99 inmask = ~partmask & 0xffff; 100 101 /* Mask of sub-blocks which are inside all trivial reject planes, 102 * but outside at least one trivial accept plane: 103 */ 104 partial_mask = partmask & ~outmask; 105 106 assert((partial_mask & inmask) == 0); 107 108 LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask))); 109 110 /* Iterate over partials: 111 */ 112 while (partial_mask) { 113 int i = ffs(partial_mask) - 1; 114 int ix = (i & 3) * 4; 115 int iy = (i >> 2) * 4; 116 int px = x + ix; 117 int py = y + iy; 118 int cx[NR_PLANES]; 119 120 partial_mask &= ~(1 << i); 121 122 LP_COUNT(nr_partially_covered_4); 123 124 for (j = 0; j < NR_PLANES; j++) 125 cx[j] = (c[j] 126 - plane[j].dcdx * ix 127 + plane[j].dcdy * iy); 128 129 TAG(do_block_4)(task, tri, plane, px, py, cx); 130 } 131 132 /* Iterate over fulls: 133 */ 134 while (inmask) { 135 int i = ffs(inmask) - 1; 136 int ix = (i & 3) * 4; 137 int iy = (i >> 2) * 4; 138 int px = x + ix; 139 int py = y + iy; 140 141 inmask &= ~(1 << i); 142 143 LP_COUNT(nr_fully_covered_4); 144 block_full_4(task, tri, px, py); 145 } 146} 147 148 149/** 150 * Scan the tile in chunks and figure out which pixels to rasterize 151 * for this triangle. 152 */ 153void 154TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, 155 const union lp_rast_cmd_arg arg) 156{ 157 const struct lp_rast_triangle *tri = arg.triangle.tri; 158 unsigned plane_mask = arg.triangle.plane_mask; 159 const int x = task->x, y = task->y; 160 struct lp_rast_plane plane[NR_PLANES]; 161 int c[NR_PLANES]; 162 unsigned outmask, inmask, partmask, partial_mask; 163 unsigned j = 0; 164 165 if (tri->inputs.disable) { 166 /* This triangle was partially binned and has been disabled */ 167 return; 168 } 169 170 outmask = 0; /* outside one or more trivial reject planes */ 171 partmask = 0; /* outside one or more trivial accept planes */ 172 173 while (plane_mask) { 174 int i = ffs(plane_mask) - 1; 175 plane[j] = tri->plane[i]; 176 plane_mask &= ~(1 << i); 177 c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; 178 179 { 180 const int dcdx = -plane[j].dcdx * 16; 181 const int dcdy = plane[j].dcdy * 16; 182 const int cox = plane[j].eo * 16; 183 const int cio = plane[j].ei * 16 - 1; 184 185 build_masks(c[j] + cox, 186 cio - cox, 187 dcdx, dcdy, 188 &outmask, /* sign bits from c[i][0..15] + cox */ 189 &partmask); /* sign bits from c[i][0..15] + cio */ 190 } 191 192 j++; 193 } 194 195 if (outmask == 0xffff) 196 return; 197 198 /* Mask of sub-blocks which are inside all trivial accept planes: 199 */ 200 inmask = ~partmask & 0xffff; 201 202 /* Mask of sub-blocks which are inside all trivial reject planes, 203 * but outside at least one trivial accept plane: 204 */ 205 partial_mask = partmask & ~outmask; 206 207 assert((partial_mask & inmask) == 0); 208 209 LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask))); 210 211 /* Iterate over partials: 212 */ 213 while (partial_mask) { 214 int i = ffs(partial_mask) - 1; 215 int ix = (i & 3) * 16; 216 int iy = (i >> 2) * 16; 217 int px = x + ix; 218 int py = y + iy; 219 int cx[NR_PLANES]; 220 221 for (j = 0; j < NR_PLANES; j++) 222 cx[j] = (c[j] 223 - plane[j].dcdx * ix 224 + plane[j].dcdy * iy); 225 226 partial_mask &= ~(1 << i); 227 228 LP_COUNT(nr_partially_covered_16); 229 TAG(do_block_16)(task, tri, plane, px, py, cx); 230 } 231 232 /* Iterate over fulls: 233 */ 234 while (inmask) { 235 int i = ffs(inmask) - 1; 236 int ix = (i & 3) * 16; 237 int iy = (i >> 2) * 16; 238 int px = x + ix; 239 int py = y + iy; 240 241 inmask &= ~(1 << i); 242 243 LP_COUNT(nr_fully_covered_16); 244 block_full_16(task, tri, px, py); 245 } 246} 247 248#if defined(PIPE_ARCH_SSE) && defined(TRI_16) 249/* XXX: special case this when intersection is not required. 250 * - tile completely within bbox, 251 * - bbox completely within tile. 252 */ 253void 254TRI_16(struct lp_rasterizer_task *task, 255 const union lp_rast_cmd_arg arg) 256{ 257 const struct lp_rast_triangle *tri = arg.triangle.tri; 258 const struct lp_rast_plane *plane = tri->plane; 259 unsigned mask = arg.triangle.plane_mask; 260 unsigned outmask, partial_mask; 261 unsigned j; 262 __m128i cstep4[NR_PLANES][4]; 263 264 int x = (mask & 0xff); 265 int y = (mask >> 8); 266 267 outmask = 0; /* outside one or more trivial reject planes */ 268 269 x += task->x; 270 y += task->y; 271 272 for (j = 0; j < NR_PLANES; j++) { 273 const int dcdx = -plane[j].dcdx * 4; 274 const int dcdy = plane[j].dcdy * 4; 275 __m128i xdcdy = _mm_set1_epi32(dcdy); 276 277 cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); 278 cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); 279 cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); 280 cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); 281 282 { 283 const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; 284 const int cox = plane[j].eo * 4; 285 286 outmask |= sign_bits4(cstep4[j], c + cox); 287 } 288 } 289 290 if (outmask == 0xffff) 291 return; 292 293 294 /* Mask of sub-blocks which are inside all trivial reject planes, 295 * but outside at least one trivial accept plane: 296 */ 297 partial_mask = 0xffff & ~outmask; 298 299 /* Iterate over partials: 300 */ 301 while (partial_mask) { 302 int i = ffs(partial_mask) - 1; 303 int ix = (i & 3) * 4; 304 int iy = (i >> 2) * 4; 305 int px = x + ix; 306 int py = y + iy; 307 unsigned mask = 0xffff; 308 309 partial_mask &= ~(1 << i); 310 311 for (j = 0; j < NR_PLANES; j++) { 312 const int cx = (plane[j].c - 1 313 - plane[j].dcdx * px 314 + plane[j].dcdy * py) * 4; 315 316 mask &= ~sign_bits4(cstep4[j], cx); 317 } 318 319 if (mask) 320 lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); 321 } 322} 323#endif 324 325#if defined(PIPE_ARCH_SSE) && defined(TRI_4) 326void 327TRI_4(struct lp_rasterizer_task *task, 328 const union lp_rast_cmd_arg arg) 329{ 330 const struct lp_rast_triangle *tri = arg.triangle.tri; 331 const struct lp_rast_plane *plane = tri->plane; 332 unsigned mask = arg.triangle.plane_mask; 333 const int x = task->x + (mask & 0xff); 334 const int y = task->y + (mask >> 8); 335 unsigned j; 336 337 /* Iterate over partials: 338 */ 339 { 340 unsigned mask = 0xffff; 341 342 for (j = 0; j < NR_PLANES; j++) { 343 const int cx = (plane[j].c 344 - plane[j].dcdx * x 345 + plane[j].dcdy * y); 346 347 const int dcdx = -plane[j].dcdx; 348 const int dcdy = plane[j].dcdy; 349 __m128i xdcdy = _mm_set1_epi32(dcdy); 350 351 __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); 352 __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); 353 __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); 354 __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); 355 356 __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); 357 __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); 358 __m128i result = _mm_packs_epi16(cstep01, cstep23); 359 360 /* Extract the sign bits 361 */ 362 mask &= ~_mm_movemask_epi8(result); 363 } 364 365 if (mask) 366 lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); 367 } 368} 369#endif 370 371 372 373#undef TAG 374#undef TRI_4 375#undef TRI_16 376#undef NR_PLANES 377 378