/* lp_setup_tri.c revision 82ec7f018d20e46e9c43ea467354dcfe4f03bae3 */
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * Binning code for triangles 30 */ 31 32#include "lp_setup_context.h" 33#include "lp_rast.h" 34#include "util/u_math.h" 35#include "util/u_memory.h" 36 37#define NUM_CHANNELS 4 38 39/** 40 * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 
41 */ 42static void constant_coef( struct lp_rast_triangle *tri, 43 unsigned slot, 44 const float value, 45 unsigned i ) 46{ 47 tri->inputs.a0[slot][i] = value; 48 tri->inputs.dadx[slot][i] = 0; 49 tri->inputs.dady[slot][i] = 0; 50} 51 52/** 53 * Compute a0, dadx and dady for a linearly interpolated coefficient, 54 * for a triangle. 55 */ 56static void linear_coef( struct lp_rast_triangle *tri, 57 unsigned slot, 58 const float (*v1)[4], 59 const float (*v2)[4], 60 const float (*v3)[4], 61 unsigned vert_attr, 62 unsigned i) 63{ 64 float a1 = v1[vert_attr][i]; 65 float a2 = v2[vert_attr][i]; 66 float a3 = v3[vert_attr][i]; 67 68 float da12 = a1 - a2; 69 float da31 = a3 - a1; 70 float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; 71 float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; 72 73 tri->inputs.dadx[slot][i] = dadx; 74 tri->inputs.dady[slot][i] = dady; 75 76 /* calculate a0 as the value which would be sampled for the 77 * fragment at (0,0), taking into account that we want to sample at 78 * pixel centers, in other words (0.5, 0.5). 79 * 80 * this is neat but unfortunately not a good way to do things for 81 * triangles with very large values of dadx or dady as it will 82 * result in the subtraction and re-addition from a0 of a very 83 * large number, which means we'll end up loosing a lot of the 84 * fractional bits and precision from a0. the way to fix this is 85 * to define a0 as the sample at a pixel center somewhere near vmin 86 * instead - i'll switch to this later. 87 */ 88 tri->inputs.a0[slot][i] = (v1[vert_attr][i] - 89 (dadx * (v1[0][0] - 0.5f) + 90 dady * (v1[0][1] - 0.5f))); 91} 92 93 94/** 95 * Compute a0, dadx and dady for a perspective-corrected interpolant, 96 * for a triangle. 97 * We basically multiply the vertex value by 1/w before computing 98 * the plane coefficients (a0, dadx, dady). 
99 * Later, when we compute the value at a particular fragment position we'll 100 * divide the interpolated value by the interpolated W at that fragment. 101 */ 102static void perspective_coef( struct lp_rast_triangle *tri, 103 unsigned slot, 104 const float (*v1)[4], 105 const float (*v2)[4], 106 const float (*v3)[4], 107 unsigned vert_attr, 108 unsigned i) 109{ 110 /* premultiply by 1/w (v[0][3] is always 1/w): 111 */ 112 float a1 = v1[vert_attr][i] * v1[0][3]; 113 float a2 = v2[vert_attr][i] * v2[0][3]; 114 float a3 = v3[vert_attr][i] * v3[0][3]; 115 float da12 = a1 - a2; 116 float da31 = a3 - a1; 117 float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; 118 float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; 119 120 121 tri->inputs.dadx[slot][i] = dadx; 122 tri->inputs.dady[slot][i] = dady; 123 tri->inputs.a0[slot][i] = (a1 - 124 (dadx * (v1[0][0] - 0.5f) + 125 dady * (v1[0][1] - 0.5f))); 126} 127 128 129/** 130 * Special coefficient setup for gl_FragCoord. 131 * X and Y are trivial, though Y has to be inverted for OpenGL. 132 * Z and W are copied from position_coef which should have already been computed. 133 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
134 */ 135static void 136setup_fragcoord_coef(struct lp_rast_triangle *tri, 137 unsigned slot, 138 const float (*v1)[4], 139 const float (*v2)[4], 140 const float (*v3)[4]) 141{ 142 /*X*/ 143 tri->inputs.a0[slot][0] = 0.0; 144 tri->inputs.dadx[slot][0] = 1.0; 145 tri->inputs.dady[slot][0] = 0.0; 146 /*Y*/ 147 tri->inputs.a0[slot][1] = 0.0; 148 tri->inputs.dadx[slot][1] = 0.0; 149 tri->inputs.dady[slot][1] = 1.0; 150 /*Z*/ 151 linear_coef(tri, slot, v1, v2, v3, 0, 2); 152 /*W*/ 153 linear_coef(tri, slot, v1, v2, v3, 0, 3); 154} 155 156 157static void setup_facing_coef( struct lp_rast_triangle *tri, 158 unsigned slot, 159 boolean frontface ) 160{ 161 constant_coef( tri, slot, 1.0f - frontface, 0 ); 162 constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ 163 constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ 164 constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ 165} 166 167 168/** 169 * Compute the tri->coef[] array dadx, dady, a0 values. 170 */ 171static void setup_tri_coefficients( struct setup_context *setup, 172 struct lp_rast_triangle *tri, 173 const float (*v1)[4], 174 const float (*v2)[4], 175 const float (*v3)[4], 176 boolean frontface ) 177{ 178 unsigned slot; 179 180 /* The internal position input is in slot zero: 181 */ 182 setup_fragcoord_coef(tri, 0, v1, v2, v3); 183 184 /* setup interpolation for all the remaining attrbutes: 185 */ 186 for (slot = 0; slot < setup->fs.nr_inputs; slot++) { 187 unsigned vert_attr = setup->fs.input[slot].src_index; 188 unsigned i; 189 190 switch (setup->fs.input[slot].interp) { 191 case LP_INTERP_CONSTANT: 192 for (i = 0; i < NUM_CHANNELS; i++) 193 constant_coef(tri, slot+1, v3[vert_attr][i], i); 194 break; 195 196 case LP_INTERP_LINEAR: 197 for (i = 0; i < NUM_CHANNELS; i++) 198 linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i); 199 break; 200 201 case LP_INTERP_PERSPECTIVE: 202 for (i = 0; i < NUM_CHANNELS; i++) 203 perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i); 204 break; 205 206 case LP_INTERP_POSITION: 207 /* 
XXX: fix me - duplicates the values in slot zero. 208 */ 209 setup_fragcoord_coef(tri, slot+1, v1, v2, v3); 210 break; 211 212 case LP_INTERP_FACING: 213 setup_facing_coef(tri, slot+1, frontface); 214 break; 215 216 default: 217 assert(0); 218 } 219 } 220} 221 222 223 224/* XXX: do this by add/subtracting a large floating point number: 225 */ 226static inline float subpixel_snap( float a ) 227{ 228 int i = a * 16; 229 return (float)i * (1.0/16); 230} 231 232 233static INLINE void bin_triangle( struct cmd_block_list *list, 234 const struct lp_rast_triangle arg ) 235{ 236} 237 238 239/* to avoid having to allocate power-of-four, square render targets, 240 * end up having a specialized version of the above that runs only at 241 * the topmost level. 242 * 243 * at the topmost level there may be an arbitary number of steps on 244 * either dimension, so this loop needs to be either separately 245 * code-generated and unrolled for each render target size, or kept as 246 * generic looping code: 247 */ 248 249#define MIN3(a,b,c) MIN2(MIN2(a,b),c) 250#define MAX3(a,b,c) MAX2(MAX2(a,b),c) 251 252static void 253do_triangle_ccw(struct setup_context *setup, 254 const float (*v1)[4], 255 const float (*v2)[4], 256 const float (*v3)[4], 257 boolean frontfacing ) 258{ 259 const int rt_width = setup->fb.width; 260 const int rt_height = setup->fb.height; 261 262 const float y1 = subpixel_snap(v1[0][1]); 263 const float y2 = subpixel_snap(v2[0][1]); 264 const float y3 = subpixel_snap(v3[0][1]); 265 266 const float x1 = subpixel_snap(v1[0][0]); 267 const float x2 = subpixel_snap(v2[0][0]); 268 const float x3 = subpixel_snap(v3[0][0]); 269 270 struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); 271 float area; 272 float c1, c2, c3; 273 int minx, maxx, miny, maxy; 274 275 tri->inputs.state = setup->fs.stored; 276 277 tri->dx12 = x1 - x2; 278 tri->dx23 = x2 - x3; 279 tri->dx31 = x3 - x1; 280 281 tri->dy12 = y1 - y2; 282 tri->dy23 = y2 - y3; 283 tri->dy31 = y3 - y1; 284 
285 area = (tri->dx12 * tri->dy31 - 286 tri->dx31 * tri->dy12); 287 288 /* Cull non-ccw and zero-sized triangles. 289 */ 290 if (area <= 0 || util_is_inf_or_nan(area)) 291 return; 292 293 // Bounding rectangle 294 minx = util_iround(MIN3(x1, x2, x3) - .5); 295 maxx = util_iround(MAX3(x1, x2, x3) + .5); 296 miny = util_iround(MIN3(y1, y2, y3) - .5); 297 maxy = util_iround(MAX3(y1, y2, y3) + .5); 298 299 /* Clamp to framebuffer (or tile) dimensions: 300 */ 301 miny = MAX2(0, miny); 302 minx = MAX2(0, minx); 303 maxy = MIN2(rt_height, maxy); 304 maxx = MIN2(rt_width, maxx); 305 306 if (miny == maxy || minx == maxx) 307 return; 308 309 /* The only divide in this code. Is it really needed? 310 */ 311 tri->oneoverarea = 1.0f / area; 312 313 /* Setup parameter interpolants: 314 */ 315 setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing ); 316 317 /* half-edge constants, will be interated over the whole 318 * rendertarget. 319 */ 320 tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; 321 tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; 322 tri->c3 = tri->dy31 * x3 - tri->dx31 * y3; 323 324 /* correct for top-left fill convention: 325 */ 326 if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++; 327 if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++; 328 if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++; 329 330 /* find trivial reject offsets for each edge for a single-pixel 331 * sized block. These will be scaled up at each recursive level to 332 * match the active blocksize. Scaling in this way works best if 333 * the blocks are square. 334 */ 335 tri->eo1 = 0; 336 if (tri->dy12 < 0) tri->eo1 -= tri->dy12; 337 if (tri->dx12 > 0) tri->eo1 += tri->dx12; 338 339 tri->eo2 = 0; 340 if (tri->dy23 < 0) tri->eo2 -= tri->dy23; 341 if (tri->dx23 > 0) tri->eo2 += tri->dx23; 342 343 tri->eo3 = 0; 344 if (tri->dy31 < 0) tri->eo3 -= tri->dy31; 345 if (tri->dx31 > 0) tri->eo3 += tri->dx31; 346 347 /* Calculate trivial accept offsets from the above. 
348 */ 349 tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; 350 tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; 351 tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; 352 353 minx &= ~(TILESIZE-1); /* aligned blocks */ 354 miny &= ~(TILESIZE-1); /* aligned blocks */ 355 356 c1 = tri->c1 + tri->dx12 * miny - tri->dy12 * minx; 357 c2 = tri->c2 + tri->dx23 * miny - tri->dy23 * minx; 358 c3 = tri->c3 + tri->dx31 * miny - tri->dy31 * minx; 359 360 minx /= TILESIZE; 361 miny /= TILESIZE; 362 maxx /= TILESIZE; 363 maxy /= TILESIZE; 364 365 /* Convert to tile coordinates: 366 */ 367 if (miny == maxy && minx == maxx) 368 { 369 /* Triangle is contained in a single tile: 370 */ 371 bin_command( &setup->tile[minx][miny], lp_rast_triangle, 372 lp_rast_arg_triangle(tri) ); 373 } 374 else 375 { 376 const int step = TILESIZE; 377 378 float ei1 = tri->ei1 * step; 379 float ei2 = tri->ei2 * step; 380 float ei3 = tri->ei3 * step; 381 382 float eo1 = tri->eo1 * step; 383 float eo2 = tri->eo2 * step; 384 float eo3 = tri->eo3 * step; 385 386 float xstep1 = -step * tri->dy12; 387 float xstep2 = -step * tri->dy23; 388 float xstep3 = -step * tri->dy31; 389 390 float ystep1 = step * tri->dx12; 391 float ystep2 = step * tri->dx23; 392 float ystep3 = step * tri->dx31; 393 int x, y; 394 395 396 /* Subdivide space into NxM blocks, where each block is square and 397 * power-of-four in dimension. 398 * 399 * Trivially accept or reject blocks, else jump to per-pixel 400 * examination above. 
401 */ 402 for (y = miny; y <= maxy; y++) 403 { 404 float cx1 = c1; 405 float cx2 = c2; 406 float cx3 = c3; 407 408 for (x = minx; x <= maxx; x++) 409 { 410 if (cx1 + eo1 < 0 || 411 cx2 + eo2 < 0 || 412 cx3 + eo3 < 0) 413 { 414 /* do nothing */ 415 } 416 else if (cx1 + ei1 > 0 && 417 cx2 + ei2 > 0 && 418 cx3 + ei3 > 0) 419 { 420 /* shade whole tile */ 421 bin_command( &setup->tile[x][y], lp_rast_shade_tile, 422 lp_rast_arg_inputs(&tri->inputs) ); 423 } 424 else 425 { 426#if 1 427 bin_command( &setup->tile[x][y], lp_rast_shade_tile, 428 lp_rast_arg_inputs(&tri->inputs) ); 429#else 430 /* shade partial tile */ 431 bin_command( &setup->tile[x][y], 432 lp_rast_triangle, 433 lp_rast_arg_triangle(tri) ); 434#endif 435 } 436 437 /* Iterate cx values across the region: 438 */ 439 cx1 += xstep1; 440 cx2 += xstep2; 441 cx3 += xstep3; 442 } 443 444 /* Iterate c values down the region: 445 */ 446 c1 += ystep1; 447 c2 += ystep2; 448 c3 += ystep3; 449 } 450 } 451} 452 453static void triangle_cw( struct setup_context *setup, 454 const float (*v0)[4], 455 const float (*v1)[4], 456 const float (*v2)[4] ) 457{ 458 do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); 459} 460 461static void triangle_ccw( struct setup_context *setup, 462 const float (*v0)[4], 463 const float (*v1)[4], 464 const float (*v2)[4] ) 465{ 466 do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); 467} 468 469static void triangle_both( struct setup_context *setup, 470 const float (*v0)[4], 471 const float (*v1)[4], 472 const float (*v2)[4] ) 473{ 474 /* edge vectors e = v0 - v2, f = v1 - v2 */ 475 const float ex = v0[0][0] - v2[0][0]; 476 const float ey = v0[0][1] - v2[0][1]; 477 const float fx = v1[0][0] - v2[0][0]; 478 const float fy = v1[0][1] - v2[0][1]; 479 480 /* det = cross(e,f).z */ 481 if (ex * fy - ey * fx < 0) 482 triangle_ccw( setup, v0, v1, v2 ); 483 else 484 triangle_cw( setup, v0, v1, v2 ); 485} 486 487static void triangle_nop( struct setup_context *setup, 488 const float 
(*v0)[4], 489 const float (*v1)[4], 490 const float (*v2)[4] ) 491{ 492} 493 494 495void 496lp_setup_choose_triangle( struct setup_context *setup ) 497{ 498 switch (setup->cullmode) { 499 case PIPE_WINDING_NONE: 500 setup->triangle = triangle_both; 501 break; 502 case PIPE_WINDING_CCW: 503 setup->triangle = triangle_cw; 504 break; 505 case PIPE_WINDING_CW: 506 setup->triangle = triangle_ccw; 507 break; 508 default: 509 setup->triangle = triangle_nop; 510 break; 511 } 512} 513 514 515