/* lp_setup_tri.c revision 415b271b5100d64579690111bc8eb549866865a7 */
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/* 29 * Binning code for triangles 30 */ 31 32#include "lp_setup_context.h" 33#include "lp_rast.h" 34#include "util/u_math.h" 35#include "util/u_memory.h" 36 37#define NUM_CHANNELS 4 38 39/** 40 * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 
41 */ 42static void constant_coef( struct lp_rast_triangle *tri, 43 unsigned slot, 44 const float value, 45 unsigned i ) 46{ 47 tri->inputs.a0[slot][i] = value; 48 tri->inputs.dadx[slot][i] = 0; 49 tri->inputs.dady[slot][i] = 0; 50} 51 52/** 53 * Compute a0, dadx and dady for a linearly interpolated coefficient, 54 * for a triangle. 55 */ 56static void linear_coef( struct lp_rast_triangle *tri, 57 unsigned slot, 58 const float (*v1)[4], 59 const float (*v2)[4], 60 const float (*v3)[4], 61 unsigned vert_attr, 62 unsigned i) 63{ 64 float a1 = v1[vert_attr][i]; 65 float a2 = v2[vert_attr][i]; 66 float a3 = v3[vert_attr][i]; 67 68 float da12 = a1 - a2; 69 float da31 = a3 - a1; 70 float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; 71 float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; 72 73 tri->inputs.dadx[slot][i] = dadx; 74 tri->inputs.dady[slot][i] = dady; 75 76 /* calculate a0 as the value which would be sampled for the 77 * fragment at (0,0), taking into account that we want to sample at 78 * pixel centers, in other words (0.5, 0.5). 79 * 80 * this is neat but unfortunately not a good way to do things for 81 * triangles with very large values of dadx or dady as it will 82 * result in the subtraction and re-addition from a0 of a very 83 * large number, which means we'll end up loosing a lot of the 84 * fractional bits and precision from a0. the way to fix this is 85 * to define a0 as the sample at a pixel center somewhere near vmin 86 * instead - i'll switch to this later. 87 */ 88 tri->inputs.a0[slot][i] = (v1[vert_attr][i] - 89 (dadx * (v1[0][0] - 0.5f) + 90 dady * (v1[0][1] - 0.5f))); 91} 92 93 94/** 95 * Compute a0, dadx and dady for a perspective-corrected interpolant, 96 * for a triangle. 97 * We basically multiply the vertex value by 1/w before computing 98 * the plane coefficients (a0, dadx, dady). 
99 * Later, when we compute the value at a particular fragment position we'll 100 * divide the interpolated value by the interpolated W at that fragment. 101 */ 102static void perspective_coef( struct lp_rast_triangle *tri, 103 unsigned slot, 104 const float (*v1)[4], 105 const float (*v2)[4], 106 const float (*v3)[4], 107 unsigned vert_attr, 108 unsigned i) 109{ 110 /* premultiply by 1/w (v[0][3] is always 1/w): 111 */ 112 float a1 = v1[vert_attr][i] * v1[0][3]; 113 float a2 = v2[vert_attr][i] * v2[0][3]; 114 float a3 = v3[vert_attr][i] * v3[0][3]; 115 float da12 = a1 - a2; 116 float da31 = a3 - a1; 117 float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; 118 float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; 119 120 121 tri->inputs.dadx[slot][i] = dadx; 122 tri->inputs.dady[slot][i] = dady; 123 tri->inputs.a0[slot][i] = (a1 - 124 (dadx * (v1[0][0] - 0.5f) + 125 dady * (v1[0][1] - 0.5f))); 126} 127 128 129/** 130 * Special coefficient setup for gl_FragCoord. 131 * X and Y are trivial, though Y has to be inverted for OpenGL. 132 * Z and W are copied from position_coef which should have already been computed. 133 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
134 */ 135static void 136setup_fragcoord_coef(struct lp_rast_triangle *tri, 137 unsigned slot, 138 const float (*v1)[4], 139 const float (*v2)[4], 140 const float (*v3)[4]) 141{ 142 /*X*/ 143 tri->inputs.a0[slot][0] = 0.0; 144 tri->inputs.dadx[slot][0] = 1.0; 145 tri->inputs.dady[slot][0] = 0.0; 146 /*Y*/ 147 tri->inputs.a0[slot][1] = 0.0; 148 tri->inputs.dadx[slot][1] = 0.0; 149 tri->inputs.dady[slot][1] = 1.0; 150 /*Z*/ 151 linear_coef(tri, slot, v1, v2, v3, 0, 2); 152 /*W*/ 153 linear_coef(tri, slot, v1, v2, v3, 0, 3); 154} 155 156 157static void setup_facing_coef( struct lp_rast_triangle *tri, 158 unsigned slot, 159 boolean frontface ) 160{ 161 constant_coef( tri, slot, 1.0f - frontface, 0 ); 162 constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ 163 constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ 164 constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ 165} 166 167 168/** 169 * Compute the tri->coef[] array dadx, dady, a0 values. 170 */ 171static void setup_tri_coefficients( struct setup_context *setup, 172 struct lp_rast_triangle *tri, 173 const float (*v1)[4], 174 const float (*v2)[4], 175 const float (*v3)[4], 176 boolean frontface ) 177{ 178 unsigned slot; 179 180 /* The internal position input is in slot zero: 181 */ 182 setup_fragcoord_coef(tri, 0, v1, v2, v3); 183 184 /* setup interpolation for all the remaining attrbutes: 185 */ 186 for (slot = 0; slot < setup->fs.nr_inputs; slot++) { 187 unsigned vert_attr = setup->fs.input[slot].src_index; 188 unsigned i; 189 190 switch (setup->fs.input[slot].interp) { 191 case LP_INTERP_CONSTANT: 192 for (i = 0; i < NUM_CHANNELS; i++) 193 constant_coef(tri, slot+1, v3[vert_attr][i], i); 194 break; 195 196 case LP_INTERP_LINEAR: 197 for (i = 0; i < NUM_CHANNELS; i++) 198 linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i); 199 break; 200 201 case LP_INTERP_PERSPECTIVE: 202 for (i = 0; i < NUM_CHANNELS; i++) 203 perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i); 204 break; 205 206 case LP_INTERP_POSITION: 207 /* 
XXX: fix me - duplicates the values in slot zero. 208 */ 209 setup_fragcoord_coef(tri, slot+1, v1, v2, v3); 210 break; 211 212 case LP_INTERP_FACING: 213 setup_facing_coef(tri, slot+1, frontface); 214 break; 215 216 default: 217 assert(0); 218 } 219 } 220} 221 222 223 224/* XXX: do this by add/subtracting a large floating point number: 225 */ 226static inline float subpixel_snap( float a ) 227{ 228 int i = a * 16; 229 return (float)i * (1.0/16); 230} 231 232 233 234 235 236/* to avoid having to allocate power-of-four, square render targets, 237 * end up having a specialized version of the above that runs only at 238 * the topmost level. 239 * 240 * at the topmost level there may be an arbitary number of steps on 241 * either dimension, so this loop needs to be either separately 242 * code-generated and unrolled for each render target size, or kept as 243 * generic looping code: 244 */ 245 246#define MIN3(a,b,c) MIN2(MIN2(a,b),c) 247#define MAX3(a,b,c) MAX2(MAX2(a,b),c) 248 249static void 250do_triangle_ccw(struct setup_context *setup, 251 const float (*v1)[4], 252 const float (*v2)[4], 253 const float (*v3)[4], 254 boolean frontfacing ) 255{ 256 const int rt_width = setup->fb.width; 257 const int rt_height = setup->fb.height; 258 259 const float y1 = subpixel_snap(v1[0][1]); 260 const float y2 = subpixel_snap(v2[0][1]); 261 const float y3 = subpixel_snap(v3[0][1]); 262 263 const float x1 = subpixel_snap(v1[0][0]); 264 const float x2 = subpixel_snap(v2[0][0]); 265 const float x3 = subpixel_snap(v3[0][0]); 266 267 struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri ); 268 float area; 269 float c1, c2, c3; 270 int minx, maxx, miny, maxy; 271 272 tri->dx12 = x1 - x2; 273 tri->dx23 = x2 - x3; 274 tri->dx31 = x3 - x1; 275 276 tri->dy12 = y1 - y2; 277 tri->dy23 = y2 - y3; 278 tri->dy31 = y3 - y1; 279 280 area = (tri->dx12 * tri->dy31 - 281 tri->dx31 * tri->dy12); 282 283 /* Cull non-ccw and zero-sized triangles. 
284 */ 285 if (area <= 0 || util_is_inf_or_nan(area)) 286 return; 287 288 // Bounding rectangle 289 minx = util_iround(MIN3(x1, x2, x3) - .5); 290 maxx = util_iround(MAX3(x1, x2, x3) + .5); 291 miny = util_iround(MIN3(y1, y2, y3) - .5); 292 maxy = util_iround(MAX3(y1, y2, y3) + .5); 293 294 /* Clamp to framebuffer (or tile) dimensions: 295 */ 296 miny = MAX2(0, miny); 297 minx = MAX2(0, minx); 298 maxy = MIN2(rt_height, maxy); 299 maxx = MIN2(rt_width, maxx); 300 301 if (miny == maxy || minx == maxx) 302 return; 303 304 /* The only divide in this code. Is it really needed? 305 */ 306 tri->oneoverarea = 1.0f / area; 307 308 /* Setup parameter interpolants: 309 */ 310 setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing ); 311 312 /* half-edge constants, will be interated over the whole 313 * rendertarget. 314 */ 315 c1 = tri->dy12 * x1 - tri->dx12 * y1; 316 c2 = tri->dy23 * x2 - tri->dx23 * y2; 317 c3 = tri->dy31 * x3 - tri->dx31 * y3; 318 319 /* correct for top-left fill convention: 320 */ 321 if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) c1++; 322 if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) c2++; 323 if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) c3++; 324 325 /* find trivial reject offsets for each edge for a single-pixel 326 * sized block. These will be scaled up at each recursive level to 327 * match the active blocksize. Scaling in this way works best if 328 * the blocks are square. 329 */ 330 tri->eo1 = 0; 331 if (tri->dy12 < 0) tri->eo1 -= tri->dy12; 332 if (tri->dx12 > 0) tri->eo1 += tri->dx12; 333 334 tri->eo2 = 0; 335 if (tri->dy23 < 0) tri->eo2 -= tri->dy23; 336 if (tri->dx23 > 0) tri->eo2 += tri->dx23; 337 338 tri->eo3 = 0; 339 if (tri->dy31 < 0) tri->eo3 -= tri->dy31; 340 if (tri->dx31 > 0) tri->eo3 += tri->dx31; 341 342 /* Calculate trivial accept offsets from the above. 
343 */ 344 tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; 345 tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; 346 tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; 347 348 minx &= ~(TILESIZE-1); /* aligned blocks */ 349 miny &= ~(TILESIZE-1); /* aligned blocks */ 350 351 c1 += tri->dx12 * miny - tri->dy12 * minx; 352 c2 += tri->dx23 * miny - tri->dy23 * minx; 353 c3 += tri->dx31 * miny - tri->dy31 * minx; 354 355 /* Convert to tile coordinates: 356 */ 357 minx /= TILESIZE; 358 maxx /= TILESIZE; 359 miny /= TILESIZE; 360 maxy /= TILESIZE; 361 362 if (miny == maxy && minx == maxx) 363 { 364 /* Triangle is contained in a single tile: 365 */ 366 bin_command( &setup->tile[minx][miny], lp_rast_triangle, tri ); 367 } 368 else 369 { 370 const int step = TILESIZE; 371 372 float ei1 = tri->ei1 * step; 373 float ei2 = tri->ei2 * step; 374 float ei3 = tri->ei3 * step; 375 376 float eo1 = tri->eo1 * step; 377 float eo2 = tri->eo2 * step; 378 float eo3 = tri->eo3 * step; 379 380 float xstep1 = -step * tri->dy12; 381 float xstep2 = -step * tri->dy23; 382 float xstep3 = -step * tri->dy31; 383 384 float ystep1 = step * tri->dx12; 385 float ystep2 = step * tri->dx23; 386 float ystep3 = step * tri->dx31; 387 int x, y; 388 389 390 /* Subdivide space into NxM blocks, where each block is square and 391 * power-of-four in dimension. 392 * 393 * Trivially accept or reject blocks, else jump to per-pixel 394 * examination above. 
395 */ 396 for (y = miny; y < maxy; y++) 397 { 398 float cx1 = c1; 399 float cx2 = c2; 400 float cx3 = c3; 401 402 for (x = minx; x < maxx; x++) 403 { 404 if (cx1 + eo1 < 0 || 405 cx2 + eo2 < 0 || 406 cx3 + eo3 < 0) 407 { 408 /* do nothing */ 409 } 410 else if (cx1 + ei1 > 0 && 411 cx2 + ei2 > 0 && 412 cx3 + ei3 > 0) 413 { 414 /* shade whole tile */ 415 bin_command( &setup->tile[x][y], lp_rast_shade_tile, &tri->inputs ); 416 } 417 else 418 { 419 /* shade partial tile */ 420 bin_command( &setup->tile[x][y], lp_rast_triangle, tri ); 421 } 422 423 /* Iterate cx values across the region: 424 */ 425 cx1 += xstep1; 426 cx2 += xstep2; 427 cx3 += xstep3; 428 } 429 430 /* Iterate c values down the region: 431 */ 432 c1 += ystep1; 433 c2 += ystep2; 434 c3 += ystep3; 435 } 436 } 437} 438 439static void triangle_cw( struct setup_context *setup, 440 const float (*v0)[4], 441 const float (*v1)[4], 442 const float (*v2)[4] ) 443{ 444 do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); 445} 446 447static void triangle_ccw( struct setup_context *setup, 448 const float (*v0)[4], 449 const float (*v1)[4], 450 const float (*v2)[4] ) 451{ 452 do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); 453} 454 455static void triangle_both( struct setup_context *setup, 456 const float (*v0)[4], 457 const float (*v1)[4], 458 const float (*v2)[4] ) 459{ 460 /* edge vectors e = v0 - v2, f = v1 - v2 */ 461 const float ex = v0[0][0] - v2[0][0]; 462 const float ey = v0[0][1] - v2[0][1]; 463 const float fx = v1[0][0] - v2[0][0]; 464 const float fy = v1[0][1] - v2[0][1]; 465 466 /* det = cross(e,f).z */ 467 if (ex * fy - ey * fx < 0) 468 triangle_ccw( setup, v0, v1, v2 ); 469 else 470 triangle_cw( setup, v0, v1, v2 ); 471} 472 473static void triangle_nop( struct setup_context *setup, 474 const float (*v0)[4], 475 const float (*v1)[4], 476 const float (*v2)[4] ) 477{ 478} 479 480 481void 482lp_setup_choose_triangle( struct setup_context *setup ) 483{ 484 switch (setup->cull_mode) 
{ 485 case PIPE_WINDING_NONE: 486 setup->triangle = triangle_both; 487 break; 488 case PIPE_WINDING_CCW: 489 setup->triangle = triangle_cw; 490 break; 491 case PIPE_WINDING_CW: 492 setup->triangle = triangle_ccw; 493 break; 494 default: 495 setup->triangle = triangle_nop; 496 break; 497 } 498} 499 500 501