/* lp_setup_line.c revision 5286dd701640976ffc328e8e85fb3830746851a1 */
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28/* 29 * Binning code for lines 30 */ 31 32#include "util/u_math.h" 33#include "util/u_memory.h" 34#include "lp_perf.h" 35#include "lp_setup_context.h" 36#include "lp_rast.h" 37#include "lp_state_fs.h" 38 39#define NUM_CHANNELS 4 40 41 42static const int step_scissor_minx[16] = { 43 0, 1, 0, 1, 44 2, 3, 2, 3, 45 0, 1, 0, 1, 46 2, 3, 2, 3 47}; 48 49static const int step_scissor_maxx[16] = { 50 0, -1, 0, -1, 51 -2, -3, -2, -3, 52 0, -1, 0, -1, 53 -2, -3, -2, -3 54}; 55 56static const int step_scissor_miny[16] = { 57 0, 0, 1, 1, 58 0, 0, 1, 1, 59 2, 2, 3, 3, 60 2, 2, 3, 3 61}; 62 63static const int step_scissor_maxy[16] = { 64 0, 0, -1, -1, 65 0, 0, -1, -1, 66 -2, -2, -3, -3, 67 -2, -2, -3, -3 68}; 69 70 71 72/** 73 * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 74 */ 75static void constant_coef( struct lp_setup_context *setup, 76 struct lp_rast_triangle *tri, 77 unsigned slot, 78 const float value, 79 unsigned i ) 80{ 81 tri->inputs.a0[slot][i] = value; 82 tri->inputs.dadx[slot][i] = 0.0f; 83 tri->inputs.dady[slot][i] = 0.0f; 84} 85 86 87/** 88 * Compute a0, dadx and dady for a linearly interpolated coefficient, 89 * for a triangle. 
90 */ 91static void linear_coef( struct lp_setup_context *setup, 92 struct lp_rast_triangle *tri, 93 float oneoverarea, 94 unsigned slot, 95 const float (*v1)[4], 96 const float (*v2)[4], 97 unsigned vert_attr, 98 unsigned i) 99{ 100 float a1 = v1[vert_attr][i]; 101 float a2 = v2[vert_attr][i]; 102 103 float da21 = a1 - a2; 104 float dadx = da21 * tri->dx * oneoverarea; 105 float dady = da21 * tri->dy * oneoverarea; 106 107 tri->inputs.dadx[slot][i] = dadx; 108 tri->inputs.dady[slot][i] = dady; 109 110 tri->inputs.a0[slot][i] = (a1 - 111 (dadx * (v1[0][0] - setup->pixel_offset) + 112 dady * (v1[0][1] - setup->pixel_offset))); 113} 114 115 116/** 117 * Compute a0, dadx and dady for a perspective-corrected interpolant, 118 * for a triangle. 119 * We basically multiply the vertex value by 1/w before computing 120 * the plane coefficients (a0, dadx, dady). 121 * Later, when we compute the value at a particular fragment position we'll 122 * divide the interpolated value by the interpolated W at that fragment. 123 */ 124static void perspective_coef( struct lp_setup_context *setup, 125 struct lp_rast_triangle *tri, 126 float oneoverarea, 127 unsigned slot, 128 const float (*v1)[4], 129 const float (*v2)[4], 130 unsigned vert_attr, 131 unsigned i) 132{ 133 /* premultiply by 1/w (v[0][3] is always 1/w): 134 */ 135 float a1 = v1[vert_attr][i] * v1[0][3]; 136 float a2 = v2[vert_attr][i] * v2[0][3]; 137 138 float da21 = a1 - a2; 139 float dadx = da21 * tri->dx * oneoverarea; 140 float dady = da21 * tri->dy * oneoverarea; 141 142 tri->inputs.dadx[slot][i] = dadx; 143 tri->inputs.dady[slot][i] = dady; 144 145 tri->inputs.a0[slot][i] = (a1 - 146 (dadx * (v1[0][0] - setup->pixel_offset) + 147 dady * (v1[0][1] - setup->pixel_offset))); 148} 149 150/** 151 * Compute the tri->coef[] array dadx, dady, a0 values. 
152 */ 153static void setup_line_coefficients( struct lp_setup_context *setup, 154 struct lp_rast_triangle *tri, 155 float oneoverarea, 156 const float (*v1)[4], 157 const float (*v2)[4]) 158{ 159 unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; 160 unsigned slot; 161 162 /* setup interpolation for all the remaining attributes: 163 */ 164 for (slot = 0; slot < setup->fs.nr_inputs; slot++) { 165 unsigned vert_attr = setup->fs.input[slot].src_index; 166 unsigned usage_mask = setup->fs.input[slot].usage_mask; 167 unsigned i; 168 169 switch (setup->fs.input[slot].interp) { 170 case LP_INTERP_CONSTANT: 171 if (setup->flatshade_first) { 172 for (i = 0; i < NUM_CHANNELS; i++) 173 if (usage_mask & (1 << i)) 174 constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); 175 } 176 else { 177 for (i = 0; i < NUM_CHANNELS; i++) 178 if (usage_mask & (1 << i)) 179 constant_coef(setup, tri, slot+1, v2[vert_attr][i], i); 180 } 181 break; 182 183 case LP_INTERP_LINEAR: 184 for (i = 0; i < NUM_CHANNELS; i++) 185 if (usage_mask & (1 << i)) 186 linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i); 187 break; 188 189 case LP_INTERP_PERSPECTIVE: 190 for (i = 0; i < NUM_CHANNELS; i++) 191 if (usage_mask & (1 << i)) 192 perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i); 193 fragcoord_usage_mask |= TGSI_WRITEMASK_W; 194 break; 195 196 case LP_INTERP_POSITION: 197 /* 198 * The generated pixel interpolators will pick up the coeffs from 199 * slot 0, so all need to ensure that the usage mask is covers all 200 * usages. 201 */ 202 fragcoord_usage_mask |= usage_mask; 203 break; 204 205 default: 206 assert(0); 207 } 208 } 209 210 /* The internal position input is in slot zero: 211 */ 212 lp_setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v2, 213 fragcoord_usage_mask); 214} 215 216 217 218static INLINE int subpixel_snap( float a ) 219{ 220 return util_iround(FIXED_ONE * a); 221} 222 223 224/** 225 * Print line vertex attribs (for debug). 
226 */ 227static void 228print_line(struct lp_setup_context *setup, 229 const float (*v1)[4], 230 const float (*v2)[4]) 231{ 232 uint i; 233 234 debug_printf("llvmpipe line\n"); 235 for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { 236 debug_printf(" v1[%d]: %f %f %f %f\n", i, 237 v1[i][0], v1[i][1], v1[i][2], v1[i][3]); 238 } 239 for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { 240 debug_printf(" v2[%d]: %f %f %f %f\n", i, 241 v2[i][0], v2[i][1], v2[i][2], v2[i][3]); 242 } 243} 244 245 246static void 247lp_setup_line( struct lp_setup_context *setup, 248 const float (*v1)[4], 249 const float (*v2)[4]) 250{ 251 struct lp_scene *scene = lp_setup_get_current_scene(setup); 252 struct lp_rast_triangle *line; 253 float oneoverarea; 254 float half_width = setup->line_width / 2; 255 int minx, maxx, miny, maxy; 256 int ix0, ix1, iy0, iy1; 257 unsigned tri_bytes; 258 int x[4]; 259 int y[4]; 260 int i; 261 int nr_planes = 4; 262 boolean opaque; 263 264 if (0) 265 print_line(setup, v1, v2); 266 267 if (setup->scissor_test) { 268 nr_planes = 8; 269 } 270 else { 271 nr_planes = 4; 272 } 273 274 line = lp_setup_alloc_triangle(scene, 275 setup->fs.nr_inputs, 276 nr_planes, 277 &tri_bytes); 278 if (!line) 279 return; 280 281#ifndef DEBUG 282 line->v[0][0] = v1[0][0]; 283 line->v[1][0] = v2[0][0]; 284 line->v[0][1] = v1[0][1]; 285 line->v[1][1] = v2[0][1]; 286#endif 287 288 /* pre-calculation(based on given vertices) to determine if line is 289 * more horizontal or more vertical 290 */ 291 line->dx = v1[0][0] - v2[0][0]; 292 line->dy = v1[0][1] - v2[0][1]; 293 294 /* x-major line */ 295 if (fabsf(line->dx) >= fabsf(line->dy)) { 296 if (line->dx < 0) { 297 /* if v2 is to the right of v1, swap pointers */ 298 const float (*temp)[4] = v1; 299 v1 = v2; 300 v2 = temp; 301 line->dx = -line->dx; 302 line->dy = -line->dy; 303 } 304 305 /* x/y positions in fixed point */ 306 x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset); 307 x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset); 308 x[2] 
= subpixel_snap(v2[0][0] - setup->pixel_offset); 309 x[3] = subpixel_snap(v1[0][0] - setup->pixel_offset); 310 311 y[0] = subpixel_snap(v1[0][1] - half_width - setup->pixel_offset); 312 y[1] = subpixel_snap(v2[0][1] - half_width - setup->pixel_offset); 313 y[2] = subpixel_snap(v2[0][1] + half_width - setup->pixel_offset); 314 y[3] = subpixel_snap(v1[0][1] + half_width - setup->pixel_offset); 315 } 316 else{ 317 /* y-major line */ 318 if (line->dy > 0) { 319 /* if v2 is on top of v1, swap pointers */ 320 const float (*temp)[4] = v1; 321 v1 = v2; 322 v2 = temp; 323 line->dx = -line->dx; 324 line->dy = -line->dy; 325 } 326 327 x[0] = subpixel_snap(v1[0][0] - half_width - setup->pixel_offset); 328 x[1] = subpixel_snap(v2[0][0] - half_width - setup->pixel_offset); 329 x[2] = subpixel_snap(v2[0][0] + half_width - setup->pixel_offset); 330 x[3] = subpixel_snap(v1[0][0] + half_width - setup->pixel_offset); 331 332 y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset); 333 y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset); 334 y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); 335 y[3] = subpixel_snap(v1[0][1] - setup->pixel_offset); 336 } 337 338 /* calculate the deltas */ 339 line->plane[0].dcdy = x[0] - x[1]; 340 line->plane[1].dcdy = x[1] - x[2]; 341 line->plane[2].dcdy = x[2] - x[3]; 342 line->plane[3].dcdy = x[3] - x[0]; 343 344 line->plane[0].dcdx = y[0] - y[1]; 345 line->plane[1].dcdx = y[1] - y[2]; 346 line->plane[2].dcdx = y[2] - y[3]; 347 line->plane[3].dcdx = y[3] - y[0]; 348 349 350 LP_COUNT(nr_tris); 351 352 353 /* Bounding rectangle (in pixels) */ 354 { 355 /* Yes this is necessary to accurately calculate bounding boxes 356 * with the two fill-conventions we support. GL (normally) ends 357 * up needing a bottom-left fill convention, which requires 358 * slightly different rounding. 359 */ 360 int adj = (setup->pixel_offset != 0) ? 
1 : 0; 361 362 minx = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; 363 maxx = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; 364 miny = (MIN4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; 365 maxy = (MAX4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; 366 } 367 368 if (setup->scissor_test) { 369 minx = MAX2(minx, setup->scissor.current.minx); 370 maxx = MIN2(maxx, setup->scissor.current.maxx); 371 miny = MAX2(miny, setup->scissor.current.miny); 372 maxy = MIN2(maxy, setup->scissor.current.maxy); 373 } 374 else { 375 minx = MAX2(minx, 0); 376 miny = MAX2(miny, 0); 377 maxx = MIN2(maxx, scene->fb.width); 378 maxy = MIN2(maxy, scene->fb.height); 379 } 380 381 382 if (miny >= maxy || minx >= maxx) { 383 lp_scene_putback_data( scene, tri_bytes ); 384 return; 385 } 386 387 oneoverarea = 1.0f / (line->dx * line->dx + line->dy * line->dy); 388 389 /* Setup parameter interpolants: 390 */ 391 setup_line_coefficients( setup, line, oneoverarea, v1, v2); 392 393 for (i = 0; i < 4; i++) { 394 struct lp_rast_plane *plane = &line->plane[i]; 395 396 /* half-edge constants, will be interated over the whole render 397 * target. 398 */ 399 plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; 400 401 402 /* correct for top-left vs. bottom-left fill convention. 403 * 404 * note that we're overloading gl_rasterization_rules to mean 405 * both (0.5,0.5) pixel centers *and* bottom-left filling 406 * convention. 407 * 408 * GL actually has a top-left filling convention, but GL's 409 * notion of "top" differs from gallium's... 410 * 411 * Also, sometimes (in FBO cases) GL will render upside down 412 * to its usual method, in which case it will probably want 413 * to use the opposite, top-left convention. 
414 */ 415 if (plane->dcdx < 0) { 416 /* both fill conventions want this - adjust for left edges */ 417 plane->c++; 418 } 419 else if (plane->dcdx == 0) { 420 if (setup->pixel_offset == 0) { 421 /* correct for top-left fill convention: 422 */ 423 if (plane->dcdy > 0) plane->c++; 424 } 425 else { 426 /* correct for bottom-left fill convention: 427 */ 428 if (plane->dcdy < 0) plane->c++; 429 } 430 } 431 432 plane->dcdx *= FIXED_ONE; 433 plane->dcdy *= FIXED_ONE; 434 435 /* find trivial reject offsets for each edge for a single-pixel 436 * sized block. These will be scaled up at each recursive level to 437 * match the active blocksize. Scaling in this way works best if 438 * the blocks are square. 439 */ 440 plane->eo = 0; 441 if (plane->dcdx < 0) plane->eo -= plane->dcdx; 442 if (plane->dcdy > 0) plane->eo += plane->dcdy; 443 444 /* Calculate trivial accept offsets from the above. 445 */ 446 plane->ei = plane->dcdy - plane->dcdx - plane->eo; 447 448 plane->step = line->step[i]; 449 450 /* Fill in the inputs.step[][] arrays. 451 * We've manually unrolled some loops here. 452 */ 453#define SETUP_STEP(j, x, y) \ 454 line->step[i][j] = y * plane->dcdy - x * plane->dcdx 455 456 SETUP_STEP(0, 0, 0); 457 SETUP_STEP(1, 1, 0); 458 SETUP_STEP(2, 0, 1); 459 SETUP_STEP(3, 1, 1); 460 461 SETUP_STEP(4, 2, 0); 462 SETUP_STEP(5, 3, 0); 463 SETUP_STEP(6, 2, 1); 464 SETUP_STEP(7, 3, 1); 465 466 SETUP_STEP(8, 0, 2); 467 SETUP_STEP(9, 1, 2); 468 SETUP_STEP(10, 0, 3); 469 SETUP_STEP(11, 1, 3); 470 471 SETUP_STEP(12, 2, 2); 472 SETUP_STEP(13, 3, 2); 473 SETUP_STEP(14, 2, 3); 474 SETUP_STEP(15, 3, 3); 475#undef STEP 476 } 477 478 479 /* 480 * When rasterizing scissored tris, use the intersection of the 481 * triangle bounding box and the scissor rect to generate the 482 * scissor planes. 
483 * 484 * This permits us to cut off the triangle "tails" that are present 485 * in the intermediate recursive levels caused when two of the 486 * triangles edges don't diverge quickly enough to trivially reject 487 * exterior blocks from the triangle. 488 * 489 * It's not really clear if it's worth worrying about these tails, 490 * but since we generate the planes for each scissored tri, it's 491 * free to trim them in this case. 492 * 493 * Note that otherwise, the scissor planes only vary in 'C' value, 494 * and even then only on state-changes. Could alternatively store 495 * these planes elsewhere. 496 */ 497 if (nr_planes == 8) { 498 line->plane[4].step = step_scissor_maxx; 499 line->plane[4].dcdx = 1; 500 line->plane[4].dcdy = 0; 501 line->plane[4].c = maxx; 502 line->plane[4].ei = -1; 503 line->plane[4].eo = 0; 504 505 line->plane[5].step = step_scissor_miny; 506 line->plane[5].dcdx = 0; 507 line->plane[5].dcdy = 1; 508 line->plane[5].c = 1-miny; 509 line->plane[5].ei = 0; 510 line->plane[5].eo = 1; 511 512 line->plane[6].step = step_scissor_maxy; 513 line->plane[6].dcdx = 0; 514 line->plane[6].dcdy = -1; 515 line->plane[6].c = maxy; 516 line->plane[6].ei = -1; 517 line->plane[6].eo = 0; 518 519 line->plane[7].step = step_scissor_minx; 520 line->plane[7].dcdx = -1; 521 line->plane[7].dcdy = 0; 522 line->plane[7].c = 1-minx; 523 line->plane[7].ei = 0; 524 line->plane[7].eo = 1; 525 } 526 527 528 /* 529 * All fields of 'tri' are now set. The remaining code here is 530 * concerned with binning. 
531 */ 532 533 /* Convert to tile coordinates, and inclusive ranges: 534 */ 535 ix0 = minx / TILE_SIZE; 536 iy0 = miny / TILE_SIZE; 537 ix1 = (maxx-1) / TILE_SIZE; 538 iy1 = (maxy-1) / TILE_SIZE; 539 540 /* 541 * Clamp to framebuffer size 542 */ 543 assert(ix0 == MAX2(ix0, 0)); 544 assert(iy0 == MAX2(iy0, 0)); 545 assert(ix1 == MIN2(ix1, scene->tiles_x - 1)); 546 assert(iy1 == MIN2(iy1, scene->tiles_y - 1)); 547 548 /* Determine which tile(s) intersect the triangle's bounding box 549 */ 550 if (iy0 == iy1 && ix0 == ix1) 551 { 552 /* Triangle is contained in a single tile: 553 */ 554 lp_scene_bin_command( scene, ix0, iy0, 555 lp_rast_tri_tab[nr_planes], 556 lp_rast_arg_triangle(line, (1<<nr_planes)-1) ); 557 } 558 else 559 { 560 int c[8]; 561 int ei[8]; 562 int eo[8]; 563 int xstep[8]; 564 int ystep[8]; 565 int x, y; 566 int is_blit = -1; /* undetermined */ 567 568 for (i = 0; i < nr_planes; i++) { 569 c[i] = (line->plane[i].c + 570 line->plane[i].dcdy * iy0 * TILE_SIZE - 571 line->plane[i].dcdx * ix0 * TILE_SIZE); 572 573 ei[i] = line->plane[i].ei << TILE_ORDER; 574 eo[i] = line->plane[i].eo << TILE_ORDER; 575 xstep[i] = -(line->plane[i].dcdx << TILE_ORDER); 576 ystep[i] = line->plane[i].dcdy << TILE_ORDER; 577 } 578 579 580 581 /* Test tile-sized blocks against the triangle. 582 * Discard blocks fully outside the tri. If the block is fully 583 * contained inside the tri, bin an lp_rast_shade_tile command. 584 * Else, bin a lp_rast_triangle command. 585 */ 586 for (y = iy0; y <= iy1; y++) 587 { 588 boolean in = FALSE; /* are we inside the triangle? 
*/ 589 int cx[8]; 590 591 for (i = 0; i < nr_planes; i++) 592 cx[i] = c[i]; 593 594 for (x = ix0; x <= ix1; x++) 595 { 596 int out = 0; 597 int partial = 0; 598 599 for (i = 0; i < nr_planes; i++) { 600 int planeout = cx[i] + eo[i]; 601 int planepartial = cx[i] + ei[i] - 1; 602 out |= (planeout >> 31); 603 partial |= (planepartial >> 31) & (1<<i); 604 } 605 if (out) { 606 /* do nothing */ 607 if (in) 608 break; /* exiting triangle, all done with this row */ 609 LP_COUNT(nr_empty_64); 610 } 611 else if (partial) { 612 /* Not trivially accepted by at least one plane - 613 * rasterize/shade partial tile 614 */ 615 int count = util_bitcount(partial); 616 in = TRUE; 617 lp_scene_bin_command( scene, x, y, 618 lp_rast_tri_tab[count], 619 lp_rast_arg_triangle(line, partial) ); 620 621 LP_COUNT(nr_partially_covered_64); 622 } 623 else { 624 /* triangle covers the whole tile- shade whole tile */ 625 LP_COUNT(nr_fully_covered_64); 626 in = TRUE; 627 /* leverages on existing code in lp_setup_tri.c */ 628 do_triangle_ccw_whole_tile(setup, scene, line, x, y, 629 opaque, &is_blit); 630 } 631 632 /* Iterate cx values across the region: 633 */ 634 for (i = 0; i < nr_planes; i++) 635 cx[i] += xstep[i]; 636 } 637 638 /* Iterate c values down the region: 639 */ 640 for (i = 0; i < nr_planes; i++) 641 c[i] += ystep[i]; 642 } 643 } 644} 645 646 647void lp_setup_choose_line( struct lp_setup_context *setup ) 648{ 649 setup->line = lp_setup_line; 650} 651 652 653