lp_setup.c revision 26c78a4968a3c10ca006699d240150e6aa4b4250
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * \brief Primitive rasterization/rendering (points, lines, triangles) 30 * 31 * \author Keith Whitwell <keith@tungstengraphics.com> 32 * \author Brian Paul 33 */ 34 35#include "lp_context.h" 36#include "lp_quad.h" 37#include "lp_setup.h" 38#include "lp_state.h" 39#include "draw/draw_context.h" 40#include "draw/draw_private.h" 41#include "draw/draw_vertex.h" 42#include "pipe/p_shader_tokens.h" 43#include "pipe/p_thread.h" 44#include "util/u_format.h" 45#include "util/u_math.h" 46#include "util/u_memory.h" 47#include "lp_bld_debug.h" 48#include "lp_tile_cache.h" 49#include "lp_tile_soa.h" 50 51 52#define DEBUG_VERTS 0 53#define DEBUG_FRAGS 0 54 55/** 56 * Triangle edge info 57 */ 58struct edge { 59 float dx; /**< X(v1) - X(v0), used only during setup */ 60 float dy; /**< Y(v1) - Y(v0), used only during setup */ 61 float dxdy; /**< dx/dy */ 62 float sx, sy; /**< first sample point coord */ 63 int lines; /**< number of lines on this edge */ 64}; 65 66 67#define MAX_QUADS 16 68 69 70/** 71 * Triangle setup info (derived from draw_stage). 72 * Also used for line drawing (taking some liberties). 73 */ 74struct setup_context { 75 struct llvmpipe_context *llvmpipe; 76 77 /* Vertices are just an array of floats making up each attribute in 78 * turn. Currently fixed at 4 floats, but should change in time. 79 * Codegen will help cope with this. 80 */ 81 const float (*vmax)[4]; 82 const float (*vmid)[4]; 83 const float (*vmin)[4]; 84 const float (*vprovoke)[4]; 85 86 struct edge ebot; 87 struct edge etop; 88 struct edge emaj; 89 90 float oneoverarea; 91 int facing; 92 93 float pixel_offset; 94 95 struct quad_header quad[MAX_QUADS]; 96 struct quad_header *quad_ptrs[MAX_QUADS]; 97 unsigned count; 98 99 struct quad_interp_coef coef; 100 101 struct { 102 int left[2]; /**< [0] = row0, [1] = row1 */ 103 int right[2]; 104 int y; 105 } span; 106 107#if DEBUG_FRAGS 108 uint numFragsEmitted; /**< per primitive */ 109 uint numFragsWritten; /**< per primitive */ 110#endif 111 112 unsigned winding; /* which winding to cull */ 113}; 114 115 116 117/** 118 * Execute fragment shader for the four fragments in the quad. 119 */ 120PIPE_ALIGN_STACK 121static void 122shade_quads(struct llvmpipe_context *llvmpipe, 123 struct quad_header *quads[], 124 unsigned nr) 125{ 126 struct lp_fragment_shader *fs = llvmpipe->fs; 127 struct quad_header *quad = quads[0]; 128 const unsigned x = quad->input.x0; 129 const unsigned y = quad->input.y0; 130 uint8_t *tile; 131 uint8_t *color; 132 void *depth; 133 PIPE_ALIGN_VAR(16) uint32_t mask[4][NUM_CHANNELS]; 134 unsigned chan_index; 135 unsigned q; 136 137 assert(fs->current); 138 if(!fs->current) 139 return; 140 141 /* Sanity checks */ 142 assert(nr * QUAD_SIZE == TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH); 143 assert(x % TILE_VECTOR_WIDTH == 0); 144 assert(y % TILE_VECTOR_HEIGHT == 0); 145 for (q = 0; q < nr; ++q) { 146 assert(quads[q]->input.x0 == x + q*2); 147 assert(quads[q]->input.y0 == y); 148 } 149 150 /* mask */ 151 for (q = 0; q < 4; ++q) 152 for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index) 153 mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0; 154 155 /* color buffer */ 156 if(llvmpipe->framebuffer.nr_cbufs >= 1 && 157 llvmpipe->framebuffer.cbufs[0]) { 158 tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y); 159 color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0); 160 } 161 else 162 color = NULL; 163 164 /* depth buffer */ 165 if(llvmpipe->zsbuf_map) { 166 assert((x % 2) == 0); 167 assert((y % 2) == 0); 168 depth = llvmpipe->zsbuf_map + 169 y*llvmpipe->zsbuf_transfer->stride + 170 2*x*util_format_get_blocksize(llvmpipe->zsbuf_transfer->texture->format); 171 } 172 else 173 depth = NULL; 174 175 /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */ 176 assert(lp_check_alignment(mask, 16)); 177 178 assert(lp_check_alignment(depth, 16)); 179 assert(lp_check_alignment(color, 16)); 180 assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16)); 181 182 /* run shader */ 183 fs->current->jit_function( &llvmpipe->jit_context, 184 x, y, 185 quad->coef->a0, 186 quad->coef->dadx, 187 quad->coef->dady, 188 &mask[0][0], 189 color, 190 depth); 191} 192 193 194 195 196/** 197 * Do triangle cull test using tri determinant (sign indicates orientation) 198 * \return true if triangle is to be culled. 199 */ 200static INLINE boolean 201cull_tri(const struct setup_context *setup, float det) 202{ 203 if (det != 0) { 204 /* if (det < 0 then Z points toward camera and triangle is 205 * counter-clockwise winding. 206 */ 207 unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; 208 209 if ((winding & setup->winding) == 0) 210 return FALSE; 211 } 212 213 /* Culled: 214 */ 215 return TRUE; 216} 217 218 219 220/** 221 * Clip setup->quad against the scissor/surface bounds. 222 */ 223static INLINE void 224quad_clip( struct setup_context *setup, struct quad_header *quad ) 225{ 226 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; 227 const int minx = (int) cliprect->minx; 228 const int maxx = (int) cliprect->maxx; 229 const int miny = (int) cliprect->miny; 230 const int maxy = (int) cliprect->maxy; 231 232 if (quad->input.x0 >= maxx || 233 quad->input.y0 >= maxy || 234 quad->input.x0 + 1 < minx || 235 quad->input.y0 + 1 < miny) { 236 /* totally clipped */ 237 quad->inout.mask = 0x0; 238 return; 239 } 240 if (quad->input.x0 < minx) 241 quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); 242 if (quad->input.y0 < miny) 243 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); 244 if (quad->input.x0 == maxx - 1) 245 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); 246 if (quad->input.y0 == maxy - 1) 247 quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); 248} 249 250 251 252/** 253 * Given an X or Y coordinate, return the block/quad coordinate that it 254 * belongs to. 255 */ 256static INLINE int block( int x ) 257{ 258 return x & ~(2-1); 259} 260 261static INLINE int block_x( int x ) 262{ 263 return x & ~(TILE_VECTOR_WIDTH - 1); 264} 265 266 267/** 268 * Emit a quad (pass to next stage) with clipping. 269 */ 270static INLINE void 271clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) 272{ 273 quad_clip( setup, quad ); 274 275 if (quad->inout.mask) { 276 struct llvmpipe_context *lp = setup->llvmpipe; 277 278#if 1 279 /* XXX: The blender expects 4 quads. This is far from efficient, but 280 * until we codegenerate single-quad variants of the fragment pipeline 281 * we need this hack. */ 282 const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; 283 struct quad_header quads[4]; 284 struct quad_header *quad_ptrs[4]; 285 int x0 = block_x(quad->input.x0); 286 unsigned i; 287 288 assert(nr_quads == 4); 289 290 for(i = 0; i < nr_quads; ++i) { 291 int x = x0 + 2*i; 292 if(x == quad->input.x0) 293 memcpy(&quads[i], quad, sizeof quads[i]); 294 else { 295 memset(&quads[i], 0, sizeof quads[i]); 296 quads[i].input.x0 = x; 297 quads[i].input.y0 = quad->input.y0; 298 quads[i].coef = quad->coef; 299 } 300 quad_ptrs[i] = &quads[i]; 301 } 302 303 shade_quads( lp, quad_ptrs, nr_quads ); 304#else 305 shade_quads( lp, &quad, 1 ); 306#endif 307 } 308} 309 310 311/** 312 * Render a horizontal span of quads 313 */ 314static void flush_spans( struct setup_context *setup ) 315{ 316 const int step = TILE_VECTOR_WIDTH; 317 const int xleft0 = setup->span.left[0]; 318 const int xleft1 = setup->span.left[1]; 319 const int xright0 = setup->span.right[0]; 320 const int xright1 = setup->span.right[1]; 321 322 323 int minleft = block_x(MIN2(xleft0, xleft1)); 324 int maxright = MAX2(xright0, xright1); 325 int x; 326 327 for (x = minleft; x < maxright; x += step) { 328 unsigned skip_left0 = CLAMP(xleft0 - x, 0, step); 329 unsigned skip_left1 = CLAMP(xleft1 - x, 0, step); 330 unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); 331 unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); 332 unsigned lx = x; 333 const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; 334 unsigned q = 0; 335 336 unsigned skipmask_left0 = (1U << skip_left0) - 1U; 337 unsigned skipmask_left1 = (1U << skip_left1) - 1U; 338 339 /* These calculations fail when step == 32 and skip_right == 0. 340 */ 341 unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0); 342 unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1); 343 344 unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; 345 unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; 346 347 if (mask0 | mask1) { 348 for(q = 0; q < nr_quads; ++q) { 349 unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); 350 setup->quad[q].input.x0 = lx; 351 setup->quad[q].input.y0 = setup->span.y; 352 setup->quad[q].inout.mask = quadmask; 353 setup->quad_ptrs[q] = &setup->quad[q]; 354 mask0 >>= 2; 355 mask1 >>= 2; 356 lx += 2; 357 } 358 assert(!(mask0 | mask1)); 359 360 shade_quads(setup->llvmpipe, setup->quad_ptrs, nr_quads ); 361 } 362 } 363 364 365 setup->span.y = 0; 366 setup->span.right[0] = 0; 367 setup->span.right[1] = 0; 368 setup->span.left[0] = 1000000; /* greater than right[0] */ 369 setup->span.left[1] = 1000000; /* greater than right[1] */ 370} 371 372 373#if DEBUG_VERTS 374static void print_vertex(const struct setup_context *setup, 375 const float (*v)[4]) 376{ 377 int i; 378 debug_printf(" Vertex: (%p)\n", v); 379 for (i = 0; i < setup->quad[0].nr_attrs; i++) { 380 debug_printf(" %d: %f %f %f %f\n", i, 381 v[i][0], v[i][1], v[i][2], v[i][3]); 382 if (util_is_inf_or_nan(v[i][0])) { 383 debug_printf(" NaN!\n"); 384 } 385 } 386} 387#endif 388 389/** 390 * Sort the vertices from top to bottom order, setting up the triangle 391 * edge fields (ebot, emaj, etop). 392 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise 393 */ 394static boolean setup_sort_vertices( struct setup_context *setup, 395 float det, 396 const float (*v0)[4], 397 const float (*v1)[4], 398 const float (*v2)[4] ) 399{ 400 setup->vprovoke = v2; 401 402 /* determine bottom to top order of vertices */ 403 { 404 float y0 = v0[0][1]; 405 float y1 = v1[0][1]; 406 float y2 = v2[0][1]; 407 if (y0 <= y1) { 408 if (y1 <= y2) { 409 /* y0<=y1<=y2 */ 410 setup->vmin = v0; 411 setup->vmid = v1; 412 setup->vmax = v2; 413 } 414 else if (y2 <= y0) { 415 /* y2<=y0<=y1 */ 416 setup->vmin = v2; 417 setup->vmid = v0; 418 setup->vmax = v1; 419 } 420 else { 421 /* y0<=y2<=y1 */ 422 setup->vmin = v0; 423 setup->vmid = v2; 424 setup->vmax = v1; 425 } 426 } 427 else { 428 if (y0 <= y2) { 429 /* y1<=y0<=y2 */ 430 setup->vmin = v1; 431 setup->vmid = v0; 432 setup->vmax = v2; 433 } 434 else if (y2 <= y1) { 435 /* y2<=y1<=y0 */ 436 setup->vmin = v2; 437 setup->vmid = v1; 438 setup->vmax = v0; 439 } 440 else { 441 /* y1<=y2<=y0 */ 442 setup->vmin = v1; 443 setup->vmid = v2; 444 setup->vmax = v0; 445 } 446 } 447 } 448 449 setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; 450 setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; 451 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; 452 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; 453 setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; 454 setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; 455 456 /* 457 * Compute triangle's area. Use 1/area to compute partial 458 * derivatives of attributes later. 459 * 460 * The area will be the same as prim->det, but the sign may be 461 * different depending on how the vertices get sorted above. 462 * 463 * To determine whether the primitive is front or back facing we 464 * use the prim->det value because its sign is correct. 465 */ 466 { 467 const float area = (setup->emaj.dx * setup->ebot.dy - 468 setup->ebot.dx * setup->emaj.dy); 469 470 setup->oneoverarea = 1.0f / area; 471 472 /* 473 debug_printf("%s one-over-area %f area %f det %f\n", 474 __FUNCTION__, setup->oneoverarea, area, det ); 475 */ 476 if (util_is_inf_or_nan(setup->oneoverarea)) 477 return FALSE; 478 } 479 480 /* We need to know if this is a front or back-facing triangle for: 481 * - the GLSL gl_FrontFacing fragment attribute (bool) 482 * - two-sided stencil test 483 */ 484 setup->facing = 485 ((det > 0.0) ^ 486 (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); 487 488 /* Prepare pixel offset for rasterisation: 489 * - pixel center (0.5, 0.5) for GL, or 490 * - assume (0.0, 0.0) for other APIs. 491 */ 492 if (setup->llvmpipe->rasterizer->gl_rasterization_rules) { 493 setup->pixel_offset = 0.5f; 494 } else { 495 setup->pixel_offset = 0.0f; 496 } 497 498 return TRUE; 499} 500 501 502/** 503 * Compute a0, dadx and dady for a linearly interpolated coefficient, 504 * for a triangle. 505 */ 506static void tri_pos_coeff( struct setup_context *setup, 507 uint vertSlot, unsigned i) 508{ 509 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; 510 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 511 float a = setup->ebot.dy * majda - botda * setup->emaj.dy; 512 float b = setup->emaj.dx * botda - majda * setup->ebot.dx; 513 float dadx = a * setup->oneoverarea; 514 float dady = b * setup->oneoverarea; 515 516 assert(i <= 3); 517 518 setup->coef.dadx[0][i] = dadx; 519 setup->coef.dady[0][i] = dady; 520 521 /* calculate a0 as the value which would be sampled for the 522 * fragment at (0,0), taking into account that we want to sample at 523 * pixel centers, in other words (pixel_offset, pixel_offset). 524 * 525 * this is neat but unfortunately not a good way to do things for 526 * triangles with very large values of dadx or dady as it will 527 * result in the subtraction and re-addition from a0 of a very 528 * large number, which means we'll end up loosing a lot of the 529 * fractional bits and precision from a0. the way to fix this is 530 * to define a0 as the sample at a pixel center somewhere near vmin 531 * instead - i'll switch to this later. 532 */ 533 setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - 534 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 535 dady * (setup->vmin[0][1] - setup->pixel_offset))); 536 537 /* 538 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", 539 slot, "xyzw"[i], 540 setup->coef[slot].a0[i], 541 setup->coef[slot].dadx[i], 542 setup->coef[slot].dady[i]); 543 */ 544} 545 546 547/** 548 * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 549 * The value value comes from vertex[slot][i]. 550 * The result will be put into setup->coef[slot].a0[i]. 551 * \param slot which attribute slot 552 * \param i which component of the slot (0..3) 553 */ 554static void const_pos_coeff( struct setup_context *setup, 555 uint vertSlot, unsigned i) 556{ 557 setup->coef.dadx[0][i] = 0; 558 setup->coef.dady[0][i] = 0; 559 560 /* need provoking vertex info! 561 */ 562 setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i]; 563} 564 565 566/** 567 * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 568 * The value value comes from vertex[slot][i]. 569 * The result will be put into setup->coef[slot].a0[i]. 570 * \param slot which attribute slot 571 * \param i which component of the slot (0..3) 572 */ 573static void const_coeff( struct setup_context *setup, 574 unsigned attrib, 575 uint vertSlot) 576{ 577 unsigned i; 578 for (i = 0; i < NUM_CHANNELS; ++i) { 579 setup->coef.dadx[1 + attrib][i] = 0; 580 setup->coef.dady[1 + attrib][i] = 0; 581 582 /* need provoking vertex info! 583 */ 584 setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i]; 585 } 586} 587 588 589/** 590 * Compute a0, dadx and dady for a linearly interpolated coefficient, 591 * for a triangle. 592 */ 593static void tri_linear_coeff( struct setup_context *setup, 594 unsigned attrib, 595 uint vertSlot) 596{ 597 unsigned i; 598 for (i = 0; i < NUM_CHANNELS; ++i) { 599 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; 600 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 601 float a = setup->ebot.dy * majda - botda * setup->emaj.dy; 602 float b = setup->emaj.dx * botda - majda * setup->ebot.dx; 603 float dadx = a * setup->oneoverarea; 604 float dady = b * setup->oneoverarea; 605 606 assert(i <= 3); 607 608 setup->coef.dadx[1 + attrib][i] = dadx; 609 setup->coef.dady[1 + attrib][i] = dady; 610 611 /* calculate a0 as the value which would be sampled for the 612 * fragment at (0,0), taking into account that we want to sample at 613 * pixel centers, in other words (0.5, 0.5). 614 * 615 * this is neat but unfortunately not a good way to do things for 616 * triangles with very large values of dadx or dady as it will 617 * result in the subtraction and re-addition from a0 of a very 618 * large number, which means we'll end up loosing a lot of the 619 * fractional bits and precision from a0. the way to fix this is 620 * to define a0 as the sample at a pixel center somewhere near vmin 621 * instead - i'll switch to this later. 622 */ 623 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - 624 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 625 dady * (setup->vmin[0][1] - setup->pixel_offset))); 626 627 /* 628 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", 629 slot, "xyzw"[i], 630 setup->coef[slot].a0[i], 631 setup->coef[slot].dadx[i], 632 setup->coef[slot].dady[i]); 633 */ 634 } 635} 636 637 638/** 639 * Compute a0, dadx and dady for a perspective-corrected interpolant, 640 * for a triangle. 641 * We basically multiply the vertex value by 1/w before computing 642 * the plane coefficients (a0, dadx, dady). 643 * Later, when we compute the value at a particular fragment position we'll 644 * divide the interpolated value by the interpolated W at that fragment. 645 */ 646static void tri_persp_coeff( struct setup_context *setup, 647 unsigned attrib, 648 uint vertSlot) 649{ 650 unsigned i; 651 for (i = 0; i < NUM_CHANNELS; ++i) { 652 /* premultiply by 1/w (v[0][3] is always W): 653 */ 654 float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; 655 float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; 656 float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; 657 float botda = mida - mina; 658 float majda = maxa - mina; 659 float a = setup->ebot.dy * majda - botda * setup->emaj.dy; 660 float b = setup->emaj.dx * botda - majda * setup->ebot.dx; 661 float dadx = a * setup->oneoverarea; 662 float dady = b * setup->oneoverarea; 663 664 /* 665 debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, 666 setup->vmin[vertSlot][i], 667 setup->vmid[vertSlot][i], 668 setup->vmax[vertSlot][i] 669 ); 670 */ 671 assert(i <= 3); 672 673 setup->coef.dadx[1 + attrib][i] = dadx; 674 setup->coef.dady[1 + attrib][i] = dady; 675 setup->coef.a0[1 + attrib][i] = (mina - 676 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 677 dady * (setup->vmin[0][1] - setup->pixel_offset))); 678 } 679} 680 681 682/** 683 * Special coefficient setup for gl_FragCoord. 684 * X and Y are trivial, though Y has to be inverted for OpenGL. 685 * Z and W are copied from posCoef which should have already been computed. 686 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 687 */ 688static void 689setup_fragcoord_coeff(struct setup_context *setup, uint slot) 690{ 691 /*X*/ 692 setup->coef.a0[1 + slot][0] = 0; 693 setup->coef.dadx[1 + slot][0] = 1.0; 694 setup->coef.dady[1 + slot][0] = 0.0; 695 /*Y*/ 696 setup->coef.a0[1 + slot][1] = 0.0; 697 setup->coef.dadx[1 + slot][1] = 0.0; 698 setup->coef.dady[1 + slot][1] = 1.0; 699 /*Z*/ 700 setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2]; 701 setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2]; 702 setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2]; 703 /*W*/ 704 setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3]; 705 setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3]; 706 setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3]; 707} 708 709 710 711/** 712 * Compute the setup->coef[] array dadx, dady, a0 values. 713 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. 714 */ 715static void setup_tri_coefficients( struct setup_context *setup ) 716{ 717 struct llvmpipe_context *llvmpipe = setup->llvmpipe; 718 const struct lp_fragment_shader *lpfs = llvmpipe->fs; 719 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); 720 uint fragSlot; 721 722 /* z and w are done by linear interpolation: 723 */ 724 tri_pos_coeff(setup, 0, 2); 725 tri_pos_coeff(setup, 0, 3); 726 727 /* setup interpolation for all the remaining attributes: 728 */ 729 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { 730 const uint vertSlot = vinfo->attrib[fragSlot].src_index; 731 732 switch (vinfo->attrib[fragSlot].interp_mode) { 733 case INTERP_CONSTANT: 734 const_coeff(setup, fragSlot, vertSlot); 735 break; 736 case INTERP_LINEAR: 737 tri_linear_coeff(setup, fragSlot, vertSlot); 738 break; 739 case INTERP_PERSPECTIVE: 740 tri_persp_coeff(setup, fragSlot, vertSlot); 741 break; 742 case INTERP_POS: 743 setup_fragcoord_coeff(setup, fragSlot); 744 break; 745 default: 746 assert(0); 747 } 748 749 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { 750 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; 751 setup->coef.dadx[1 + fragSlot][0] = 0.0; 752 setup->coef.dady[1 + fragSlot][0] = 0.0; 753 } 754 } 755} 756 757 758 759static void setup_tri_edges( struct setup_context *setup ) 760{ 761 float vmin_x = setup->vmin[0][0] + setup->pixel_offset; 762 float vmid_x = setup->vmid[0][0] + setup->pixel_offset; 763 764 float vmin_y = setup->vmin[0][1] - setup->pixel_offset; 765 float vmid_y = setup->vmid[0][1] - setup->pixel_offset; 766 float vmax_y = setup->vmax[0][1] - setup->pixel_offset; 767 768 setup->emaj.sy = ceilf(vmin_y); 769 setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); 770 setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; 771 setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; 772 773 setup->etop.sy = ceilf(vmid_y); 774 setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); 775 setup->etop.dxdy = setup->etop.dx / setup->etop.dy; 776 setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; 777 778 setup->ebot.sy = ceilf(vmin_y); 779 setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); 780 setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; 781 setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; 782} 783 784 785/** 786 * Render the upper or lower half of a triangle. 787 * Scissoring/cliprect is applied here too. 788 */ 789static void subtriangle( struct setup_context *setup, 790 struct edge *eleft, 791 struct edge *eright, 792 unsigned lines ) 793{ 794 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; 795 const int minx = (int) cliprect->minx; 796 const int maxx = (int) cliprect->maxx; 797 const int miny = (int) cliprect->miny; 798 const int maxy = (int) cliprect->maxy; 799 int y, start_y, finish_y; 800 int sy = (int)eleft->sy; 801 802 assert((int)eleft->sy == (int) eright->sy); 803 804 /* clip top/bottom */ 805 start_y = sy; 806 if (start_y < miny) 807 start_y = miny; 808 809 finish_y = sy + lines; 810 if (finish_y > maxy) 811 finish_y = maxy; 812 813 start_y -= sy; 814 finish_y -= sy; 815 816 /* 817 debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); 818 */ 819 820 for (y = start_y; y < finish_y; y++) { 821 822 /* avoid accumulating adds as floats don't have the precision to 823 * accurately iterate large triangle edges that way. luckily we 824 * can just multiply these days. 825 * 826 * this is all drowned out by the attribute interpolation anyway. 827 */ 828 int left = (int)(eleft->sx + y * eleft->dxdy); 829 int right = (int)(eright->sx + y * eright->dxdy); 830 831 /* clip left/right */ 832 if (left < minx) 833 left = minx; 834 if (right > maxx) 835 right = maxx; 836 837 if (left < right) { 838 int _y = sy + y; 839 if (block(_y) != setup->span.y) { 840 flush_spans(setup); 841 setup->span.y = block(_y); 842 } 843 844 setup->span.left[_y&1] = left; 845 setup->span.right[_y&1] = right; 846 } 847 } 848 849 850 /* save the values so that emaj can be restarted: 851 */ 852 eleft->sx += lines * eleft->dxdy; 853 eright->sx += lines * eright->dxdy; 854 eleft->sy += lines; 855 eright->sy += lines; 856} 857 858 859/** 860 * Recalculate prim's determinant. This is needed as we don't have 861 * get this information through the vbuf_render interface & we must 862 * calculate it here. 863 */ 864static float 865calc_det( const float (*v0)[4], 866 const float (*v1)[4], 867 const float (*v2)[4] ) 868{ 869 /* edge vectors e = v0 - v2, f = v1 - v2 */ 870 const float ex = v0[0][0] - v2[0][0]; 871 const float ey = v0[0][1] - v2[0][1]; 872 const float fx = v1[0][0] - v2[0][0]; 873 const float fy = v1[0][1] - v2[0][1]; 874 875 /* det = cross(e,f).z */ 876 return ex * fy - ey * fx; 877} 878 879 880/** 881 * Do setup for triangle rasterization, then render the triangle. 882 */ 883void llvmpipe_setup_tri( struct setup_context *setup, 884 const float (*v0)[4], 885 const float (*v1)[4], 886 const float (*v2)[4] ) 887{ 888 float det; 889 890#if DEBUG_VERTS 891 debug_printf("Setup triangle:\n"); 892 print_vertex(setup, v0); 893 print_vertex(setup, v1); 894 print_vertex(setup, v2); 895#endif 896 897 if (setup->llvmpipe->no_rast) 898 return; 899 900 det = calc_det(v0, v1, v2); 901 /* 902 debug_printf("%s\n", __FUNCTION__ ); 903 */ 904 905#if DEBUG_FRAGS 906 setup->numFragsEmitted = 0; 907 setup->numFragsWritten = 0; 908#endif 909 910 if (cull_tri( setup, det )) 911 return; 912 913 if (!setup_sort_vertices( setup, det, v0, v1, v2 )) 914 return; 915 setup_tri_coefficients( setup ); 916 setup_tri_edges( setup ); 917 918 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES); 919 920 setup->span.y = 0; 921 setup->span.right[0] = 0; 922 setup->span.right[1] = 0; 923 /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ 924 925 /* init_constant_attribs( setup ); */ 926 927 if (setup->oneoverarea < 0.0) { 928 /* emaj on left: 929 */ 930 subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); 931 subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); 932 } 933 else { 934 /* emaj on right: 935 */ 936 subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); 937 subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); 938 } 939 940 flush_spans( setup ); 941 942#if DEBUG_FRAGS 943 printf("Tri: %u frags emitted, %u written\n", 944 setup->numFragsEmitted, 945 setup->numFragsWritten); 946#endif 947} 948 949 950 951/** 952 * Compute a0, dadx and dady for a linearly interpolated coefficient, 953 * for a line. 954 */ 955static void 956linear_pos_coeff(struct setup_context *setup, 957 uint vertSlot, uint i) 958{ 959 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 960 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 961 const float dady = da * setup->emaj.dy * setup->oneoverarea; 962 setup->coef.dadx[0][i] = dadx; 963 setup->coef.dady[0][i] = dady; 964 setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - 965 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 966 dady * (setup->vmin[0][1] - setup->pixel_offset))); 967} 968 969 970/** 971 * Compute a0, dadx and dady for a linearly interpolated coefficient, 972 * for a line. 973 */ 974static void 975line_linear_coeff(struct setup_context *setup, 976 unsigned attrib, 977 uint vertSlot) 978{ 979 unsigned i; 980 for (i = 0; i < NUM_CHANNELS; ++i) { 981 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 982 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 983 const float dady = da * setup->emaj.dy * setup->oneoverarea; 984 setup->coef.dadx[1 + attrib][i] = dadx; 985 setup->coef.dady[1 + attrib][i] = dady; 986 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - 987 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 988 dady * (setup->vmin[0][1] - setup->pixel_offset))); 989 } 990} 991 992 993/** 994 * Compute a0, dadx and dady for a perspective-corrected interpolant, 995 * for a line. 996 */ 997static void 998line_persp_coeff(struct setup_context *setup, 999 unsigned attrib, 1000 uint vertSlot) 1001{ 1002 unsigned i; 1003 for (i = 0; i < NUM_CHANNELS; ++i) { 1004 /* XXX double-check/verify this arithmetic */ 1005 const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; 1006 const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; 1007 const float da = a1 - a0; 1008 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 1009 const float dady = da * setup->emaj.dy * setup->oneoverarea; 1010 setup->coef.dadx[1 + attrib][i] = dadx; 1011 setup->coef.dady[1 + attrib][i] = dady; 1012 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - 1013 (dadx * (setup->vmin[0][0] - setup->pixel_offset) + 1014 dady * (setup->vmin[0][1] - setup->pixel_offset))); 1015 } 1016} 1017 1018 1019/** 1020 * Compute the setup->coef[] array dadx, dady, a0 values. 1021 * Must be called after setup->vmin,vmax are initialized. 1022 */ 1023static INLINE boolean 1024setup_line_coefficients(struct setup_context *setup, 1025 const float (*v0)[4], 1026 const float (*v1)[4]) 1027{ 1028 struct llvmpipe_context *llvmpipe = setup->llvmpipe; 1029 const struct lp_fragment_shader *lpfs = llvmpipe->fs; 1030 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); 1031 uint fragSlot; 1032 float area; 1033 1034 /* use setup->vmin, vmax to point to vertices */ 1035 if (llvmpipe->rasterizer->flatshade_first) 1036 setup->vprovoke = v0; 1037 else 1038 setup->vprovoke = v1; 1039 setup->vmin = v0; 1040 setup->vmax = v1; 1041 1042 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; 1043 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; 1044 1045 /* NOTE: this is not really area but something proportional to it */ 1046 area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; 1047 if (area == 0.0f || util_is_inf_or_nan(area)) 1048 return FALSE; 1049 setup->oneoverarea = 1.0f / area; 1050 1051 /* z and w are done by linear interpolation: 1052 */ 1053 linear_pos_coeff(setup, 0, 2); 1054 linear_pos_coeff(setup, 0, 3); 1055 1056 /* setup interpolation for all the remaining attributes: 1057 */ 1058 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { 1059 const uint vertSlot = vinfo->attrib[fragSlot].src_index; 1060 1061 switch (vinfo->attrib[fragSlot].interp_mode) { 1062 case INTERP_CONSTANT: 1063 const_coeff(setup, fragSlot, vertSlot); 1064 break; 1065 case INTERP_LINEAR: 1066 line_linear_coeff(setup, fragSlot, vertSlot); 1067 break; 1068 case INTERP_PERSPECTIVE: 1069 line_persp_coeff(setup, fragSlot, vertSlot); 1070 break; 1071 case INTERP_POS: 1072 setup_fragcoord_coeff(setup, fragSlot); 1073 break; 1074 default: 1075 assert(0); 1076 } 1077 1078 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { 1079 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; 1080 setup->coef.dadx[1 + fragSlot][0] = 0.0; 1081 setup->coef.dady[1 + fragSlot][0] = 0.0; 1082 } 1083 } 1084 return TRUE; 1085} 1086 1087 1088/** 1089 * Plot a pixel in a line segment. 1090 */ 1091static INLINE void 1092plot(struct setup_context *setup, int x, int y) 1093{ 1094 const int iy = y & 1; 1095 const int ix = x & 1; 1096 const int quadX = x - ix; 1097 const int quadY = y - iy; 1098 const int mask = (1 << ix) << (2 * iy); 1099 1100 if (quadX != setup->quad[0].input.x0 || 1101 quadY != setup->quad[0].input.y0) 1102 { 1103 /* flush prev quad, start new quad */ 1104 1105 if (setup->quad[0].input.x0 != -1) 1106 clip_emit_quad( setup, &setup->quad[0] ); 1107 1108 setup->quad[0].input.x0 = quadX; 1109 setup->quad[0].input.y0 = quadY; 1110 setup->quad[0].inout.mask = 0x0; 1111 } 1112 1113 setup->quad[0].inout.mask |= mask; 1114} 1115 1116 1117/** 1118 * Do setup for line rasterization, then render the line. 1119 * Single-pixel width, no stipple, etc. We rely on the 'draw' module 1120 * to handle stippling and wide lines. 1121 */ 1122void 1123llvmpipe_setup_line(struct setup_context *setup, 1124 const float (*v0)[4], 1125 const float (*v1)[4]) 1126{ 1127 int x0 = (int) v0[0][0]; 1128 int x1 = (int) v1[0][0]; 1129 int y0 = (int) v0[0][1]; 1130 int y1 = (int) v1[0][1]; 1131 int dx = x1 - x0; 1132 int dy = y1 - y0; 1133 int xstep, ystep; 1134 1135#if DEBUG_VERTS 1136 debug_printf("Setup line:\n"); 1137 print_vertex(setup, v0); 1138 print_vertex(setup, v1); 1139#endif 1140 1141 if (setup->llvmpipe->no_rast) 1142 return; 1143 1144 if (dx == 0 && dy == 0) 1145 return; 1146 1147 if (!setup_line_coefficients(setup, v0, v1)) 1148 return; 1149 1150 assert(v0[0][0] < 1.0e9); 1151 assert(v0[0][1] < 1.0e9); 1152 assert(v1[0][0] < 1.0e9); 1153 assert(v1[0][1] < 1.0e9); 1154 1155 if (dx < 0) { 1156 dx = -dx; /* make positive */ 1157 xstep = -1; 1158 } 1159 else { 1160 xstep = 1; 1161 } 1162 1163 if (dy < 0) { 1164 dy = -dy; /* make positive */ 1165 ystep = -1; 1166 } 1167 else { 1168 ystep = 1; 1169 } 1170 1171 assert(dx >= 0); 1172 assert(dy >= 0); 1173 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES); 1174 1175 setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; 1176 setup->quad[0].inout.mask = 0x0; 1177 1178 /* XXX temporary: set coverage to 1.0 so the line appears 1179 * if AA mode happens to be enabled. 1180 */ 1181 setup->quad[0].input.coverage[0] = 1182 setup->quad[0].input.coverage[1] = 1183 setup->quad[0].input.coverage[2] = 1184 setup->quad[0].input.coverage[3] = 1.0; 1185 1186 if (dx > dy) { 1187 /*** X-major line ***/ 1188 int i; 1189 const int errorInc = dy + dy; 1190 int error = errorInc - dx; 1191 const int errorDec = error - dx; 1192 1193 for (i = 0; i < dx; i++) { 1194 plot(setup, x0, y0); 1195 1196 x0 += xstep; 1197 if (error < 0) { 1198 error += errorInc; 1199 } 1200 else { 1201 error += errorDec; 1202 y0 += ystep; 1203 } 1204 } 1205 } 1206 else { 1207 /*** Y-major line ***/ 1208 int i; 1209 const int errorInc = dx + dx; 1210 int error = errorInc - dy; 1211 const int errorDec = error - dy; 1212 1213 for (i = 0; i < dy; i++) { 1214 plot(setup, x0, y0); 1215 1216 y0 += ystep; 1217 if (error < 0) { 1218 error += errorInc; 1219 } 1220 else { 1221 error += errorDec; 1222 x0 += xstep; 1223 } 1224 } 1225 } 1226 1227 /* draw final quad */ 1228 if (setup->quad[0].inout.mask) { 1229 clip_emit_quad( setup, &setup->quad[0] ); 1230 } 1231} 1232 1233 1234static void 1235point_persp_coeff(struct setup_context *setup, 1236 const float (*vert)[4], 1237 unsigned attrib, 1238 uint vertSlot) 1239{ 1240 unsigned i; 1241 for(i = 0; i < NUM_CHANNELS; ++i) { 1242 setup->coef.dadx[1 + attrib][i] = 0.0F; 1243 setup->coef.dady[1 + attrib][i] = 0.0F; 1244 setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3]; 1245 } 1246} 1247 1248 1249/** 1250 * Do setup for point rasterization, then render the point. 1251 * Round or square points... 1252 * XXX could optimize a lot for 1-pixel points. 1253 */ 1254void 1255llvmpipe_setup_point( struct setup_context *setup, 1256 const float (*v0)[4] ) 1257{ 1258 struct llvmpipe_context *llvmpipe = setup->llvmpipe; 1259 const struct lp_fragment_shader *lpfs = llvmpipe->fs; 1260 const int sizeAttr = setup->llvmpipe->psize_slot; 1261 const float size 1262 = sizeAttr > 0 ? v0[sizeAttr][0] 1263 : setup->llvmpipe->rasterizer->point_size; 1264 const float halfSize = 0.5F * size; 1265 const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth; 1266 const float x = v0[0][0]; /* Note: data[0] is always position */ 1267 const float y = v0[0][1]; 1268 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); 1269 uint fragSlot; 1270 1271#if DEBUG_VERTS 1272 debug_printf("Setup point:\n"); 1273 print_vertex(setup, v0); 1274#endif 1275 1276 if (llvmpipe->no_rast) 1277 return; 1278 1279 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS); 1280 1281 /* For points, all interpolants are constant-valued. 1282 * However, for point sprites, we'll need to setup texcoords appropriately. 1283 * XXX: which coefficients are the texcoords??? 1284 * We may do point sprites as textured quads... 1285 * 1286 * KW: We don't know which coefficients are texcoords - ultimately 1287 * the choice of what interpolation mode to use for each attribute 1288 * should be determined by the fragment program, using 1289 * per-attribute declaration statements that include interpolation 1290 * mode as a parameter. So either the fragment program will have 1291 * to be adjusted for pointsprite vs normal point behaviour, or 1292 * otherwise a special interpolation mode will have to be defined 1293 * which matches the required behaviour for point sprites. But - 1294 * the latter is not a feature of normal hardware, and as such 1295 * probably should be ruled out on that basis. 1296 */ 1297 setup->vprovoke = v0; 1298 1299 /* setup Z, W */ 1300 const_pos_coeff(setup, 0, 2); 1301 const_pos_coeff(setup, 0, 3); 1302 1303 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { 1304 const uint vertSlot = vinfo->attrib[fragSlot].src_index; 1305 1306 switch (vinfo->attrib[fragSlot].interp_mode) { 1307 case INTERP_CONSTANT: 1308 /* fall-through */ 1309 case INTERP_LINEAR: 1310 const_coeff(setup, fragSlot, vertSlot); 1311 break; 1312 case INTERP_PERSPECTIVE: 1313 point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot); 1314 break; 1315 case INTERP_POS: 1316 setup_fragcoord_coeff(setup, fragSlot); 1317 break; 1318 default: 1319 assert(0); 1320 } 1321 1322 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { 1323 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; 1324 setup->coef.dadx[1 + fragSlot][0] = 0.0; 1325 setup->coef.dady[1 + fragSlot][0] = 0.0; 1326 } 1327 } 1328 1329 1330 if (halfSize <= 0.5 && !round) { 1331 /* special case for 1-pixel points */ 1332 const int ix = ((int) x) & 1; 1333 const int iy = ((int) y) & 1; 1334 setup->quad[0].input.x0 = (int) x - ix; 1335 setup->quad[0].input.y0 = (int) y - iy; 1336 setup->quad[0].inout.mask = (1 << ix) << (2 * iy); 1337 clip_emit_quad( setup, &setup->quad[0] ); 1338 } 1339 else { 1340 if (round) { 1341 /* rounded points */ 1342 const int ixmin = block((int) (x - halfSize)); 1343 const int ixmax = block((int) (x + halfSize)); 1344 const int iymin = block((int) (y - halfSize)); 1345 const int iymax = block((int) (y + halfSize)); 1346 const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ 1347 const float rmax = halfSize + 0.7071F; 1348 const float rmin2 = MAX2(0.0F, rmin * rmin); 1349 const float rmax2 = rmax * rmax; 1350 const float cscale = 1.0F / (rmax2 - rmin2); 1351 int ix, iy; 1352 1353 for (iy = iymin; iy <= iymax; iy += 2) { 1354 for (ix = ixmin; ix <= ixmax; ix += 2) { 1355 float dx, dy, dist2, cover; 1356 1357 setup->quad[0].inout.mask = 0x0; 1358 1359 dx = (ix + 0.5f) - x; 1360 dy = (iy + 0.5f) - y; 1361 dist2 = dx * dx + dy * dy; 1362 if (dist2 <= rmax2) { 1363 cover = 1.0F - (dist2 - rmin2) * cscale; 1364 setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); 1365 setup->quad[0].inout.mask |= MASK_TOP_LEFT; 1366 } 1367 1368 dx = (ix + 1.5f) - x; 1369 dy = (iy + 0.5f) - y; 1370 dist2 = dx * dx + dy * dy; 1371 if (dist2 <= rmax2) { 1372 cover = 1.0F - (dist2 - rmin2) * cscale; 1373 setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); 1374 setup->quad[0].inout.mask |= MASK_TOP_RIGHT; 1375 } 1376 1377 dx = (ix + 0.5f) - x; 1378 dy = (iy + 1.5f) - y; 1379 dist2 = dx * dx + dy * dy; 1380 if (dist2 <= rmax2) { 1381 cover = 1.0F - (dist2 - rmin2) * cscale; 1382 setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); 1383 setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; 1384 } 1385 1386 dx = (ix + 1.5f) - x; 1387 dy = (iy + 1.5f) - y; 1388 dist2 = dx * dx + dy * dy; 1389 if (dist2 <= rmax2) { 1390 cover = 1.0F - (dist2 - rmin2) * cscale; 1391 setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); 1392 setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; 1393 } 1394 1395 if (setup->quad[0].inout.mask) { 1396 setup->quad[0].input.x0 = ix; 1397 setup->quad[0].input.y0 = iy; 1398 clip_emit_quad( setup, &setup->quad[0] ); 1399 } 1400 } 1401 } 1402 } 1403 else { 1404 /* square points */ 1405 const int xmin = (int) (x + 0.75 - halfSize); 1406 const int ymin = (int) (y + 0.25 - halfSize); 1407 const int xmax = xmin + (int) size; 1408 const int ymax = ymin + (int) size; 1409 /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ 1410 const int ixmin = block(xmin); 1411 const int ixmax = block(xmax - 1); 1412 const int iymin = block(ymin); 1413 const int iymax = block(ymax - 1); 1414 int ix, iy; 1415 1416 /* 1417 debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); 1418 */ 1419 for (iy = iymin; iy <= iymax; iy += 2) { 1420 uint rowMask = 0xf; 1421 if (iy < ymin) { 1422 /* above the top edge */ 1423 rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); 1424 } 1425 if (iy + 1 >= ymax) { 1426 /* below the bottom edge */ 1427 rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); 1428 } 1429 1430 for (ix = ixmin; ix <= ixmax; ix += 2) { 1431 uint mask = rowMask; 1432 1433 if (ix < xmin) { 1434 /* fragment is past left edge of point, turn off left bits */ 1435 mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); 1436 } 1437 if (ix + 1 >= xmax) { 1438 /* past the right edge */ 1439 mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); 1440 } 1441 1442 setup->quad[0].inout.mask = mask; 1443 setup->quad[0].input.x0 = ix; 1444 setup->quad[0].input.y0 = iy; 1445 clip_emit_quad( setup, &setup->quad[0] ); 1446 } 1447 } 1448 } 1449 } 1450} 1451 1452void llvmpipe_setup_prepare( struct setup_context *setup ) 1453{ 1454 struct llvmpipe_context *lp = setup->llvmpipe; 1455 1456 if (lp->dirty) { 1457 llvmpipe_update_derived(lp); 1458 } 1459 1460 if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && 1461 lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && 1462 lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { 1463 /* we'll do culling */ 1464 setup->winding = lp->rasterizer->cull_mode; 1465 } 1466 else { 1467 /* 'draw' will do culling */ 1468 setup->winding = PIPE_WINDING_NONE; 1469 } 1470} 1471 1472 1473 1474void llvmpipe_setup_destroy_context( struct setup_context *setup ) 1475{ 1476 align_free( setup ); 1477} 1478 1479 1480/** 1481 * Create a new primitive setup/render stage. 1482 */ 1483struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ) 1484{ 1485 struct setup_context *setup; 1486 unsigned i; 1487 1488 setup = align_malloc(sizeof(struct setup_context), 16); 1489 if (!setup) 1490 return NULL; 1491 1492 memset(setup, 0, sizeof *setup); 1493 setup->llvmpipe = llvmpipe; 1494 1495 for (i = 0; i < MAX_QUADS; i++) { 1496 setup->quad[i].coef = &setup->coef; 1497 } 1498 1499 setup->span.left[0] = 1000000; /* greater than right[0] */ 1500 setup->span.left[1] = 1000000; /* greater than right[1] */ 1501 1502 return setup; 1503} 1504 1505