lp_setup.c revision b9f2f01315646c3af92e64152f51a593b65a5ac7
/**************************************************************************
 * 
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 **************************************************************************/

/**
 * \brief  Primitive rasterization/rendering (points, lines, triangles)
 *
 * \author  Keith Whitwell <keith@tungstengraphics.com>
 * \author  Brian Paul
 */

#include "lp_context.h"
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "lp_prim_setup.h" 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "lp_quad.h" 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "lp_quad_pipe.h" 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "lp_setup.h" 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "lp_state.h" 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "draw/draw_context.h" 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "draw/draw_private.h" 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "draw/draw_vertex.h" 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "pipe/p_shader_tokens.h" 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "pipe/p_thread.h" 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/u_math.h" 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/u_memory.h" 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "lp_tile_soa.h" 497dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch 507dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch 517dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch#define DEBUG_VERTS 0 52ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#define DEBUG_FRAGS 0 53ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch 54ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch/** 55ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch * Triangle edge info 56ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch */ 57ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdochstruct edge { 58ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch float dx; /**< X(v1) - X(v0), used only during setup */ 59ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch float dy; /**< Y(v1) - Y(v0), used only during setup */ 
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float dxdy; /**< dx/dy */ 61 float sx, sy; /**< first sample point coord */ 62 int lines; /**< number of lines on this edge */ 63}; 64 65 66#define MAX_QUADS 16 67 68 69/** 70 * Triangle setup info (derived from draw_stage). 71 * Also used for line drawing (taking some liberties). 72 */ 73struct setup_context { 74 struct llvmpipe_context *llvmpipe; 75 76 /* Vertices are just an array of floats making up each attribute in 77 * turn. Currently fixed at 4 floats, but should change in time. 78 * Codegen will help cope with this. 79 */ 80 const float (*vmax)[4]; 81 const float (*vmid)[4]; 82 const float (*vmin)[4]; 83 const float (*vprovoke)[4]; 84 85 struct edge ebot; 86 struct edge etop; 87 struct edge emaj; 88 89 float oneoverarea; 90 int facing; 91 92 struct quad_header quad[MAX_QUADS]; 93 struct quad_header *quad_ptrs[MAX_QUADS]; 94 unsigned count; 95 96 struct quad_interp_coef coef; 97 98 struct { 99 int left[2]; /**< [0] = row0, [1] = row1 */ 100 int right[2]; 101 int y; 102 } span; 103 104#if DEBUG_FRAGS 105 uint numFragsEmitted; /**< per primitive */ 106 uint numFragsWritten; /**< per primitive */ 107#endif 108 109 unsigned winding; /* which winding to cull */ 110}; 111 112 113 114 115/** 116 * Do triangle cull test using tri determinant (sign indicates orientation) 117 * \return true if triangle is to be culled. 118 */ 119static INLINE boolean 120cull_tri(const struct setup_context *setup, float det) 121{ 122 if (det != 0) { 123 /* if (det < 0 then Z points toward camera and triangle is 124 * counter-clockwise winding. 125 */ 126 unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; 127 128 if ((winding & setup->winding) == 0) 129 return FALSE; 130 } 131 132 /* Culled: 133 */ 134 return TRUE; 135} 136 137 138 139/** 140 * Clip setup->quad against the scissor/surface bounds. 
141 */ 142static INLINE void 143quad_clip( struct setup_context *setup, struct quad_header *quad ) 144{ 145 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; 146 const int minx = (int) cliprect->minx; 147 const int maxx = (int) cliprect->maxx; 148 const int miny = (int) cliprect->miny; 149 const int maxy = (int) cliprect->maxy; 150 151 if (quad->input.x0 >= maxx || 152 quad->input.y0 >= maxy || 153 quad->input.x0 + 1 < minx || 154 quad->input.y0 + 1 < miny) { 155 /* totally clipped */ 156 quad->inout.mask = 0x0; 157 return; 158 } 159 if (quad->input.x0 < minx) 160 quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); 161 if (quad->input.y0 < miny) 162 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); 163 if (quad->input.x0 == maxx - 1) 164 quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); 165 if (quad->input.y0 == maxy - 1) 166 quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); 167} 168 169 170 171/** 172 * Given an X or Y coordinate, return the block/quad coordinate that it 173 * belongs to. 174 */ 175static INLINE int block( int x ) 176{ 177 return x & ~(2-1); 178} 179 180static INLINE int block_x( int x ) 181{ 182 return x & ~(TILE_VECTOR_WIDTH - 1); 183} 184 185 186/** 187 * Emit a quad (pass to next stage) with clipping. 188 */ 189static INLINE void 190clip_emit_quad( struct setup_context *setup, struct quad_header *quad ) 191{ 192 quad_clip( setup, quad ); 193 194 if (quad->inout.mask) { 195 struct llvmpipe_context *lp = setup->llvmpipe; 196 197#if 1 198 /* XXX: The blender expects 4 quads. This is far from efficient, but 199 * until we codegenerate single-quad variants of the fragment pipeline 200 * we need this hack. 
*/ 201 const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; 202 struct quad_header quads[nr_quads]; 203 struct quad_header *quad_ptrs[nr_quads]; 204 int x0 = block_x(quad->input.x0); 205 unsigned i; 206 207 for(i = 0; i < nr_quads; ++i) { 208 int x = x0 + 2*i; 209 if(x == quad->input.x0) 210 memcpy(&quads[i], quad, sizeof quads[i]); 211 else { 212 memset(&quads[i], 0, sizeof quads[i]); 213 quads[i].input.x0 = x; 214 quads[i].input.y0 = quad->input.y0; 215 quads[i].coef = quad->coef; 216 } 217 quad_ptrs[i] = &quads[i]; 218 } 219 220 lp->quad.first->run( lp->quad.first, quad_ptrs, nr_quads ); 221#else 222 lp->quad.first->run( lp->quad.first, &quad, 1 ); 223#endif 224 } 225} 226 227 228/** 229 * Render a horizontal span of quads 230 */ 231static void flush_spans( struct setup_context *setup ) 232{ 233 const int step = TILE_VECTOR_WIDTH; 234 const int xleft0 = setup->span.left[0]; 235 const int xleft1 = setup->span.left[1]; 236 const int xright0 = setup->span.right[0]; 237 const int xright1 = setup->span.right[1]; 238 struct quad_stage *pipe = setup->llvmpipe->quad.first; 239 240 241 int minleft = block_x(MIN2(xleft0, xleft1)); 242 int maxright = MAX2(xright0, xright1); 243 int x; 244 245 for (x = minleft; x < maxright; x += step) { 246 unsigned skip_left0 = CLAMP(xleft0 - x, 0, step); 247 unsigned skip_left1 = CLAMP(xleft1 - x, 0, step); 248 unsigned skip_right0 = CLAMP(x + step - xright0, 0, step); 249 unsigned skip_right1 = CLAMP(x + step - xright1, 0, step); 250 unsigned lx = x; 251 const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE; 252 unsigned q = 0; 253 254 unsigned skipmask_left0 = (1U << skip_left0) - 1U; 255 unsigned skipmask_left1 = (1U << skip_left1) - 1U; 256 257 /* These calculations fail when step == 32 and skip_right == 0. 
258 */ 259 unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0); 260 unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1); 261 262 unsigned mask0 = ~skipmask_left0 & ~skipmask_right0; 263 unsigned mask1 = ~skipmask_left1 & ~skipmask_right1; 264 265 if (mask0 | mask1) { 266 for(q = 0; q < nr_quads; ++q) { 267 unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2); 268 setup->quad[q].input.x0 = lx; 269 setup->quad[q].input.y0 = setup->span.y; 270 setup->quad[q].inout.mask = quadmask; 271 setup->quad_ptrs[q] = &setup->quad[q]; 272 mask0 >>= 2; 273 mask1 >>= 2; 274 lx += 2; 275 } 276 assert(!(mask0 | mask1)); 277 278 pipe->run( pipe, setup->quad_ptrs, nr_quads ); 279 } 280 } 281 282 283 setup->span.y = 0; 284 setup->span.right[0] = 0; 285 setup->span.right[1] = 0; 286 setup->span.left[0] = 1000000; /* greater than right[0] */ 287 setup->span.left[1] = 1000000; /* greater than right[1] */ 288} 289 290 291#if DEBUG_VERTS 292static void print_vertex(const struct setup_context *setup, 293 const float (*v)[4]) 294{ 295 int i; 296 debug_printf(" Vertex: (%p)\n", v); 297 for (i = 0; i < setup->quad[0].nr_attrs; i++) { 298 debug_printf(" %d: %f %f %f %f\n", i, 299 v[i][0], v[i][1], v[i][2], v[i][3]); 300 if (util_is_inf_or_nan(v[i][0])) { 301 debug_printf(" NaN!\n"); 302 } 303 } 304} 305#endif 306 307/** 308 * Sort the vertices from top to bottom order, setting up the triangle 309 * edge fields (ebot, emaj, etop). 
310 * \return FALSE if coords are inf/nan (cull the tri), TRUE otherwise 311 */ 312static boolean setup_sort_vertices( struct setup_context *setup, 313 float det, 314 const float (*v0)[4], 315 const float (*v1)[4], 316 const float (*v2)[4] ) 317{ 318 setup->vprovoke = v2; 319 320 /* determine bottom to top order of vertices */ 321 { 322 float y0 = v0[0][1]; 323 float y1 = v1[0][1]; 324 float y2 = v2[0][1]; 325 if (y0 <= y1) { 326 if (y1 <= y2) { 327 /* y0<=y1<=y2 */ 328 setup->vmin = v0; 329 setup->vmid = v1; 330 setup->vmax = v2; 331 } 332 else if (y2 <= y0) { 333 /* y2<=y0<=y1 */ 334 setup->vmin = v2; 335 setup->vmid = v0; 336 setup->vmax = v1; 337 } 338 else { 339 /* y0<=y2<=y1 */ 340 setup->vmin = v0; 341 setup->vmid = v2; 342 setup->vmax = v1; 343 } 344 } 345 else { 346 if (y0 <= y2) { 347 /* y1<=y0<=y2 */ 348 setup->vmin = v1; 349 setup->vmid = v0; 350 setup->vmax = v2; 351 } 352 else if (y2 <= y1) { 353 /* y2<=y1<=y0 */ 354 setup->vmin = v2; 355 setup->vmid = v1; 356 setup->vmax = v0; 357 } 358 else { 359 /* y1<=y2<=y0 */ 360 setup->vmin = v1; 361 setup->vmid = v2; 362 setup->vmax = v0; 363 } 364 } 365 } 366 367 setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; 368 setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; 369 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; 370 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; 371 setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; 372 setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; 373 374 /* 375 * Compute triangle's area. Use 1/area to compute partial 376 * derivatives of attributes later. 377 * 378 * The area will be the same as prim->det, but the sign may be 379 * different depending on how the vertices get sorted above. 380 * 381 * To determine whether the primitive is front or back facing we 382 * use the prim->det value because its sign is correct. 
383 */ 384 { 385 const float area = (setup->emaj.dx * setup->ebot.dy - 386 setup->ebot.dx * setup->emaj.dy); 387 388 setup->oneoverarea = 1.0f / area; 389 390 /* 391 debug_printf("%s one-over-area %f area %f det %f\n", 392 __FUNCTION__, setup->oneoverarea, area, det ); 393 */ 394 if (util_is_inf_or_nan(setup->oneoverarea)) 395 return FALSE; 396 } 397 398 /* We need to know if this is a front or back-facing triangle for: 399 * - the GLSL gl_FrontFacing fragment attribute (bool) 400 * - two-sided stencil test 401 */ 402 setup->facing = 403 ((det > 0.0) ^ 404 (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW)); 405 406 return TRUE; 407} 408 409 410/** 411 * Compute a0, dadx and dady for a linearly interpolated coefficient, 412 * for a triangle. 413 */ 414static void tri_pos_coeff( struct setup_context *setup, 415 uint vertSlot, unsigned i) 416{ 417 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; 418 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 419 float a = setup->ebot.dy * majda - botda * setup->emaj.dy; 420 float b = setup->emaj.dx * botda - majda * setup->ebot.dx; 421 float dadx = a * setup->oneoverarea; 422 float dady = b * setup->oneoverarea; 423 424 assert(i <= 3); 425 426 setup->coef.dadx[0][i] = dadx; 427 setup->coef.dady[0][i] = dady; 428 429 /* calculate a0 as the value which would be sampled for the 430 * fragment at (0,0), taking into account that we want to sample at 431 * pixel centers, in other words (0.5, 0.5). 432 * 433 * this is neat but unfortunately not a good way to do things for 434 * triangles with very large values of dadx or dady as it will 435 * result in the subtraction and re-addition from a0 of a very 436 * large number, which means we'll end up loosing a lot of the 437 * fractional bits and precision from a0. the way to fix this is 438 * to define a0 as the sample at a pixel center somewhere near vmin 439 * instead - i'll switch to this later. 
440 */ 441 setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - 442 (dadx * (setup->vmin[0][0] - 0.5f) + 443 dady * (setup->vmin[0][1] - 0.5f))); 444 445 /* 446 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", 447 slot, "xyzw"[i], 448 setup->coef[slot].a0[i], 449 setup->coef[slot].dadx[i], 450 setup->coef[slot].dady[i]); 451 */ 452} 453 454 455/** 456 * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 457 * The value value comes from vertex[slot][i]. 458 * The result will be put into setup->coef[slot].a0[i]. 459 * \param slot which attribute slot 460 * \param i which component of the slot (0..3) 461 */ 462static void const_pos_coeff( struct setup_context *setup, 463 uint vertSlot, unsigned i) 464{ 465 setup->coef.dadx[0][i] = 0; 466 setup->coef.dady[0][i] = 0; 467 468 /* need provoking vertex info! 469 */ 470 setup->coef.a0[0][i] = setup->vprovoke[vertSlot][i]; 471} 472 473 474/** 475 * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 476 * The value value comes from vertex[slot][i]. 477 * The result will be put into setup->coef[slot].a0[i]. 478 * \param slot which attribute slot 479 * \param i which component of the slot (0..3) 480 */ 481static void const_coeff( struct setup_context *setup, 482 unsigned attrib, 483 uint vertSlot) 484{ 485 unsigned i; 486 for (i = 0; i < NUM_CHANNELS; ++i) { 487 setup->coef.dadx[1 + attrib][i] = 0; 488 setup->coef.dady[1 + attrib][i] = 0; 489 490 /* need provoking vertex info! 491 */ 492 setup->coef.a0[1 + attrib][i] = setup->vprovoke[vertSlot][i]; 493 } 494} 495 496 497/** 498 * Compute a0, dadx and dady for a linearly interpolated coefficient, 499 * for a triangle. 
500 */ 501static void tri_linear_coeff( struct setup_context *setup, 502 unsigned attrib, 503 uint vertSlot) 504{ 505 unsigned i; 506 for (i = 0; i < NUM_CHANNELS; ++i) { 507 float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; 508 float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 509 float a = setup->ebot.dy * majda - botda * setup->emaj.dy; 510 float b = setup->emaj.dx * botda - majda * setup->ebot.dx; 511 float dadx = a * setup->oneoverarea; 512 float dady = b * setup->oneoverarea; 513 514 assert(i <= 3); 515 516 setup->coef.dadx[1 + attrib][i] = dadx; 517 setup->coef.dady[1 + attrib][i] = dady; 518 519 /* calculate a0 as the value which would be sampled for the 520 * fragment at (0,0), taking into account that we want to sample at 521 * pixel centers, in other words (0.5, 0.5). 522 * 523 * this is neat but unfortunately not a good way to do things for 524 * triangles with very large values of dadx or dady as it will 525 * result in the subtraction and re-addition from a0 of a very 526 * large number, which means we'll end up loosing a lot of the 527 * fractional bits and precision from a0. the way to fix this is 528 * to define a0 as the sample at a pixel center somewhere near vmin 529 * instead - i'll switch to this later. 530 */ 531 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - 532 (dadx * (setup->vmin[0][0] - 0.5f) + 533 dady * (setup->vmin[0][1] - 0.5f))); 534 535 /* 536 debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", 537 slot, "xyzw"[i], 538 setup->coef[slot].a0[i], 539 setup->coef[slot].dadx[i], 540 setup->coef[slot].dady[i]); 541 */ 542 } 543} 544 545 546/** 547 * Compute a0, dadx and dady for a perspective-corrected interpolant, 548 * for a triangle. 549 * We basically multiply the vertex value by 1/w before computing 550 * the plane coefficients (a0, dadx, dady). 
551 * Later, when we compute the value at a particular fragment position we'll 552 * divide the interpolated value by the interpolated W at that fragment. 553 */ 554static void tri_persp_coeff( struct setup_context *setup, 555 unsigned attrib, 556 uint vertSlot) 557{ 558 unsigned i; 559 for (i = 0; i < NUM_CHANNELS; ++i) { 560 /* premultiply by 1/w (v[0][3] is always W): 561 */ 562 float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; 563 float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; 564 float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; 565 float botda = mida - mina; 566 float majda = maxa - mina; 567 float a = setup->ebot.dy * majda - botda * setup->emaj.dy; 568 float b = setup->emaj.dx * botda - majda * setup->ebot.dx; 569 float dadx = a * setup->oneoverarea; 570 float dady = b * setup->oneoverarea; 571 572 /* 573 debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, 574 setup->vmin[vertSlot][i], 575 setup->vmid[vertSlot][i], 576 setup->vmax[vertSlot][i] 577 ); 578 */ 579 assert(i <= 3); 580 581 setup->coef.dadx[1 + attrib][i] = dadx; 582 setup->coef.dady[1 + attrib][i] = dady; 583 setup->coef.a0[1 + attrib][i] = (mina - 584 (dadx * (setup->vmin[0][0] - 0.5f) + 585 dady * (setup->vmin[0][1] - 0.5f))); 586 } 587} 588 589 590/** 591 * Special coefficient setup for gl_FragCoord. 592 * X and Y are trivial, though Y has to be inverted for OpenGL. 593 * Z and W are copied from posCoef which should have already been computed. 594 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
595 */ 596static void 597setup_fragcoord_coeff(struct setup_context *setup, uint slot) 598{ 599 /*X*/ 600 setup->coef.a0[1 + slot][0] = 0; 601 setup->coef.dadx[1 + slot][0] = 1.0; 602 setup->coef.dady[1 + slot][0] = 0.0; 603 /*Y*/ 604 setup->coef.a0[1 + slot][1] = 0.0; 605 setup->coef.dadx[1 + slot][1] = 0.0; 606 setup->coef.dady[1 + slot][1] = 1.0; 607 /*Z*/ 608 setup->coef.a0[1 + slot][2] = setup->coef.a0[0][2]; 609 setup->coef.dadx[1 + slot][2] = setup->coef.dadx[0][2]; 610 setup->coef.dady[1 + slot][2] = setup->coef.dady[0][2]; 611 /*W*/ 612 setup->coef.a0[1 + slot][3] = setup->coef.a0[0][3]; 613 setup->coef.dadx[1 + slot][3] = setup->coef.dadx[0][3]; 614 setup->coef.dady[1 + slot][3] = setup->coef.dady[0][3]; 615} 616 617 618 619/** 620 * Compute the setup->coef[] array dadx, dady, a0 values. 621 * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. 622 */ 623static void setup_tri_coefficients( struct setup_context *setup ) 624{ 625 struct llvmpipe_context *llvmpipe = setup->llvmpipe; 626 const struct lp_fragment_shader *lpfs = llvmpipe->fs; 627 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); 628 uint fragSlot; 629 630 /* z and w are done by linear interpolation: 631 */ 632 tri_pos_coeff(setup, 0, 2); 633 tri_pos_coeff(setup, 0, 3); 634 635 /* setup interpolation for all the remaining attributes: 636 */ 637 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { 638 const uint vertSlot = vinfo->attrib[fragSlot].src_index; 639 640 switch (vinfo->attrib[fragSlot].interp_mode) { 641 case INTERP_CONSTANT: 642 const_coeff(setup, fragSlot, vertSlot); 643 break; 644 case INTERP_LINEAR: 645 tri_linear_coeff(setup, fragSlot, vertSlot); 646 break; 647 case INTERP_PERSPECTIVE: 648 tri_persp_coeff(setup, fragSlot, vertSlot); 649 break; 650 case INTERP_POS: 651 setup_fragcoord_coeff(setup, fragSlot); 652 break; 653 default: 654 assert(0); 655 } 656 657 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { 
658 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; 659 setup->coef.dadx[1 + fragSlot][0] = 0.0; 660 setup->coef.dady[1 + fragSlot][0] = 0.0; 661 } 662 } 663} 664 665 666 667static void setup_tri_edges( struct setup_context *setup ) 668{ 669 float vmin_x = setup->vmin[0][0] + 0.5f; 670 float vmid_x = setup->vmid[0][0] + 0.5f; 671 672 float vmin_y = setup->vmin[0][1] - 0.5f; 673 float vmid_y = setup->vmid[0][1] - 0.5f; 674 float vmax_y = setup->vmax[0][1] - 0.5f; 675 676 setup->emaj.sy = ceilf(vmin_y); 677 setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); 678 setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; 679 setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; 680 681 setup->etop.sy = ceilf(vmid_y); 682 setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); 683 setup->etop.dxdy = setup->etop.dx / setup->etop.dy; 684 setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; 685 686 setup->ebot.sy = ceilf(vmin_y); 687 setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); 688 setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; 689 setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; 690} 691 692 693/** 694 * Render the upper or lower half of a triangle. 695 * Scissoring/cliprect is applied here too. 
696 */ 697static void subtriangle( struct setup_context *setup, 698 struct edge *eleft, 699 struct edge *eright, 700 unsigned lines ) 701{ 702 const struct pipe_scissor_state *cliprect = &setup->llvmpipe->cliprect; 703 const int minx = (int) cliprect->minx; 704 const int maxx = (int) cliprect->maxx; 705 const int miny = (int) cliprect->miny; 706 const int maxy = (int) cliprect->maxy; 707 int y, start_y, finish_y; 708 int sy = (int)eleft->sy; 709 710 assert((int)eleft->sy == (int) eright->sy); 711 712 /* clip top/bottom */ 713 start_y = sy; 714 if (start_y < miny) 715 start_y = miny; 716 717 finish_y = sy + lines; 718 if (finish_y > maxy) 719 finish_y = maxy; 720 721 start_y -= sy; 722 finish_y -= sy; 723 724 /* 725 debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); 726 */ 727 728 for (y = start_y; y < finish_y; y++) { 729 730 /* avoid accumulating adds as floats don't have the precision to 731 * accurately iterate large triangle edges that way. luckily we 732 * can just multiply these days. 733 * 734 * this is all drowned out by the attribute interpolation anyway. 735 */ 736 int left = (int)(eleft->sx + y * eleft->dxdy); 737 int right = (int)(eright->sx + y * eright->dxdy); 738 739 /* clip left/right */ 740 if (left < minx) 741 left = minx; 742 if (right > maxx) 743 right = maxx; 744 745 if (left < right) { 746 int _y = sy + y; 747 if (block(_y) != setup->span.y) { 748 flush_spans(setup); 749 setup->span.y = block(_y); 750 } 751 752 setup->span.left[_y&1] = left; 753 setup->span.right[_y&1] = right; 754 } 755 } 756 757 758 /* save the values so that emaj can be restarted: 759 */ 760 eleft->sx += lines * eleft->dxdy; 761 eright->sx += lines * eright->dxdy; 762 eleft->sy += lines; 763 eright->sy += lines; 764} 765 766 767/** 768 * Recalculate prim's determinant. This is needed as we don't have 769 * get this information through the vbuf_render interface & we must 770 * calculate it here. 
771 */ 772static float 773calc_det( const float (*v0)[4], 774 const float (*v1)[4], 775 const float (*v2)[4] ) 776{ 777 /* edge vectors e = v0 - v2, f = v1 - v2 */ 778 const float ex = v0[0][0] - v2[0][0]; 779 const float ey = v0[0][1] - v2[0][1]; 780 const float fx = v1[0][0] - v2[0][0]; 781 const float fy = v1[0][1] - v2[0][1]; 782 783 /* det = cross(e,f).z */ 784 return ex * fy - ey * fx; 785} 786 787 788/** 789 * Do setup for triangle rasterization, then render the triangle. 790 */ 791void llvmpipe_setup_tri( struct setup_context *setup, 792 const float (*v0)[4], 793 const float (*v1)[4], 794 const float (*v2)[4] ) 795{ 796 float det; 797 798#if DEBUG_VERTS 799 debug_printf("Setup triangle:\n"); 800 print_vertex(setup, v0); 801 print_vertex(setup, v1); 802 print_vertex(setup, v2); 803#endif 804 805 if (setup->llvmpipe->no_rast) 806 return; 807 808 det = calc_det(v0, v1, v2); 809 /* 810 debug_printf("%s\n", __FUNCTION__ ); 811 */ 812 813#if DEBUG_FRAGS 814 setup->numFragsEmitted = 0; 815 setup->numFragsWritten = 0; 816#endif 817 818 if (cull_tri( setup, det )) 819 return; 820 821 if (!setup_sort_vertices( setup, det, v0, v1, v2 )) 822 return; 823 setup_tri_coefficients( setup ); 824 setup_tri_edges( setup ); 825 826 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_TRIANGLES); 827 828 setup->span.y = 0; 829 setup->span.right[0] = 0; 830 setup->span.right[1] = 0; 831 /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ 832 833 /* init_constant_attribs( setup ); */ 834 835 if (setup->oneoverarea < 0.0) { 836 /* emaj on left: 837 */ 838 subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); 839 subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); 840 } 841 else { 842 /* emaj on right: 843 */ 844 subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); 845 subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); 846 } 847 848 flush_spans( setup ); 849 850#if DEBUG_FRAGS 851 printf("Tri: %u frags emitted, %u 
written\n", 852 setup->numFragsEmitted, 853 setup->numFragsWritten); 854#endif 855} 856 857 858 859/** 860 * Compute a0, dadx and dady for a linearly interpolated coefficient, 861 * for a line. 862 */ 863static void 864linear_pos_coeff(struct setup_context *setup, 865 uint vertSlot, uint i) 866{ 867 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 868 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 869 const float dady = da * setup->emaj.dy * setup->oneoverarea; 870 setup->coef.dadx[0][i] = dadx; 871 setup->coef.dady[0][i] = dady; 872 setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] - 873 (dadx * (setup->vmin[0][0] - 0.5f) + 874 dady * (setup->vmin[0][1] - 0.5f))); 875} 876 877 878/** 879 * Compute a0, dadx and dady for a linearly interpolated coefficient, 880 * for a line. 881 */ 882static void 883line_linear_coeff(struct setup_context *setup, 884 unsigned attrib, 885 uint vertSlot) 886{ 887 unsigned i; 888 for (i = 0; i < NUM_CHANNELS; ++i) { 889 const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; 890 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 891 const float dady = da * setup->emaj.dy * setup->oneoverarea; 892 setup->coef.dadx[1 + attrib][i] = dadx; 893 setup->coef.dady[1 + attrib][i] = dady; 894 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - 895 (dadx * (setup->vmin[0][0] - 0.5f) + 896 dady * (setup->vmin[0][1] - 0.5f))); 897 } 898} 899 900 901/** 902 * Compute a0, dadx and dady for a perspective-corrected interpolant, 903 * for a line. 
904 */ 905static void 906line_persp_coeff(struct setup_context *setup, 907 unsigned attrib, 908 uint vertSlot) 909{ 910 unsigned i; 911 for (i = 0; i < NUM_CHANNELS; ++i) { 912 /* XXX double-check/verify this arithmetic */ 913 const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; 914 const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; 915 const float da = a1 - a0; 916 const float dadx = da * setup->emaj.dx * setup->oneoverarea; 917 const float dady = da * setup->emaj.dy * setup->oneoverarea; 918 setup->coef.dadx[1 + attrib][i] = dadx; 919 setup->coef.dady[1 + attrib][i] = dady; 920 setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] - 921 (dadx * (setup->vmin[0][0] - 0.5f) + 922 dady * (setup->vmin[0][1] - 0.5f))); 923 } 924} 925 926 927/** 928 * Compute the setup->coef[] array dadx, dady, a0 values. 929 * Must be called after setup->vmin,vmax are initialized. 930 */ 931static INLINE boolean 932setup_line_coefficients(struct setup_context *setup, 933 const float (*v0)[4], 934 const float (*v1)[4]) 935{ 936 struct llvmpipe_context *llvmpipe = setup->llvmpipe; 937 const struct lp_fragment_shader *lpfs = llvmpipe->fs; 938 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); 939 uint fragSlot; 940 float area; 941 942 /* use setup->vmin, vmax to point to vertices */ 943 if (llvmpipe->rasterizer->flatshade_first) 944 setup->vprovoke = v0; 945 else 946 setup->vprovoke = v1; 947 setup->vmin = v0; 948 setup->vmax = v1; 949 950 setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; 951 setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; 952 953 /* NOTE: this is not really area but something proportional to it */ 954 area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; 955 if (area == 0.0f || util_is_inf_or_nan(area)) 956 return FALSE; 957 setup->oneoverarea = 1.0f / area; 958 959 /* z and w are done by linear interpolation: 960 */ 961 linear_pos_coeff(setup, 0, 2); 962 linear_pos_coeff(setup, 0, 3); 963 964 
/* setup interpolation for all the remaining attributes: 965 */ 966 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { 967 const uint vertSlot = vinfo->attrib[fragSlot].src_index; 968 969 switch (vinfo->attrib[fragSlot].interp_mode) { 970 case INTERP_CONSTANT: 971 const_coeff(setup, fragSlot, vertSlot); 972 break; 973 case INTERP_LINEAR: 974 line_linear_coeff(setup, fragSlot, vertSlot); 975 break; 976 case INTERP_PERSPECTIVE: 977 line_persp_coeff(setup, fragSlot, vertSlot); 978 break; 979 case INTERP_POS: 980 setup_fragcoord_coeff(setup, fragSlot); 981 break; 982 default: 983 assert(0); 984 } 985 986 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { 987 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; 988 setup->coef.dadx[1 + fragSlot][0] = 0.0; 989 setup->coef.dady[1 + fragSlot][0] = 0.0; 990 } 991 } 992 return TRUE; 993} 994 995 996/** 997 * Plot a pixel in a line segment. 998 */ 999static INLINE void 1000plot(struct setup_context *setup, int x, int y) 1001{ 1002 const int iy = y & 1; 1003 const int ix = x & 1; 1004 const int quadX = x - ix; 1005 const int quadY = y - iy; 1006 const int mask = (1 << ix) << (2 * iy); 1007 1008 if (quadX != setup->quad[0].input.x0 || 1009 quadY != setup->quad[0].input.y0) 1010 { 1011 /* flush prev quad, start new quad */ 1012 1013 if (setup->quad[0].input.x0 != -1) 1014 clip_emit_quad( setup, &setup->quad[0] ); 1015 1016 setup->quad[0].input.x0 = quadX; 1017 setup->quad[0].input.y0 = quadY; 1018 setup->quad[0].inout.mask = 0x0; 1019 } 1020 1021 setup->quad[0].inout.mask |= mask; 1022} 1023 1024 1025/** 1026 * Do setup for line rasterization, then render the line. 1027 * Single-pixel width, no stipple, etc. We rely on the 'draw' module 1028 * to handle stippling and wide lines. 
1029 */ 1030void 1031llvmpipe_setup_line(struct setup_context *setup, 1032 const float (*v0)[4], 1033 const float (*v1)[4]) 1034{ 1035 int x0 = (int) v0[0][0]; 1036 int x1 = (int) v1[0][0]; 1037 int y0 = (int) v0[0][1]; 1038 int y1 = (int) v1[0][1]; 1039 int dx = x1 - x0; 1040 int dy = y1 - y0; 1041 int xstep, ystep; 1042 1043#if DEBUG_VERTS 1044 debug_printf("Setup line:\n"); 1045 print_vertex(setup, v0); 1046 print_vertex(setup, v1); 1047#endif 1048 1049 if (setup->llvmpipe->no_rast) 1050 return; 1051 1052 if (dx == 0 && dy == 0) 1053 return; 1054 1055 if (!setup_line_coefficients(setup, v0, v1)) 1056 return; 1057 1058 assert(v0[0][0] < 1.0e9); 1059 assert(v0[0][1] < 1.0e9); 1060 assert(v1[0][0] < 1.0e9); 1061 assert(v1[0][1] < 1.0e9); 1062 1063 if (dx < 0) { 1064 dx = -dx; /* make positive */ 1065 xstep = -1; 1066 } 1067 else { 1068 xstep = 1; 1069 } 1070 1071 if (dy < 0) { 1072 dy = -dy; /* make positive */ 1073 ystep = -1; 1074 } 1075 else { 1076 ystep = 1; 1077 } 1078 1079 assert(dx >= 0); 1080 assert(dy >= 0); 1081 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_LINES); 1082 1083 setup->quad[0].input.x0 = setup->quad[0].input.y0 = -1; 1084 setup->quad[0].inout.mask = 0x0; 1085 1086 /* XXX temporary: set coverage to 1.0 so the line appears 1087 * if AA mode happens to be enabled. 
1088 */ 1089 setup->quad[0].input.coverage[0] = 1090 setup->quad[0].input.coverage[1] = 1091 setup->quad[0].input.coverage[2] = 1092 setup->quad[0].input.coverage[3] = 1.0; 1093 1094 if (dx > dy) { 1095 /*** X-major line ***/ 1096 int i; 1097 const int errorInc = dy + dy; 1098 int error = errorInc - dx; 1099 const int errorDec = error - dx; 1100 1101 for (i = 0; i < dx; i++) { 1102 plot(setup, x0, y0); 1103 1104 x0 += xstep; 1105 if (error < 0) { 1106 error += errorInc; 1107 } 1108 else { 1109 error += errorDec; 1110 y0 += ystep; 1111 } 1112 } 1113 } 1114 else { 1115 /*** Y-major line ***/ 1116 int i; 1117 const int errorInc = dx + dx; 1118 int error = errorInc - dy; 1119 const int errorDec = error - dy; 1120 1121 for (i = 0; i < dy; i++) { 1122 plot(setup, x0, y0); 1123 1124 y0 += ystep; 1125 if (error < 0) { 1126 error += errorInc; 1127 } 1128 else { 1129 error += errorDec; 1130 x0 += xstep; 1131 } 1132 } 1133 } 1134 1135 /* draw final quad */ 1136 if (setup->quad[0].inout.mask) { 1137 clip_emit_quad( setup, &setup->quad[0] ); 1138 } 1139} 1140 1141 1142static void 1143point_persp_coeff(struct setup_context *setup, 1144 const float (*vert)[4], 1145 unsigned attrib, 1146 uint vertSlot) 1147{ 1148 unsigned i; 1149 for(i = 0; i < NUM_CHANNELS; ++i) { 1150 setup->coef.dadx[1 + attrib][i] = 0.0F; 1151 setup->coef.dady[1 + attrib][i] = 0.0F; 1152 setup->coef.a0[1 + attrib][i] = vert[vertSlot][i] * vert[0][3]; 1153 } 1154} 1155 1156 1157/** 1158 * Do setup for point rasterization, then render the point. 1159 * Round or square points... 1160 * XXX could optimize a lot for 1-pixel points. 1161 */ 1162void 1163llvmpipe_setup_point( struct setup_context *setup, 1164 const float (*v0)[4] ) 1165{ 1166 struct llvmpipe_context *llvmpipe = setup->llvmpipe; 1167 const struct lp_fragment_shader *lpfs = llvmpipe->fs; 1168 const int sizeAttr = setup->llvmpipe->psize_slot; 1169 const float size 1170 = sizeAttr > 0 ? 
v0[sizeAttr][0] 1171 : setup->llvmpipe->rasterizer->point_size; 1172 const float halfSize = 0.5F * size; 1173 const boolean round = (boolean) setup->llvmpipe->rasterizer->point_smooth; 1174 const float x = v0[0][0]; /* Note: data[0] is always position */ 1175 const float y = v0[0][1]; 1176 const struct vertex_info *vinfo = llvmpipe_get_vertex_info(llvmpipe); 1177 uint fragSlot; 1178 1179#if DEBUG_VERTS 1180 debug_printf("Setup point:\n"); 1181 print_vertex(setup, v0); 1182#endif 1183 1184 if (llvmpipe->no_rast) 1185 return; 1186 1187 assert(setup->llvmpipe->reduced_prim == PIPE_PRIM_POINTS); 1188 1189 /* For points, all interpolants are constant-valued. 1190 * However, for point sprites, we'll need to setup texcoords appropriately. 1191 * XXX: which coefficients are the texcoords??? 1192 * We may do point sprites as textured quads... 1193 * 1194 * KW: We don't know which coefficients are texcoords - ultimately 1195 * the choice of what interpolation mode to use for each attribute 1196 * should be determined by the fragment program, using 1197 * per-attribute declaration statements that include interpolation 1198 * mode as a parameter. So either the fragment program will have 1199 * to be adjusted for pointsprite vs normal point behaviour, or 1200 * otherwise a special interpolation mode will have to be defined 1201 * which matches the required behaviour for point sprites. But - 1202 * the latter is not a feature of normal hardware, and as such 1203 * probably should be ruled out on that basis. 
1204 */ 1205 setup->vprovoke = v0; 1206 1207 /* setup Z, W */ 1208 const_pos_coeff(setup, 0, 2); 1209 const_pos_coeff(setup, 0, 3); 1210 1211 for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) { 1212 const uint vertSlot = vinfo->attrib[fragSlot].src_index; 1213 1214 switch (vinfo->attrib[fragSlot].interp_mode) { 1215 case INTERP_CONSTANT: 1216 /* fall-through */ 1217 case INTERP_LINEAR: 1218 const_coeff(setup, fragSlot, vertSlot); 1219 break; 1220 case INTERP_PERSPECTIVE: 1221 point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot); 1222 break; 1223 case INTERP_POS: 1224 setup_fragcoord_coeff(setup, fragSlot); 1225 break; 1226 default: 1227 assert(0); 1228 } 1229 1230 if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) { 1231 setup->coef.a0[1 + fragSlot][0] = 1.0f - setup->facing; 1232 setup->coef.dadx[1 + fragSlot][0] = 0.0; 1233 setup->coef.dady[1 + fragSlot][0] = 0.0; 1234 } 1235 } 1236 1237 1238 if (halfSize <= 0.5 && !round) { 1239 /* special case for 1-pixel points */ 1240 const int ix = ((int) x) & 1; 1241 const int iy = ((int) y) & 1; 1242 setup->quad[0].input.x0 = (int) x - ix; 1243 setup->quad[0].input.y0 = (int) y - iy; 1244 setup->quad[0].inout.mask = (1 << ix) << (2 * iy); 1245 clip_emit_quad( setup, &setup->quad[0] ); 1246 } 1247 else { 1248 if (round) { 1249 /* rounded points */ 1250 const int ixmin = block((int) (x - halfSize)); 1251 const int ixmax = block((int) (x + halfSize)); 1252 const int iymin = block((int) (y - halfSize)); 1253 const int iymax = block((int) (y + halfSize)); 1254 const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ 1255 const float rmax = halfSize + 0.7071F; 1256 const float rmin2 = MAX2(0.0F, rmin * rmin); 1257 const float rmax2 = rmax * rmax; 1258 const float cscale = 1.0F / (rmax2 - rmin2); 1259 int ix, iy; 1260 1261 for (iy = iymin; iy <= iymax; iy += 2) { 1262 for (ix = ixmin; ix <= ixmax; ix += 2) { 1263 float dx, dy, dist2, cover; 1264 1265 setup->quad[0].inout.mask = 0x0; 
1266 1267 dx = (ix + 0.5f) - x; 1268 dy = (iy + 0.5f) - y; 1269 dist2 = dx * dx + dy * dy; 1270 if (dist2 <= rmax2) { 1271 cover = 1.0F - (dist2 - rmin2) * cscale; 1272 setup->quad[0].input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); 1273 setup->quad[0].inout.mask |= MASK_TOP_LEFT; 1274 } 1275 1276 dx = (ix + 1.5f) - x; 1277 dy = (iy + 0.5f) - y; 1278 dist2 = dx * dx + dy * dy; 1279 if (dist2 <= rmax2) { 1280 cover = 1.0F - (dist2 - rmin2) * cscale; 1281 setup->quad[0].input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); 1282 setup->quad[0].inout.mask |= MASK_TOP_RIGHT; 1283 } 1284 1285 dx = (ix + 0.5f) - x; 1286 dy = (iy + 1.5f) - y; 1287 dist2 = dx * dx + dy * dy; 1288 if (dist2 <= rmax2) { 1289 cover = 1.0F - (dist2 - rmin2) * cscale; 1290 setup->quad[0].input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); 1291 setup->quad[0].inout.mask |= MASK_BOTTOM_LEFT; 1292 } 1293 1294 dx = (ix + 1.5f) - x; 1295 dy = (iy + 1.5f) - y; 1296 dist2 = dx * dx + dy * dy; 1297 if (dist2 <= rmax2) { 1298 cover = 1.0F - (dist2 - rmin2) * cscale; 1299 setup->quad[0].input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); 1300 setup->quad[0].inout.mask |= MASK_BOTTOM_RIGHT; 1301 } 1302 1303 if (setup->quad[0].inout.mask) { 1304 setup->quad[0].input.x0 = ix; 1305 setup->quad[0].input.y0 = iy; 1306 clip_emit_quad( setup, &setup->quad[0] ); 1307 } 1308 } 1309 } 1310 } 1311 else { 1312 /* square points */ 1313 const int xmin = (int) (x + 0.75 - halfSize); 1314 const int ymin = (int) (y + 0.25 - halfSize); 1315 const int xmax = xmin + (int) size; 1316 const int ymax = ymin + (int) size; 1317 /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ 1318 const int ixmin = block(xmin); 1319 const int ixmax = block(xmax - 1); 1320 const int iymin = block(ymin); 1321 const int iymax = block(ymax - 1); 1322 int ix, iy; 1323 1324 /* 1325 debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); 1326 */ 1327 for (iy = iymin; iy <= iymax; iy += 2) { 1328 uint rowMask = 0xf; 1329 
if (iy < ymin) { 1330 /* above the top edge */ 1331 rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); 1332 } 1333 if (iy + 1 >= ymax) { 1334 /* below the bottom edge */ 1335 rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); 1336 } 1337 1338 for (ix = ixmin; ix <= ixmax; ix += 2) { 1339 uint mask = rowMask; 1340 1341 if (ix < xmin) { 1342 /* fragment is past left edge of point, turn off left bits */ 1343 mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); 1344 } 1345 if (ix + 1 >= xmax) { 1346 /* past the right edge */ 1347 mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); 1348 } 1349 1350 setup->quad[0].inout.mask = mask; 1351 setup->quad[0].input.x0 = ix; 1352 setup->quad[0].input.y0 = iy; 1353 clip_emit_quad( setup, &setup->quad[0] ); 1354 } 1355 } 1356 } 1357 } 1358} 1359 1360void llvmpipe_setup_prepare( struct setup_context *setup ) 1361{ 1362 struct llvmpipe_context *lp = setup->llvmpipe; 1363 1364 if (lp->dirty) { 1365 llvmpipe_update_derived(lp); 1366 } 1367 1368 lp->quad.first->begin( lp->quad.first ); 1369 1370 if (lp->reduced_api_prim == PIPE_PRIM_TRIANGLES && 1371 lp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && 1372 lp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { 1373 /* we'll do culling */ 1374 setup->winding = lp->rasterizer->cull_mode; 1375 } 1376 else { 1377 /* 'draw' will do culling */ 1378 setup->winding = PIPE_WINDING_NONE; 1379 } 1380} 1381 1382 1383 1384void llvmpipe_setup_destroy_context( struct setup_context *setup ) 1385{ 1386 align_free( setup ); 1387} 1388 1389 1390/** 1391 * Create a new primitive setup/render stage. 
1392 */ 1393struct setup_context *llvmpipe_setup_create_context( struct llvmpipe_context *llvmpipe ) 1394{ 1395 struct setup_context *setup; 1396 unsigned i; 1397 1398 setup = align_malloc(sizeof(struct setup_context), 16); 1399 if (!setup) 1400 return NULL; 1401 1402 memset(setup, 0, sizeof *setup); 1403 setup->llvmpipe = llvmpipe; 1404 1405 for (i = 0; i < MAX_QUADS; i++) { 1406 setup->quad[i].coef = &setup->coef; 1407 } 1408 1409 setup->span.left[0] = 1000000; /* greater than right[0] */ 1410 setup->span.left[1] = 1000000; /* greater than right[1] */ 1411 1412 return setup; 1413} 1414 1415