t_vp_build.c revision dbeea25bb834479a29712100888c862348112018
/*
 * Mesa 3-D graphics library
 * Version:  6.3
 *
 * Copyright (C) 2005  Tungsten Graphics   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file t_vp_build.c
 * Create a vertex program to execute the current fixed function T&L pipeline.
 * \author Keith Whitwell
 */


#include <strings.h>

#include "glheader.h"
#include "macros.h"
#include "enums.h"
#include "t_context.h"
#include "t_vp_build.h"

#include "shader/program.h"
#include "shader/nvvertprog.h"
#include "shader/arbvertparse.h"


/* Very useful debugging tool - produces annotated listing of
 * generated program with line/function references for each
 * instruction back into this file:
 */
#define DISASSEM 0

/* Use uregs to represent registers internally, translate to Mesa's
 * expected formats on emit.
54 * 55 * NOTE: These are passed by value extensively in this file rather 56 * than as usual by pointer reference. If this disturbs you, try 57 * remembering they are just 32bits in size. 58 * 59 * GCC is smart enough to deal with these dword-sized structures in 60 * much the same way as if I had defined them as dwords and was using 61 * macros to access and set the fields. This is much nicer and easier 62 * to evolve. 63 */ 64struct ureg { 65 GLuint file:4; 66 GLuint idx:8; 67 GLuint negate:1; 68 GLuint swz:12; 69 GLuint pad:7; 70}; 71 72 73struct tnl_program { 74 GLcontext *ctx; 75 struct vertex_program *program; 76 77 GLuint temp_flag; 78 GLuint temp_reserved; 79 80 struct ureg eye_position; 81 struct ureg eye_position_normalized; 82 struct ureg eye_normal; 83 struct ureg identity; 84 85 GLuint materials; 86 GLuint color_materials; 87}; 88 89 90const static struct ureg undef = { 91 ~0, 92 ~0, 93 0, 94 0, 95 0 96}; 97 98/* Local shorthand: 99 */ 100#define X SWIZZLE_X 101#define Y SWIZZLE_Y 102#define Z SWIZZLE_Z 103#define W SWIZZLE_W 104 105 106/* Construct a ureg: 107 */ 108static struct ureg make_ureg(GLuint file, GLuint idx) 109{ 110 struct ureg reg; 111 reg.file = file; 112 reg.idx = idx; 113 reg.negate = 0; 114 reg.swz = SWIZZLE_NOOP; 115 reg.pad = 0; 116 return reg; 117} 118 119 120 121static struct ureg negate( struct ureg reg ) 122{ 123 reg.negate ^= 1; 124 return reg; 125} 126 127 128static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 129{ 130 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 131 GET_SWZ(reg.swz, y), 132 GET_SWZ(reg.swz, z), 133 GET_SWZ(reg.swz, w)); 134 135 return reg; 136} 137 138static struct ureg swizzle1( struct ureg reg, int x ) 139{ 140 return swizzle(reg, x, x, x, x); 141} 142 143static struct ureg get_temp( struct tnl_program *p ) 144{ 145 int bit = ffs( ~p->temp_flag ); 146 if (!bit) { 147 fprintf(stderr, "%s: out of temporaries\n", __FILE__); 148 exit(1); 149 } 150 151 p->temp_flag |= 1<<(bit-1); 152 return 
make_ureg(PROGRAM_TEMPORARY, bit-1); 153} 154 155static struct ureg reserve_temp( struct tnl_program *p ) 156{ 157 struct ureg temp = get_temp( p ); 158 p->temp_reserved |= 1<<temp.idx; 159 return temp; 160} 161 162static void release_temp( struct tnl_program *p, struct ureg reg ) 163{ 164 if (reg.file == PROGRAM_TEMPORARY) { 165 p->temp_flag &= ~(1<<reg.idx); 166 p->temp_flag |= p->temp_reserved; /* can't release reserved temps */ 167 } 168} 169 170static void release_temps( struct tnl_program *p ) 171{ 172 p->temp_flag = p->temp_reserved; 173} 174 175 176 177static struct ureg register_input( struct tnl_program *p, GLuint input ) 178{ 179 p->program->InputsRead |= (1<<input); 180 return make_ureg(PROGRAM_INPUT, input); 181} 182 183static struct ureg register_output( struct tnl_program *p, GLuint output ) 184{ 185 p->program->OutputsWritten |= (1<<output); 186 return make_ureg(PROGRAM_OUTPUT, output); 187} 188 189static struct ureg register_const4f( struct tnl_program *p, 190 GLfloat s0, 191 GLfloat s1, 192 GLfloat s2, 193 GLfloat s3) 194{ 195 GLfloat values[4]; 196 GLuint idx; 197 values[0] = s0; 198 values[1] = s1; 199 values[2] = s2; 200 values[3] = s3; 201 idx = _mesa_add_unnamed_constant( p->program->Parameters, values ); 202 return make_ureg(PROGRAM_STATE_VAR, idx); 203} 204 205#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 206#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 207#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 208 209static GLboolean is_undef( struct ureg reg ) 210{ 211 return reg.file == 0xf; 212} 213 214static struct ureg get_identity_param( struct tnl_program *p ) 215{ 216 if (is_undef(p->identity)) 217 p->identity = register_const4f(p, 0,0,0,1); 218 219 return p->identity; 220} 221 222static struct ureg register_param6( struct tnl_program *p, 223 GLint s0, 224 GLint s1, 225 GLint s2, 226 GLint s3, 227 GLint s4, 228 GLint s5) 229{ 230 GLint tokens[6]; 231 GLuint idx; 232 
tokens[0] = s0; 233 tokens[1] = s1; 234 tokens[2] = s2; 235 tokens[3] = s3; 236 tokens[4] = s4; 237 tokens[5] = s5; 238 idx = _mesa_add_state_reference( p->program->Parameters, tokens ); 239 return make_ureg(PROGRAM_STATE_VAR, idx); 240} 241 242 243#define register_param1(p,s0) register_param6(p,s0,0,0,0,0,0) 244#define register_param2(p,s0,s1) register_param6(p,s0,s1,0,0,0,0) 245#define register_param3(p,s0,s1,s2) register_param6(p,s0,s1,s2,0,0,0) 246#define register_param4(p,s0,s1,s2,s3) register_param6(p,s0,s1,s2,s3,0,0) 247 248 249static void register_matrix_param6( struct tnl_program *p, 250 GLint s0, 251 GLint s1, 252 GLint s2, 253 GLint s3, 254 GLint s4, 255 GLint s5, 256 struct ureg *matrix ) 257{ 258 GLuint i; 259 260 /* This is a bit sad as the support is there to pull the whole 261 * matrix out in one go: 262 */ 263 for (i = 0; i <= s4 - s3; i++) 264 matrix[i] = register_param6( p, s0, s1, s2, i, i, s5 ); 265} 266 267 268static void emit_arg( struct vp_src_register *src, 269 struct ureg reg ) 270{ 271 src->File = reg.file; 272 src->Index = reg.idx; 273 src->Swizzle = reg.swz; 274 src->Negate = reg.negate; 275 src->RelAddr = 0; 276 src->pad = 0; 277} 278 279static void emit_dst( struct vp_dst_register *dst, 280 struct ureg reg, GLuint mask ) 281{ 282 dst->File = reg.file; 283 dst->Index = reg.idx; 284 /* allow zero as a shorthand for xyzw */ 285 dst->WriteMask = mask ? 
mask : WRITEMASK_XYZW; 286 dst->pad = 0; 287} 288 289static void debug_insn( struct vp_instruction *inst, const char *fn, 290 GLuint line ) 291{ 292#if DISASSEM 293 static const char *last_fn; 294 295 if (fn != last_fn) { 296 last_fn = fn; 297 _mesa_printf("%s:\n", fn); 298 } 299 300 _mesa_printf("%d:\t", line); 301 _mesa_debug_vp_inst(1, inst); 302#endif 303} 304 305 306static void emit_op3fn(struct tnl_program *p, 307 GLuint op, 308 struct ureg dest, 309 GLuint mask, 310 struct ureg src0, 311 struct ureg src1, 312 struct ureg src2, 313 const char *fn, 314 GLuint line) 315{ 316 GLuint nr = p->program->Base.NumInstructions++; 317 struct vp_instruction *inst = &p->program->Instructions[nr]; 318 319 inst->Opcode = op; 320 inst->StringPos = 0; 321 inst->Data = 0; 322 323 emit_arg( &inst->SrcReg[0], src0 ); 324 emit_arg( &inst->SrcReg[1], src1 ); 325 emit_arg( &inst->SrcReg[2], src2 ); 326 327 emit_dst( &inst->DstReg, dest, mask ); 328 329 debug_insn(inst, fn, line); 330} 331 332 333 334#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 335 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 336 337#define emit_op2(p, op, dst, mask, src0, src1) \ 338 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 339 340#define emit_op1(p, op, dst, mask, src0) \ 341 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 342 343 344static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 345{ 346 if (reg.file == PROGRAM_TEMPORARY && 347 !(p->temp_reserved & (1<<reg.idx))) 348 return reg; 349 else { 350 struct ureg temp = get_temp(p); 351 emit_op1(p, VP_OPCODE_MOV, temp, 0, reg); 352 return temp; 353 } 354} 355 356 357/* Currently no tracking performed of input/output/register size or 358 * active elements. Could be used to reduce these operations, as 359 * could the matrix type. 
360 */ 361static void emit_matrix_transform_vec4( struct tnl_program *p, 362 struct ureg dest, 363 const struct ureg *mat, 364 struct ureg src) 365{ 366 emit_op2(p, VP_OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 367 emit_op2(p, VP_OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 368 emit_op2(p, VP_OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 369 emit_op2(p, VP_OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 370} 371 372/* This version is much easier to implement if writemasks are not 373 * supported natively on the target or (like SSE), the target doesn't 374 * have a clean/obvious dotproduct implementation. 375 */ 376static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 377 struct ureg dest, 378 const struct ureg *mat, 379 struct ureg src) 380{ 381 struct ureg tmp; 382 383 if (dest.file != PROGRAM_TEMPORARY) 384 tmp = get_temp(p); 385 else 386 tmp = dest; 387 388 emit_op2(p, VP_OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 389 emit_op3(p, VP_OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 390 emit_op3(p, VP_OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 391 emit_op3(p, VP_OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 392 393 if (dest.file != PROGRAM_TEMPORARY) 394 release_temp(p, tmp); 395} 396 397static void emit_matrix_transform_vec3( struct tnl_program *p, 398 struct ureg dest, 399 const struct ureg *mat, 400 struct ureg src) 401{ 402 emit_op2(p, VP_OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 403 emit_op2(p, VP_OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 404 emit_op2(p, VP_OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 405} 406 407 408static void emit_normalize_vec3( struct tnl_program *p, 409 struct ureg dest, 410 struct ureg src ) 411{ 412 struct ureg tmp = get_temp(p); 413 emit_op2(p, VP_OPCODE_DP3, tmp, 0, src, src); 414 emit_op1(p, VP_OPCODE_RSQ, tmp, 0, tmp); 415 emit_op2(p, VP_OPCODE_MUL, dest, 0, src, tmp); 416 release_temp(p, tmp); 417} 418 419static struct ureg get_eye_position( struct tnl_program *p ) 420{ 421 if 
(is_undef(p->eye_position)) { 422 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 423 struct ureg modelview[4]; 424 425 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, 426 STATE_MATRIX_TRANSPOSE, modelview ); 427 p->eye_position = reserve_temp(p); 428 429 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 430 } 431 432 return p->eye_position; 433} 434 435 436static struct ureg get_eye_position_normalized( struct tnl_program *p ) 437{ 438 if (is_undef(p->eye_position_normalized)) { 439 struct ureg eye = get_eye_position(p); 440 p->eye_position_normalized = reserve_temp(p); 441 emit_normalize_vec3(p, p->eye_position_normalized, eye); 442 } 443 444 return p->eye_position_normalized; 445} 446 447 448static struct ureg get_eye_normal( struct tnl_program *p ) 449{ 450 if (is_undef(p->eye_normal)) { 451 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 452 struct ureg mvinv[3]; 453 454 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 2, 455 STATE_MATRIX_INVTRANS, mvinv ); 456 457 p->eye_normal = reserve_temp(p); 458 459 /* Transform to eye space: 460 */ 461 emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal ); 462 463 /* Normalize/Rescale: 464 */ 465 if (p->ctx->Transform.Normalize) { 466 emit_normalize_vec3( p, p->eye_normal, p->eye_normal ); 467 } 468 else if (p->ctx->Transform.RescaleNormals) { 469 struct ureg rescale = register_param2(p, STATE_INTERNAL, 470 STATE_NORMAL_SCALE); 471 472 emit_op2( p, VP_OPCODE_MUL, p->eye_normal, 0, normal, 473 swizzle1(rescale, X)); 474 } 475 } 476 477 return p->eye_normal; 478} 479 480 481 482static void build_hpos( struct tnl_program *p ) 483{ 484 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 485 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 486 struct ureg mvp[4]; 487 488 register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, 489 STATE_MATRIX_TRANSPOSE, mvp ); 490 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos 
); 491} 492 493 494static GLuint material_attrib( GLuint side, GLuint property ) 495{ 496 return (_TNL_ATTRIB_MAT_FRONT_AMBIENT + 497 (property - STATE_AMBIENT) * 2 + 498 side); 499} 500 501static void set_material_flags( struct tnl_program *p ) 502{ 503 GLcontext *ctx = p->ctx; 504 TNLcontext *tnl = TNL_CONTEXT(ctx); 505 GLuint i; 506 507 p->color_materials = 0; 508 p->materials = 0; 509 510 if (ctx->Light.ColorMaterialEnabled) { 511 p->materials = 512 p->color_materials = 513 ctx->Light.ColorMaterialBitmask << _TNL_ATTRIB_MAT_FRONT_AMBIENT; 514 } 515 516 for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT ; i < _TNL_ATTRIB_INDEX ; i++) 517 if (tnl->vb.AttribPtr[i]->stride) 518 p->materials |= 1<<i; 519} 520 521 522static struct ureg get_material( struct tnl_program *p, GLuint side, 523 GLuint property ) 524{ 525 GLuint attrib = material_attrib(side, property); 526 527 if (p->color_materials & (1<<attrib)) 528 return register_input(p, VERT_ATTRIB_COLOR0); 529 else if (p->materials & (1<<attrib)) 530 return register_input( p, attrib ); 531 else 532 return register_param3( p, STATE_MATERIAL, side, property ); 533} 534 535#define SCENE_COLOR_BITS(side) (( _TNL_BIT_MAT_FRONT_EMISSION | \ 536 _TNL_BIT_MAT_FRONT_AMBIENT | \ 537 _TNL_BIT_MAT_FRONT_DIFFUSE) << (side)) 538 539/* Either return a precalculated constant value or emit code to 540 * calculate these values dynamically in the case where material calls 541 * are present between begin/end pairs. 542 * 543 * Probably want to shift this to the program compilation phase - if 544 * we always emitted the calculation here, a smart compiler could 545 * detect that it was constant (given a certain set of inputs), and 546 * lift it out of the main loop. That way the programs created here 547 * would be independent of the vertex_buffer details. 
548 */ 549static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 550{ 551 if (p->materials & SCENE_COLOR_BITS(side)) { 552 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 553 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 554 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 555 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 556 struct ureg tmp = make_temp(p, material_diffuse); 557 emit_op3(p, VP_OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 558 material_ambient, material_emission); 559 return tmp; 560 } 561 else 562 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 563} 564 565 566static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 567 GLuint side, GLuint property ) 568{ 569 GLuint attrib = material_attrib(side, property); 570 if (p->materials & (1<<attrib)) { 571 struct ureg light_value = 572 register_param3(p, STATE_LIGHT, light, property); 573 struct ureg material_value = get_material(p, side, property); 574 struct ureg tmp = get_temp(p); 575 emit_op2(p, VP_OPCODE_MUL, tmp, 0, light_value, material_value); 576 return tmp; 577 } 578 else 579 return register_param4(p, STATE_LIGHTPROD, light, side, property); 580} 581 582static struct ureg calculate_light_attenuation( struct tnl_program *p, 583 GLuint i, 584 struct gl_light *light, 585 struct ureg VPpli, 586 struct ureg dist ) 587{ 588 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 589 STATE_ATTENUATION); 590 struct ureg att = get_temp(p); 591 592 /* Calculate spot attenuation: 593 */ 594 if (light->SpotCutoff != 180.0F) { 595 struct ureg spot_dir = register_param3(p, STATE_LIGHT, i, 596 STATE_SPOT_DIRECTION); 597 struct ureg spot = get_temp(p); 598 struct ureg slt = get_temp(p); 599 600 emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! 
*/ 601 emit_op2(p, VP_OPCODE_DP3, spot, 0, negate(VPpli), spot_dir); 602 emit_op2(p, VP_OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot); 603 emit_op2(p, VP_OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 604 emit_op2(p, VP_OPCODE_MUL, att, 0, slt, spot); 605 606 release_temp(p, spot); 607 release_temp(p, slt); 608 } 609 610 /* Calculate distance attenuation: 611 */ 612 if (light->ConstantAttenuation != 1.0 || 613 light->LinearAttenuation != 1.0 || 614 light->QuadraticAttenuation != 1.0) { 615 616 /* 1/d,d,d,1/d */ 617 emit_op1(p, VP_OPCODE_RCP, dist, WRITEMASK_YZ, dist); 618 /* 1,d,d*d,1/d */ 619 emit_op2(p, VP_OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 620 /* 1/dist-atten */ 621 emit_op2(p, VP_OPCODE_DP3, dist, 0, attenuation, dist); 622 623 if (light->SpotCutoff != 180.0F) { 624 /* dist-atten */ 625 emit_op1(p, VP_OPCODE_RCP, dist, 0, dist); 626 /* spot-atten * dist-atten */ 627 emit_op2(p, VP_OPCODE_MUL, att, 0, dist, att); 628 } else { 629 /* dist-atten */ 630 emit_op1(p, VP_OPCODE_RCP, att, 0, dist); 631 } 632 } 633 634 return att; 635} 636 637 638 639 640 641/* Need to add some addtional parameters to allow lighting in object 642 * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye 643 * space lighting. 
644 */ 645static void build_lighting( struct tnl_program *p ) 646{ 647 GLcontext *ctx = p->ctx; 648 const GLboolean twoside = ctx->Light.Model.TwoSide; 649 const GLboolean separate = (ctx->Light.Model.ColorControl == 650 GL_SEPARATE_SPECULAR_COLOR); 651 GLuint nr_lights = 0, count = 0; 652 struct ureg normal = get_eye_normal(p); 653 struct ureg lit = get_temp(p); 654 struct ureg dots = get_temp(p); 655 struct ureg _col0 = undef, _col1 = undef; 656 struct ureg _bfc0 = undef, _bfc1 = undef; 657 GLuint i; 658 659 for (i = 0; i < MAX_LIGHTS; i++) 660 if (ctx->Light.Light[i].Enabled) 661 nr_lights++; 662 663 set_material_flags(p); 664 665 { 666 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 667 emit_op1(p, VP_OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 668 release_temp(p, shininess); 669 670 _col0 = make_temp(p, get_scenecolor(p, 0)); 671 if (separate) 672 _col1 = make_temp(p, get_identity_param(p)); 673 else 674 _col1 = _col0; 675 676 } 677 678 if (twoside) { 679 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 680 emit_op1(p, VP_OPCODE_MOV, dots, WRITEMASK_Z, 681 negate(swizzle1(shininess,X))); 682 release_temp(p, shininess); 683 684 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 685 if (separate) 686 _bfc1 = make_temp(p, get_identity_param(p)); 687 else 688 _bfc1 = _bfc0; 689 } 690 691 for (i = 0; i < MAX_LIGHTS; i++) { 692 struct gl_light *light = &ctx->Light.Light[i]; 693 694 if (light->Enabled) { 695 struct ureg half = undef; 696 struct ureg att = undef, VPpli = undef; 697 698 count++; 699 700 if (light->EyePosition[3] == 0) { 701 /* Can used precomputed constants in this case. 702 * Attenuation never applies to infinite lights. 
703 */ 704 VPpli = register_param3(p, STATE_LIGHT, i, 705 STATE_POSITION_NORMALIZED); 706 half = register_param3(p, STATE_LIGHT, i, STATE_HALF); 707 } 708 else { 709 struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 710 STATE_POSITION); 711 struct ureg V = get_eye_position(p); 712 struct ureg dist = get_temp(p); 713 714 VPpli = get_temp(p); 715 half = get_temp(p); 716 717 /* Calulate VPpli vector 718 */ 719 emit_op2(p, VP_OPCODE_SUB, VPpli, 0, Ppli, V); 720 721 /* Normalize VPpli. The dist value also used in 722 * attenuation below. 723 */ 724 emit_op2(p, VP_OPCODE_DP3, dist, 0, VPpli, VPpli); 725 emit_op1(p, VP_OPCODE_RSQ, dist, 0, dist); 726 emit_op2(p, VP_OPCODE_MUL, VPpli, 0, VPpli, dist); 727 728 729 /* Calculate attenuation: 730 */ 731 if (light->SpotCutoff != 180.0 || 732 light->ConstantAttenuation != 1.0 || 733 light->LinearAttenuation != 1.0 || 734 light->QuadraticAttenuation != 1.0) { 735 att = calculate_light_attenuation(p, i, light, VPpli, dist); 736 } 737 738 739 /* Calculate viewer direction, or use infinite viewer: 740 */ 741 if (ctx->Light.Model.LocalViewer) { 742 struct ureg eye_hat = get_eye_position_normalized(p); 743 emit_op2(p, VP_OPCODE_SUB, half, 0, VPpli, eye_hat); 744 } 745 else { 746 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 747 emit_op2(p, VP_OPCODE_ADD, half, 0, VPpli, z_dir); 748 } 749 750 emit_normalize_vec3(p, half, half); 751 752 release_temp(p, dist); 753 } 754 755 /* Calculate dot products: 756 */ 757 emit_op2(p, VP_OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 758 emit_op2(p, VP_OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 759 760 761 /* Front face lighting: 762 */ 763 { 764 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 765 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 766 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 767 struct ureg res0, res1; 768 769 emit_op1(p, VP_OPCODE_LIT, lit, 0, dots); 770 771 if (!is_undef(att)) 772 emit_op2(p, VP_OPCODE_MUL, lit, 0, 
lit, att); 773 774 775 if (count == nr_lights) { 776 if (separate) { 777 res0 = register_output( p, VERT_RESULT_COL0 ); 778 res1 = register_output( p, VERT_RESULT_COL1 ); 779 } 780 else { 781 res0 = _col0; 782 res1 = register_output( p, VERT_RESULT_COL0 ); 783 } 784 } else { 785 res0 = _col0; 786 res1 = _col1; 787 } 788 789 emit_op3(p, VP_OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 790 emit_op3(p, VP_OPCODE_MAD, res0, 0, swizzle1(lit,Y), diffuse, _col0); 791 emit_op3(p, VP_OPCODE_MAD, res1, 0, swizzle1(lit,Z), specular, _col1); 792 793 release_temp(p, ambient); 794 release_temp(p, diffuse); 795 release_temp(p, specular); 796 } 797 798 /* Back face lighting: 799 */ 800 if (twoside) { 801 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 802 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 803 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 804 struct ureg res0, res1; 805 806 emit_op1(p, VP_OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z))); 807 808 if (!is_undef(att)) 809 emit_op2(p, VP_OPCODE_MUL, lit, 0, lit, att); 810 811 if (count == nr_lights) { 812 if (separate) { 813 res0 = register_output( p, VERT_RESULT_BFC0 ); 814 res1 = register_output( p, VERT_RESULT_BFC1 ); 815 } 816 else { 817 res0 = _bfc0; 818 res1 = register_output( p, VERT_RESULT_BFC0 ); 819 } 820 } else { 821 res0 = _bfc0; 822 res1 = _bfc1; 823 } 824 825 826 emit_op3(p, VP_OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 827 emit_op3(p, VP_OPCODE_MAD, res0, 0, swizzle1(lit,Y), diffuse, _bfc0); 828 emit_op3(p, VP_OPCODE_MAD, res1, 0, swizzle1(lit,Z), specular, _bfc1); 829 830 release_temp(p, ambient); 831 release_temp(p, diffuse); 832 release_temp(p, specular); 833 } 834 835 release_temp(p, half); 836 release_temp(p, VPpli); 837 release_temp(p, att); 838 } 839 } 840 841 release_temps( p ); 842} 843 844 845static void build_fog( struct tnl_program *p ) 846{ 847 GLcontext *ctx = p->ctx; 848 TNLcontext *tnl = TNL_CONTEXT(ctx); 849 struct ureg fog 
= register_output(p, VERT_RESULT_FOGC); 850 struct ureg input; 851 852 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) { 853 input = swizzle1(get_eye_position(p), Z); 854 } 855 else { 856 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 857 } 858 859 if (tnl->_DoVertexFog) { 860 struct ureg params = register_param1(p, STATE_FOG_PARAMS); 861 struct ureg tmp = get_temp(p); 862 863 switch (ctx->Fog.Mode) { 864 case GL_LINEAR: { 865 struct ureg id = get_identity_param(p); 866 emit_op2(p, VP_OPCODE_SUB, tmp, 0, swizzle1(params,Z), input); 867 emit_op2(p, VP_OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); 868 emit_op2(p, VP_OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ 869 emit_op2(p, VP_OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); 870 break; 871 } 872 case GL_EXP: 873 emit_op1(p, VP_OPCODE_ABS, tmp, 0, input); 874 emit_op2(p, VP_OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); 875 emit_op2(p, VP_OPCODE_POW, fog, WRITEMASK_X, 876 register_const1f(p, M_E), negate(tmp)); 877 break; 878 case GL_EXP2: 879 emit_op2(p, VP_OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); 880 emit_op2(p, VP_OPCODE_MUL, tmp, 0, tmp, tmp); 881 emit_op2(p, VP_OPCODE_POW, fog, WRITEMASK_X, 882 register_const1f(p, M_E), negate(tmp)); 883 break; 884 } 885 886 release_temp(p, tmp); 887 } 888 else { 889 /* results = incoming fog coords (compute fog per-fragment later) 890 * 891 * KW: Is it really necessary to do anything in this case? 
892 */ 893 emit_op1(p, VP_OPCODE_MOV, fog, WRITEMASK_X, input); 894 } 895} 896 897static void build_reflect_texgen( struct tnl_program *p, 898 struct ureg dest, 899 GLuint writemask ) 900{ 901 struct ureg normal = get_eye_normal(p); 902 struct ureg eye_hat = get_eye_position_normalized(p); 903 struct ureg tmp = get_temp(p); 904 905 /* n.u */ 906 emit_op2(p, VP_OPCODE_DP3, tmp, 0, normal, eye_hat); 907 /* 2n.u */ 908 emit_op2(p, VP_OPCODE_ADD, tmp, 0, tmp, tmp); 909 /* (-2n.u)n + u */ 910 emit_op3(p, VP_OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 911} 912 913static void build_sphere_texgen( struct tnl_program *p, 914 struct ureg dest, 915 GLuint writemask ) 916{ 917 struct ureg normal = get_eye_normal(p); 918 struct ureg eye_hat = get_eye_position_normalized(p); 919 struct ureg tmp = get_temp(p); 920 struct ureg half = register_const1f(p, .5); 921 struct ureg r = get_temp(p); 922 struct ureg inv_m = get_temp(p); 923 struct ureg id = get_identity_param(p); 924 925 /* Could share the above calculations, but it would be 926 * a fairly odd state for someone to set (both sphere and 927 * reflection active for different texture coordinate 928 * components. 
Of course - if two texture units enable 929 * reflect and/or sphere, things start to tilt in favour 930 * of seperating this out: 931 */ 932 933 /* n.u */ 934 emit_op2(p, VP_OPCODE_DP3, tmp, 0, normal, eye_hat); 935 /* 2n.u */ 936 emit_op2(p, VP_OPCODE_ADD, tmp, 0, tmp, tmp); 937 /* (-2n.u)n + u */ 938 emit_op3(p, VP_OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 939 /* r + 0,0,1 */ 940 emit_op2(p, VP_OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 941 /* rx^2 + ry^2 + (rz+1)^2 */ 942 emit_op2(p, VP_OPCODE_DP3, tmp, 0, tmp, tmp); 943 /* 2/m */ 944 emit_op1(p, VP_OPCODE_RSQ, tmp, 0, tmp); 945 /* 1/m */ 946 emit_op2(p, VP_OPCODE_MUL, inv_m, 0, tmp, swizzle1(half,X)); 947 /* r/m + 1/2 */ 948 emit_op3(p, VP_OPCODE_MAD, dest, writemask, r, inv_m, swizzle1(half,X)); 949 950 release_temp(p, tmp); 951 release_temp(p, r); 952 release_temp(p, inv_m); 953} 954 955 956static void build_texture_transform( struct tnl_program *p ) 957{ 958 GLcontext *ctx = p->ctx; 959 GLuint i, j; 960 961 for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { 962 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 963 GLuint texmat_enabled = ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i); 964 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 965 966 if (texUnit->TexGenEnabled || texmat_enabled) { 967 struct ureg out_texgen = undef; 968 969 if (texUnit->TexGenEnabled) { 970 GLuint copy_mask = 0; 971 GLuint sphere_mask = 0; 972 GLuint reflect_mask = 0; 973 GLuint normal_mask = 0; 974 GLuint modes[4]; 975 976 if (texmat_enabled) 977 out_texgen = get_temp(p); 978 else 979 out_texgen = out; 980 981 modes[0] = texUnit->GenModeS; 982 modes[1] = texUnit->GenModeT; 983 modes[2] = texUnit->GenModeR; 984 modes[3] = texUnit->GenModeQ; 985 986 for (j = 0; j < 4; j++) { 987 if (texUnit->TexGenEnabled & (1<<j)) { 988 switch (modes[j]) { 989 case GL_OBJECT_LINEAR: { 990 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 991 struct ureg plane = 992 register_param3(p, STATE_TEXGEN, i, 993 
STATE_TEXGEN_OBJECT_S + j); 994 995 emit_op2(p, VP_OPCODE_DP4, out_texgen, WRITEMASK_X << j, 996 obj, plane ); 997 break; 998 } 999 case GL_EYE_LINEAR: { 1000 struct ureg eye = get_eye_position(p); 1001 struct ureg plane = 1002 register_param3(p, STATE_TEXGEN, i, 1003 STATE_TEXGEN_EYE_S + j); 1004 1005 emit_op2(p, VP_OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1006 eye, plane ); 1007 break; 1008 } 1009 case GL_SPHERE_MAP: 1010 sphere_mask |= WRITEMASK_X << j; 1011 break; 1012 case GL_REFLECTION_MAP_NV: 1013 reflect_mask |= WRITEMASK_X << j; 1014 break; 1015 case GL_NORMAL_MAP_NV: 1016 normal_mask |= WRITEMASK_X << j; 1017 break; 1018 } 1019 } 1020 else 1021 copy_mask |= WRITEMASK_X << j; 1022 } 1023 1024 1025 if (sphere_mask) { 1026 build_sphere_texgen(p, out_texgen, sphere_mask); 1027 } 1028 1029 if (reflect_mask) { 1030 build_reflect_texgen(p, out_texgen, reflect_mask); 1031 } 1032 1033 if (normal_mask) { 1034 struct ureg normal = get_eye_normal(p); 1035 emit_op1(p, VP_OPCODE_MOV, out_texgen, normal_mask, normal ); 1036 } 1037 1038 if (copy_mask) { 1039 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1040 emit_op1(p, VP_OPCODE_MOV, out_texgen, copy_mask, in ); 1041 } 1042 } 1043 1044 if (texmat_enabled) { 1045 struct ureg texmat[4]; 1046 struct ureg in = (!is_undef(out_texgen) ? 
1047 out_texgen : 1048 register_input(p, VERT_ATTRIB_TEX0+i)); 1049 register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, 1050 0, 3, 0, texmat ); 1051 emit_matrix_transform_vec4( p, out, texmat, in ); 1052 } 1053 1054 release_temps(p); 1055 } 1056 } 1057} 1058 1059 1060/* Seems like it could be tighter: 1061 */ 1062static void build_pointsize( struct tnl_program *p ) 1063{ 1064 struct ureg eye = get_eye_position(p); 1065 struct ureg state_size = register_param1(p, STATE_POINT_SIZE); 1066 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1067 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1068 struct ureg ut = get_temp(p); 1069 1070 /* 1, -Z, Z * Z, 1 */ 1071 emit_op1(p, VP_OPCODE_MOV, ut, 0, swizzle1(get_identity_param(p), W)); 1072 emit_op2(p, VP_OPCODE_MUL, ut, WRITEMASK_YZ, ut, negate(swizzle1(eye, Z))); 1073 emit_op2(p, VP_OPCODE_MUL, ut, WRITEMASK_Z, ut, negate(swizzle1(eye, Z))); 1074 1075 1076 /* p1 + p2 * dist + p3 * dist * dist, 0 */ 1077 emit_op2(p, VP_OPCODE_DP3, ut, 0, ut, state_attenuation); 1078 1079 /* 1 / factor */ 1080 emit_op1(p, VP_OPCODE_RCP, ut, 0, ut ); 1081 1082 /* out = pointSize / factor */ 1083 emit_op2(p, VP_OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1084 1085 release_temp(p, ut); 1086} 1087 1088 1089static void build_passthrough( struct tnl_program *p, GLuint inputs ) 1090{ 1091} 1092 1093 1094 1095void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) 1096{ 1097 TNLcontext *tnl = TNL_CONTEXT(ctx); 1098 struct tnl_program p; 1099 GLuint db_NumInstructions; 1100 struct vp_instruction *db_Instructions; 1101 1102 if (ctx->VertexProgram._Enabled) 1103 return; 1104 1105 if (!ctx->_TnlProgram) 1106 ctx->_TnlProgram = (struct vertex_program *) 1107 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 1108 1109 memset(&p, 0, sizeof(p)); 1110 p.ctx = ctx; 1111 p.program = ctx->_TnlProgram; 1112 1113 p.eye_position = undef; 1114 p.eye_position_normalized = undef; 1115 p.eye_normal = undef; 1116 
p.identity = undef; 1117 1118 p.temp_flag = 0; 1119 p.temp_reserved = ~((1<<MAX_NV_VERTEX_PROGRAM_TEMPS)-1); 1120 1121 db_Instructions = p.program->Instructions; 1122 db_NumInstructions = p.program->Base.NumInstructions; 1123 1124 p.program->Instructions = MALLOC(sizeof(struct vp_instruction) * 100); 1125 1126 /* Initialize the arb_program struct */ 1127 p.program->Base.String = 0; 1128 p.program->Base.NumInstructions = 1129 p.program->Base.NumTemporaries = 1130 p.program->Base.NumParameters = 1131 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1132 1133 if (p.program->Parameters) 1134 _mesa_free_parameters(p.program->Parameters); 1135 else 1136 p.program->Parameters = _mesa_new_parameter_list(); 1137 1138 p.program->InputsRead = 0; 1139 p.program->OutputsWritten = 0; 1140 1141 /* Emit the program, starting with modelviewproject: 1142 */ 1143 build_hpos(&p); 1144 1145 /* Lighting calculations: 1146 */ 1147 if (ctx->Light.Enabled) 1148 build_lighting(&p); 1149 1150 if (ctx->Fog.Enabled) 1151 build_fog(&p); 1152 1153 if (ctx->Texture._TexGenEnabled || ctx->Texture._TexMatEnabled) 1154 build_texture_transform(&p); 1155 1156 if (ctx->Point._Attenuated) 1157 build_pointsize(&p); 1158 1159 /* Is there a need to copy inputs to outputs? The software 1160 * implementation might do this more efficiently by just assigning 1161 * the missing results to point at input arrays. 1162 */ 1163 if (/* tnl->vp_copy_inputs && */ 1164 (tnl->render_inputs & ~p.program->OutputsWritten)) { 1165 build_passthrough(&p, tnl->render_inputs); 1166 } 1167 1168 1169 /* Finish up: 1170 */ 1171 emit_op1(&p, VP_OPCODE_END, undef, 0, undef); 1172 1173 /* Disassemble: 1174 */ 1175 if (DISASSEM) { 1176 _mesa_printf ("\n"); 1177 } 1178 1179 1180 /* Notify driver the fragment program has (actually) changed. 
1181 */ 1182 if (db_Instructions == NULL || 1183 db_NumInstructions != p.program->Base.NumInstructions || 1184 memcmp(db_Instructions, p.program->Instructions, 1185 db_NumInstructions * sizeof(*db_Instructions)) != 0) { 1186 _mesa_printf("new program string\n"); 1187 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1188 &p.program->Base ); 1189 } 1190 1191 FREE(db_Instructions); 1192} 1193