t_vp_build.c revision 54dac2c84310536cce962101de29546d3eb80175
1/* 2 * Mesa 3-D graphics library 3 * Version: 6.5 4 * 5 * Copyright (C) 2006 Tungsten Graphics All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 21 * WHETHER IN 22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26/** 27 * \file t_vp_build.c 28 * Create a vertex program to execute the current fixed function T&L pipeline. 29 * \author Keith Whitwell 30 */ 31 32 33#include "glheader.h" 34#include "macros.h" 35#include "enums.h" 36#include "t_context.h" /* NOTE: very light dependency on this */ 37#include "t_vp_build.h" 38 39#include "shader/program.h" 40#include "shader/program_instruction.h" 41 42struct state_key { 43 unsigned light_global_enabled:1; 44 unsigned light_local_viewer:1; 45 unsigned light_twoside:1; 46 unsigned light_color_material:1; 47 unsigned light_color_material_mask:12; 48 unsigned light_material_mask:12; 49 50 unsigned normalize:1; 51 unsigned rescale_normals:1; 52 unsigned fog_source_is_depth:1; 53 unsigned tnl_do_vertex_fog:1; 54 unsigned separate_specular:1; 55 unsigned fog_mode:2; 56 unsigned point_attenuated:1; 57 unsigned texture_enabled_global:1; 58 unsigned fragprog_inputs_read:12; 59 60 struct { 61 unsigned light_enabled:1; 62 unsigned light_eyepos3_is_zero:1; 63 unsigned light_spotcutoff_is_180:1; 64 unsigned light_attenuated:1; 65 unsigned texunit_really_enabled:1; 66 unsigned texmat_enabled:1; 67 unsigned texgen_enabled:4; 68 unsigned texgen_mode0:4; 69 unsigned texgen_mode1:4; 70 unsigned texgen_mode2:4; 71 unsigned texgen_mode3:4; 72 } unit[8]; 73}; 74 75 76 77#define FOG_NONE 0 78#define FOG_LINEAR 1 79#define FOG_EXP 2 80#define FOG_EXP2 3 81 82static GLuint translate_fog_mode( GLenum mode ) 83{ 84 switch (mode) { 85 case GL_LINEAR: return FOG_LINEAR; 86 case GL_EXP: return FOG_EXP; 87 case GL_EXP2: return FOG_EXP2; 88 default: return FOG_NONE; 89 } 90} 91 92#define TXG_NONE 0 93#define TXG_OBJ_LINEAR 1 94#define TXG_EYE_LINEAR 2 95#define TXG_SPHERE_MAP 3 96#define TXG_REFLECTION_MAP 4 97#define TXG_NORMAL_MAP 5 98 99static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 100{ 101 if (!enabled) 102 return TXG_NONE; 103 104 switch (mode) { 105 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 106 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 107 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 108 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 109 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 110 default: return TXG_NONE; 111 } 112} 113 114static struct state_key *make_state_key( GLcontext *ctx ) 115{ 116 TNLcontext *tnl = TNL_CONTEXT(ctx); 117 struct vertex_buffer *VB = &tnl->vb; 118 const struct gl_fragment_program *fp = ctx->FragmentProgram._Current; 119 struct state_key *key = CALLOC_STRUCT(state_key); 120 GLuint i; 121 122 /* This now relies on texenvprogram.c being active: 123 */ 124 assert(fp); 125 126 key->fragprog_inputs_read = fp->Base.InputsRead; 127 128 key->separate_specular = (ctx->Light.Model.ColorControl == 129 GL_SEPARATE_SPECULAR_COLOR); 130 131 if (ctx->Light.Enabled) { 132 key->light_global_enabled = 1; 133 134 if (ctx->Light.Model.LocalViewer) 135 key->light_local_viewer = 1; 136 137 if (ctx->Light.Model.TwoSide) 138 key->light_twoside = 1; 139 140 if (ctx->Light.ColorMaterialEnabled) { 141 key->light_color_material = 1; 142 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask; 143 } 144 145 for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) 146 if (VB->AttribPtr[i]->stride) 147 key->light_material_mask |= 1<<(i-_TNL_ATTRIB_MAT_FRONT_AMBIENT); 148 149 for (i = 0; i < MAX_LIGHTS; i++) { 150 struct gl_light *light = &ctx->Light.Light[i]; 151 152 if (light->Enabled) { 153 key->unit[i].light_enabled = 1; 154 155 if (light->EyePosition[3] == 0.0) 156 key->unit[i].light_eyepos3_is_zero = 1; 157 158 if (light->SpotCutoff == 180.0) 159 key->unit[i].light_spotcutoff_is_180 = 1; 160 161 if (light->ConstantAttenuation != 1.0 || 162 light->LinearAttenuation != 0.0 || 163 light->QuadraticAttenuation != 0.0) 164 key->unit[i].light_attenuated = 1; 165 } 166 } 167 } 168 169 if (ctx->Transform.Normalize) 170 key->normalize = 1; 171 172 if (ctx->Transform.RescaleNormals) 173 key->rescale_normals = 1; 174 175 key->fog_mode = translate_fog_mode(fp->FogOption); 176 177 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) 178 key->fog_source_is_depth = 1; 179 180 if (tnl->_DoVertexFog) 181 key->tnl_do_vertex_fog = 1; 182 183 if (ctx->Point._Attenuated) 184 key->point_attenuated = 1; 185 186 if (ctx->Texture._TexGenEnabled || 187 ctx->Texture._TexMatEnabled || 188 ctx->Texture._EnabledUnits) 189 key->texture_enabled_global = 1; 190 191 for (i = 0; i < MAX_TEXTURE_UNITS; i++) { 192 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 193 194 if (texUnit->_ReallyEnabled) 195 key->unit[i].texunit_really_enabled = 1; 196 197 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 198 key->unit[i].texmat_enabled = 1; 199 200 if (texUnit->TexGenEnabled) { 201 key->unit[i].texgen_enabled = 1; 202 203 key->unit[i].texgen_mode0 = 204 translate_texgen( texUnit->TexGenEnabled & (1<<0), 205 texUnit->GenModeS ); 206 key->unit[i].texgen_mode1 = 207 translate_texgen( texUnit->TexGenEnabled & (1<<1), 208 texUnit->GenModeT ); 209 key->unit[i].texgen_mode2 = 210 translate_texgen( texUnit->TexGenEnabled & (1<<2), 211 texUnit->GenModeR ); 212 key->unit[i].texgen_mode3 = 213 translate_texgen( texUnit->TexGenEnabled & (1<<3), 214 texUnit->GenModeQ ); 215 } 216 } 217 218 return key; 219} 220 221 222 223/* Very useful debugging tool - produces annotated listing of 224 * generated program with line/function references for each 225 * instruction back into this file: 226 */ 227#define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM) 228 229/* Should be tunable by the driver - do we want to do matrix 230 * multiplications with DP4's or with MUL/MAD's? SSE works better 231 * with the latter, drivers may differ. 232 */ 233#define PREFER_DP4 0 234 235#define MAX_INSN 256 236 237/* Use uregs to represent registers internally, translate to Mesa's 238 * expected formats on emit. 239 * 240 * NOTE: These are passed by value extensively in this file rather 241 * than as usual by pointer reference. If this disturbs you, try 242 * remembering they are just 32bits in size. 243 * 244 * GCC is smart enough to deal with these dword-sized structures in 245 * much the same way as if I had defined them as dwords and was using 246 * macros to access and set the fields. This is much nicer and easier 247 * to evolve. 248 */ 249struct ureg { 250 GLuint file:4; 251 GLint idx:8; /* relative addressing may be negative */ 252 GLuint negate:1; 253 GLuint swz:12; 254 GLuint pad:7; 255}; 256 257 258struct tnl_program { 259 const struct state_key *state; 260 struct gl_vertex_program *program; 261 262 GLuint temp_in_use; 263 GLuint temp_reserved; 264 265 struct ureg eye_position; 266 struct ureg eye_position_normalized; 267 struct ureg eye_normal; 268 struct ureg identity; 269 270 GLuint materials; 271 GLuint color_materials; 272}; 273 274 275static const struct ureg undef = { 276 PROGRAM_UNDEFINED, 277 ~0, 278 0, 279 0, 280 0 281}; 282 283/* Local shorthand: 284 */ 285#define X SWIZZLE_X 286#define Y SWIZZLE_Y 287#define Z SWIZZLE_Z 288#define W SWIZZLE_W 289 290 291/* Construct a ureg: 292 */ 293static struct ureg make_ureg(GLuint file, GLint idx) 294{ 295 struct ureg reg; 296 reg.file = file; 297 reg.idx = idx; 298 reg.negate = 0; 299 reg.swz = SWIZZLE_NOOP; 300 reg.pad = 0; 301 return reg; 302} 303 304 305 306static struct ureg negate( struct ureg reg ) 307{ 308 reg.negate ^= 1; 309 return reg; 310} 311 312 313static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 314{ 315 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 316 GET_SWZ(reg.swz, y), 317 GET_SWZ(reg.swz, z), 318 GET_SWZ(reg.swz, w)); 319 320 return reg; 321} 322 323static struct ureg swizzle1( struct ureg reg, int x ) 324{ 325 return swizzle(reg, x, x, x, x); 326} 327 328static struct ureg get_temp( struct tnl_program *p ) 329{ 330 int bit = _mesa_ffs( ~p->temp_in_use ); 331 if (!bit) { 332 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 333 _mesa_exit(1); 334 } 335 336 if ((GLuint) bit > p->program->Base.NumTemporaries) 337 p->program->Base.NumTemporaries = bit; 338 339 p->temp_in_use |= 1<<(bit-1); 340 return make_ureg(PROGRAM_TEMPORARY, bit-1); 341} 342 343static struct ureg reserve_temp( struct tnl_program *p ) 344{ 345 struct ureg temp = get_temp( p ); 346 p->temp_reserved |= 1<<temp.idx; 347 return temp; 348} 349 350static void release_temp( struct tnl_program *p, struct ureg reg ) 351{ 352 if (reg.file == PROGRAM_TEMPORARY) { 353 p->temp_in_use &= ~(1<<reg.idx); 354 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 355 } 356} 357 358static void release_temps( struct tnl_program *p ) 359{ 360 p->temp_in_use = p->temp_reserved; 361} 362 363 364 365static struct ureg register_input( struct tnl_program *p, GLuint input ) 366{ 367 p->program->Base.InputsRead |= (1<<input); 368 return make_ureg(PROGRAM_INPUT, input); 369} 370 371static struct ureg register_output( struct tnl_program *p, GLuint output ) 372{ 373 p->program->Base.OutputsWritten |= (1<<output); 374 return make_ureg(PROGRAM_OUTPUT, output); 375} 376 377static struct ureg register_const4f( struct tnl_program *p, 378 GLfloat s0, 379 GLfloat s1, 380 GLfloat s2, 381 GLfloat s3) 382{ 383 GLfloat values[4]; 384 GLint idx; 385 values[0] = s0; 386 values[1] = s1; 387 values[2] = s2; 388 values[3] = s3; 389 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4 ); 390 return make_ureg(PROGRAM_STATE_VAR, idx); 391} 392 393#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 394#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 395#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 396#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 397 398static GLboolean is_undef( struct ureg reg ) 399{ 400 return reg.file == PROGRAM_UNDEFINED; 401} 402 403static struct ureg get_identity_param( struct tnl_program *p ) 404{ 405 if (is_undef(p->identity)) 406 p->identity = register_const4f(p, 0,0,0,1); 407 408 return p->identity; 409} 410 411static struct ureg register_param6( struct tnl_program *p, 412 GLint s0, 413 GLint s1, 414 GLint s2, 415 GLint s3, 416 GLint s4, 417 GLint s5) 418{ 419 GLint tokens[6]; 420 GLint idx; 421 tokens[0] = s0; 422 tokens[1] = s1; 423 tokens[2] = s2; 424 tokens[3] = s3; 425 tokens[4] = s4; 426 tokens[5] = s5; 427 idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); 428 return make_ureg(PROGRAM_STATE_VAR, idx); 429} 430 431 432#define register_param1(p,s0) register_param6(p,s0,0,0,0,0,0) 433#define register_param2(p,s0,s1) register_param6(p,s0,s1,0,0,0,0) 434#define register_param3(p,s0,s1,s2) register_param6(p,s0,s1,s2,0,0,0) 435#define register_param4(p,s0,s1,s2,s3) register_param6(p,s0,s1,s2,s3,0,0) 436 437 438static void register_matrix_param6( struct tnl_program *p, 439 GLint s0, 440 GLint s1, 441 GLint s2, 442 GLint s3, 443 GLint s4, 444 GLint s5, 445 struct ureg *matrix ) 446{ 447 GLint i; 448 449 /* This is a bit sad as the support is there to pull the whole 450 * matrix out in one go: 451 */ 452 for (i = 0; i <= s4 - s3; i++) 453 matrix[i] = register_param6( p, s0, s1, s2, i, i, s5 ); 454} 455 456 457static void emit_arg( struct prog_src_register *src, 458 struct ureg reg ) 459{ 460 src->File = reg.file; 461 src->Index = reg.idx; 462 src->Swizzle = reg.swz; 463 src->NegateBase = reg.negate ? NEGATE_XYZW : 0; 464 src->Abs = 0; 465 src->NegateAbs = 0; 466 src->RelAddr = 0; 467} 468 469static void emit_dst( struct prog_dst_register *dst, 470 struct ureg reg, GLuint mask ) 471{ 472 dst->File = reg.file; 473 dst->Index = reg.idx; 474 /* allow zero as a shorthand for xyzw */ 475 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 476 dst->CondMask = COND_TR; 477 dst->CondSwizzle = 0; 478 dst->CondSrc = 0; 479 dst->pad = 0; 480} 481 482static void debug_insn( struct prog_instruction *inst, const char *fn, 483 GLuint line ) 484{ 485 if (DISASSEM) { 486 static const char *last_fn; 487 488 if (fn != last_fn) { 489 last_fn = fn; 490 _mesa_printf("%s:\n", fn); 491 } 492 493 _mesa_printf("%d:\t", line); 494 _mesa_print_instruction(inst); 495 } 496} 497 498 499static void emit_op3fn(struct tnl_program *p, 500 GLuint op, 501 struct ureg dest, 502 GLuint mask, 503 struct ureg src0, 504 struct ureg src1, 505 struct ureg src2, 506 const char *fn, 507 GLuint line) 508{ 509 GLuint nr = p->program->Base.NumInstructions++; 510 struct prog_instruction *inst = &p->program->Base.Instructions[nr]; 511 512 if (p->program->Base.NumInstructions > MAX_INSN) { 513 _mesa_problem(0, "Out of instructions in emit_op3fn\n"); 514 return; 515 } 516 517 inst->Opcode = (enum prog_opcode) op; 518 inst->StringPos = 0; 519 inst->Data = 0; 520 521 emit_arg( &inst->SrcReg[0], src0 ); 522 emit_arg( &inst->SrcReg[1], src1 ); 523 emit_arg( &inst->SrcReg[2], src2 ); 524 525 emit_dst( &inst->DstReg, dest, mask ); 526 527 debug_insn(inst, fn, line); 528} 529 530 531#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 532 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 533 534#define emit_op2(p, op, dst, mask, src0, src1) \ 535 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 536 537#define emit_op1(p, op, dst, mask, src0) \ 538 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 539 540 541static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 542{ 543 if (reg.file == PROGRAM_TEMPORARY && 544 !(p->temp_reserved & (1<<reg.idx))) 545 return reg; 546 else { 547 struct ureg temp = get_temp(p); 548 emit_op1(p, OPCODE_MOV, temp, 0, reg); 549 return temp; 550 } 551} 552 553 554/* Currently no tracking performed of input/output/register size or 555 * active elements. Could be used to reduce these operations, as 556 * could the matrix type. 557 */ 558static void emit_matrix_transform_vec4( struct tnl_program *p, 559 struct ureg dest, 560 const struct ureg *mat, 561 struct ureg src) 562{ 563 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 564 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 565 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 566 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 567} 568 569/* This version is much easier to implement if writemasks are not 570 * supported natively on the target or (like SSE), the target doesn't 571 * have a clean/obvious dotproduct implementation. 572 */ 573static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 574 struct ureg dest, 575 const struct ureg *mat, 576 struct ureg src) 577{ 578 struct ureg tmp; 579 580 if (dest.file != PROGRAM_TEMPORARY) 581 tmp = get_temp(p); 582 else 583 tmp = dest; 584 585 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 586 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 587 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 588 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 589 590 if (dest.file != PROGRAM_TEMPORARY) 591 release_temp(p, tmp); 592} 593 594static void emit_matrix_transform_vec3( struct tnl_program *p, 595 struct ureg dest, 596 const struct ureg *mat, 597 struct ureg src) 598{ 599 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 600 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 601 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 602} 603 604 605static void emit_normalize_vec3( struct tnl_program *p, 606 struct ureg dest, 607 struct ureg src ) 608{ 609 struct ureg tmp = get_temp(p); 610 emit_op2(p, OPCODE_DP3, tmp, 0, src, src); 611 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 612 emit_op2(p, OPCODE_MUL, dest, 0, src, tmp); 613 release_temp(p, tmp); 614} 615 616static void emit_passthrough( struct tnl_program *p, 617 GLuint input, 618 GLuint output ) 619{ 620 struct ureg out = register_output(p, output); 621 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 622} 623 624static struct ureg get_eye_position( struct tnl_program *p ) 625{ 626 if (is_undef(p->eye_position)) { 627 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 628 struct ureg modelview[4]; 629 630 p->eye_position = reserve_temp(p); 631 632 if (PREFER_DP4) { 633 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, 634 STATE_MATRIX, modelview ); 635 636 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 637 } 638 else { 639 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, 640 STATE_MATRIX_TRANSPOSE, modelview ); 641 642 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 643 } 644 } 645 646 return p->eye_position; 647} 648 649 650static struct ureg get_eye_position_normalized( struct tnl_program *p ) 651{ 652 if (is_undef(p->eye_position_normalized)) { 653 struct ureg eye = get_eye_position(p); 654 p->eye_position_normalized = reserve_temp(p); 655 emit_normalize_vec3(p, p->eye_position_normalized, eye); 656 } 657 658 return p->eye_position_normalized; 659} 660 661 662static struct ureg get_eye_normal( struct tnl_program *p ) 663{ 664 if (is_undef(p->eye_normal)) { 665 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 666 struct ureg mvinv[3]; 667 668 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 2, 669 STATE_MATRIX_INVTRANS, mvinv ); 670 671 p->eye_normal = reserve_temp(p); 672 673 /* Transform to eye space: 674 */ 675 emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal ); 676 677 /* Normalize/Rescale: 678 */ 679 if (p->state->normalize) { 680 emit_normalize_vec3( p, p->eye_normal, p->eye_normal ); 681 } 682 else if (p->state->rescale_normals) { 683 struct ureg rescale = register_param2(p, STATE_INTERNAL, 684 STATE_NORMAL_SCALE); 685 686 emit_op2( p, OPCODE_MUL, p->eye_normal, 0, normal, 687 swizzle1(rescale, X)); 688 } 689 } 690 691 return p->eye_normal; 692} 693 694 695 696static void build_hpos( struct tnl_program *p ) 697{ 698 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 699 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 700 struct ureg mvp[4]; 701 702 if (PREFER_DP4) { 703 register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, 704 STATE_MATRIX, mvp ); 705 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 706 } 707 else { 708 register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, 709 STATE_MATRIX_TRANSPOSE, mvp ); 710 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 711 } 712} 713 714 715static GLuint material_attrib( GLuint side, GLuint property ) 716{ 717 return ((property - STATE_AMBIENT) * 2 + 718 side); 719} 720 721/* Get a bitmask of which material values vary on a per-vertex basis. 722 */ 723static void set_material_flags( struct tnl_program *p ) 724{ 725 p->color_materials = 0; 726 p->materials = 0; 727 728 if (p->state->light_color_material) { 729 p->materials = 730 p->color_materials = p->state->light_color_material_mask; 731 } 732 733 p->materials |= p->state->light_material_mask; 734} 735 736 737static struct ureg get_material( struct tnl_program *p, GLuint side, 738 GLuint property ) 739{ 740 GLuint attrib = material_attrib(side, property); 741 742 if (p->color_materials & (1<<attrib)) 743 return register_input(p, VERT_ATTRIB_COLOR0); 744 else if (p->materials & (1<<attrib)) 745 return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT ); 746 else 747 return register_param3( p, STATE_MATERIAL, side, property ); 748} 749 750#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 751 MAT_BIT_FRONT_AMBIENT | \ 752 MAT_BIT_FRONT_DIFFUSE) << (side)) 753 754/* Either return a precalculated constant value or emit code to 755 * calculate these values dynamically in the case where material calls 756 * are present between begin/end pairs. 757 * 758 * Probably want to shift this to the program compilation phase - if 759 * we always emitted the calculation here, a smart compiler could 760 * detect that it was constant (given a certain set of inputs), and 761 * lift it out of the main loop. That way the programs created here 762 * would be independent of the vertex_buffer details. 763 */ 764static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 765{ 766 if (p->materials & SCENE_COLOR_BITS(side)) { 767 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 768 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 769 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 770 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 771 struct ureg tmp = make_temp(p, material_diffuse); 772 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 773 material_ambient, material_emission); 774 return tmp; 775 } 776 else 777 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 778} 779 780 781static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 782 GLuint side, GLuint property ) 783{ 784 GLuint attrib = material_attrib(side, property); 785 if (p->materials & (1<<attrib)) { 786 struct ureg light_value = 787 register_param3(p, STATE_LIGHT, light, property); 788 struct ureg material_value = get_material(p, side, property); 789 struct ureg tmp = get_temp(p); 790 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 791 return tmp; 792 } 793 else 794 return register_param4(p, STATE_LIGHTPROD, light, side, property); 795} 796 797static struct ureg calculate_light_attenuation( struct tnl_program *p, 798 GLuint i, 799 struct ureg VPpli, 800 struct ureg dist ) 801{ 802 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 803 STATE_ATTENUATION); 804 struct ureg att = get_temp(p); 805 806 /* Calculate spot attenuation: 807 */ 808 if (!p->state->unit[i].light_spotcutoff_is_180) { 809 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 810 STATE_SPOT_DIR_NORMALIZED, i); 811 struct ureg spot = get_temp(p); 812 struct ureg slt = get_temp(p); 813 814 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 815 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 816 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 817 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 818 819 release_temp(p, spot); 820 release_temp(p, slt); 821 } 822 823 /* Calculate distance attenuation: 824 */ 825 if (p->state->unit[i].light_attenuated) { 826 827 /* 1/d,d,d,1/d */ 828 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 829 /* 1,d,d*d,1/d */ 830 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 831 /* 1/dist-atten */ 832 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 833 834 if (!p->state->unit[i].light_spotcutoff_is_180) { 835 /* dist-atten */ 836 emit_op1(p, OPCODE_RCP, dist, 0, dist); 837 /* spot-atten * dist-atten */ 838 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 839 } else { 840 /* dist-atten */ 841 emit_op1(p, OPCODE_RCP, att, 0, dist); 842 } 843 } 844 845 return att; 846} 847 848 849 850 851 852/* Need to add some addtional parameters to allow lighting in object 853 * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye 854 * space lighting. 855 */ 856static void build_lighting( struct tnl_program *p ) 857{ 858 const GLboolean twoside = p->state->light_twoside; 859 const GLboolean separate = p->state->separate_specular; 860 GLuint nr_lights = 0, count = 0; 861 struct ureg normal = get_eye_normal(p); 862 struct ureg lit = get_temp(p); 863 struct ureg dots = get_temp(p); 864 struct ureg _col0 = undef, _col1 = undef; 865 struct ureg _bfc0 = undef, _bfc1 = undef; 866 GLuint i; 867 868 for (i = 0; i < MAX_LIGHTS; i++) 869 if (p->state->unit[i].light_enabled) 870 nr_lights++; 871 872 set_material_flags(p); 873 874 { 875 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 876 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 877 release_temp(p, shininess); 878 879 _col0 = make_temp(p, get_scenecolor(p, 0)); 880 if (separate) 881 _col1 = make_temp(p, get_identity_param(p)); 882 else 883 _col1 = _col0; 884 885 } 886 887 if (twoside) { 888 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 889 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 890 negate(swizzle1(shininess,X))); 891 release_temp(p, shininess); 892 893 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 894 if (separate) 895 _bfc1 = make_temp(p, get_identity_param(p)); 896 else 897 _bfc1 = _bfc0; 898 } 899 900 901 /* If no lights, still need to emit the scenecolor. 902 */ 903 { 904 struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); 905 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 906 } 907 908 if (separate) { 909 struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); 910 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 911 } 912 913 if (twoside) { 914 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); 915 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 916 } 917 918 if (twoside && separate) { 919 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); 920 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 921 } 922 923 if (nr_lights == 0) { 924 release_temps(p); 925 return; 926 } 927 928 929 for (i = 0; i < MAX_LIGHTS; i++) { 930 if (p->state->unit[i].light_enabled) { 931 struct ureg half = undef; 932 struct ureg att = undef, VPpli = undef; 933 934 count++; 935 936 if (p->state->unit[i].light_eyepos3_is_zero) { 937 /* Can used precomputed constants in this case. 938 * Attenuation never applies to infinite lights. 939 */ 940 VPpli = register_param3(p, STATE_LIGHT, i, 941 STATE_POSITION_NORMALIZED); 942 half = register_param3(p, STATE_LIGHT, i, STATE_HALF); 943 } 944 else { 945 struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 946 STATE_POSITION); 947 struct ureg V = get_eye_position(p); 948 struct ureg dist = get_temp(p); 949 950 VPpli = get_temp(p); 951 half = get_temp(p); 952 953 /* Calulate VPpli vector 954 */ 955 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 956 957 /* Normalize VPpli. The dist value also used in 958 * attenuation below. 959 */ 960 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 961 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 962 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 963 964 965 /* Calculate attenuation: 966 */ 967 if (!p->state->unit[i].light_spotcutoff_is_180 || 968 p->state->unit[i].light_attenuated) { 969 att = calculate_light_attenuation(p, i, VPpli, dist); 970 } 971 972 973 /* Calculate viewer direction, or use infinite viewer: 974 */ 975 if (p->state->light_local_viewer) { 976 struct ureg eye_hat = get_eye_position_normalized(p); 977 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 978 } 979 else { 980 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 981 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 982 } 983 984 emit_normalize_vec3(p, half, half); 985 986 release_temp(p, dist); 987 } 988 989 /* Calculate dot products: 990 */ 991 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 992 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 993 994 995 /* Front face lighting: 996 */ 997 { 998 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 999 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1000 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1001 struct ureg res0, res1; 1002 GLuint mask0, mask1; 1003 1004 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1005 1006 if (!is_undef(att)) 1007 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1008 1009 1010 if (count == nr_lights) { 1011 if (separate) { 1012 mask0 = WRITEMASK_XYZ; 1013 mask1 = WRITEMASK_XYZ; 1014 res0 = register_output( p, VERT_RESULT_COL0 ); 1015 res1 = register_output( p, VERT_RESULT_COL1 ); 1016 } 1017 else { 1018 mask0 = 0; 1019 mask1 = WRITEMASK_XYZ; 1020 res0 = _col0; 1021 res1 = register_output( p, VERT_RESULT_COL0 ); 1022 } 1023 } else { 1024 mask0 = 0; 1025 mask1 = 0; 1026 res0 = _col0; 1027 res1 = _col1; 1028 } 1029 1030 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1031 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1032 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1033 1034 release_temp(p, ambient); 1035 release_temp(p, diffuse); 1036 release_temp(p, specular); 1037 } 1038 1039 /* Back face lighting: 1040 */ 1041 if (twoside) { 1042 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1043 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1044 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1045 struct ureg res0, res1; 1046 GLuint mask0, mask1; 1047 1048 emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z))); 1049 1050 if (!is_undef(att)) 1051 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1052 1053 if (count == nr_lights) { 1054 if (separate) { 1055 mask0 = WRITEMASK_XYZ; 1056 mask1 = WRITEMASK_XYZ; 1057 res0 = register_output( p, VERT_RESULT_BFC0 ); 1058 res1 = register_output( p, VERT_RESULT_BFC1 ); 1059 } 1060 else { 1061 mask0 = 0; 1062 mask1 = WRITEMASK_XYZ; 1063 res0 = _bfc0; 1064 res1 = register_output( p, VERT_RESULT_BFC0 ); 1065 } 1066 } else { 1067 res0 = _bfc0; 1068 res1 = _bfc1; 1069 mask0 = 0; 1070 mask1 = 0; 1071 } 1072 1073 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1074 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1075 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1076 1077 release_temp(p, ambient); 1078 release_temp(p, diffuse); 1079 release_temp(p, specular); 1080 } 1081 1082 release_temp(p, half); 1083 release_temp(p, VPpli); 1084 release_temp(p, att); 1085 } 1086 } 1087 1088 release_temps( p ); 1089} 1090 1091 1092static void build_fog( struct tnl_program *p ) 1093{ 1094 struct ureg fog = register_output(p, VERT_RESULT_FOGC); 1095 struct ureg input; 1096 1097 if (p->state->fog_source_is_depth) { 1098 input = swizzle1(get_eye_position(p), Z); 1099 } 1100 else { 1101 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1102 } 1103 1104 if (p->state->tnl_do_vertex_fog) { 1105 struct ureg params = register_param1(p, STATE_FOG_PARAMS_OPTIMIZED); 1106 struct ureg tmp = get_temp(p); 1107 1108 switch (p->state->fog_mode) { 1109 case FOG_LINEAR: { 1110 struct ureg id = get_identity_param(p); 1111 emit_op3(p, OPCODE_MAD, tmp, 0, input, swizzle1(params,X), swizzle1(params,Y)); 1112 emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ 1113 emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); 1114 break; 1115 } 1116 case FOG_EXP: 1117 emit_op1(p, OPCODE_ABS, tmp, 0, input); 1118 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z)); 1119 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp)); 1120 break; 1121 case FOG_EXP2: 1122 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W)); 1123 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 1124 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp)); 1125 break; 1126 } 1127 1128 release_temp(p, tmp); 1129 } 1130 else { 1131 /* results = incoming fog coords (compute fog per-fragment later) 1132 * 1133 * KW: Is it really necessary to do anything in this case? 1134 */ 1135 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input); 1136 } 1137} 1138 1139static void build_reflect_texgen( struct tnl_program *p, 1140 struct ureg dest, 1141 GLuint writemask ) 1142{ 1143 struct ureg normal = get_eye_normal(p); 1144 struct ureg eye_hat = get_eye_position_normalized(p); 1145 struct ureg tmp = get_temp(p); 1146 1147 /* n.u */ 1148 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1149 /* 2n.u */ 1150 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1151 /* (-2n.u)n + u */ 1152 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1153 1154 release_temp(p, tmp); 1155} 1156 1157static void build_sphere_texgen( struct tnl_program *p, 1158 struct ureg dest, 1159 GLuint writemask ) 1160{ 1161 struct ureg normal = get_eye_normal(p); 1162 struct ureg eye_hat = get_eye_position_normalized(p); 1163 struct ureg tmp = get_temp(p); 1164 struct ureg half = register_scalar_const(p, .5); 1165 struct ureg r = get_temp(p); 1166 struct ureg inv_m = get_temp(p); 1167 struct ureg id = get_identity_param(p); 1168 1169 /* Could share the above calculations, but it would be 1170 * a fairly odd state for someone to set (both sphere and 1171 * reflection active for different texture coordinate 1172 * components. Of course - if two texture units enable 1173 * reflect and/or sphere, things start to tilt in favour 1174 * of seperating this out: 1175 */ 1176 1177 /* n.u */ 1178 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1179 /* 2n.u */ 1180 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1181 /* (-2n.u)n + u */ 1182 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1183 /* r + 0,0,1 */ 1184 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1185 /* rx^2 + ry^2 + (rz+1)^2 */ 1186 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1187 /* 2/m */ 1188 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1189 /* 1/m */ 1190 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1191 /* r/m + 1/2 */ 1192 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1193 1194 release_temp(p, tmp); 1195 release_temp(p, r); 1196 release_temp(p, inv_m); 1197} 1198 1199 1200static void build_texture_transform( struct tnl_program *p ) 1201{ 1202 GLuint i, j; 1203 1204 for (i = 0; i < MAX_TEXTURE_UNITS; i++) { 1205 1206 if (!(p->state->fragprog_inputs_read & (FRAG_BIT_TEX0<<i))) 1207 continue; 1208 1209 if (p->state->unit[i].texgen_enabled || 1210 p->state->unit[i].texmat_enabled) { 1211 1212 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1213 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 1214 struct ureg out_texgen = undef; 1215 1216 if (p->state->unit[i].texgen_enabled) { 1217 GLuint copy_mask = 0; 1218 GLuint sphere_mask = 0; 1219 GLuint reflect_mask = 0; 1220 GLuint normal_mask = 0; 1221 GLuint modes[4]; 1222 1223 if (texmat_enabled) 1224 out_texgen = get_temp(p); 1225 else 1226 out_texgen = out; 1227 1228 modes[0] = p->state->unit[i].texgen_mode0; 1229 modes[1] = p->state->unit[i].texgen_mode1; 1230 modes[2] = p->state->unit[i].texgen_mode2; 1231 modes[3] = p->state->unit[i].texgen_mode3; 1232 1233 for (j = 0; j < 4; j++) { 1234 switch (modes[j]) { 1235 case TXG_OBJ_LINEAR: { 1236 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1237 struct ureg plane = 1238 register_param3(p, STATE_TEXGEN, i, 1239 STATE_TEXGEN_OBJECT_S + j); 1240 1241 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1242 obj, plane ); 1243 break; 1244 } 1245 case TXG_EYE_LINEAR: { 1246 struct ureg eye = get_eye_position(p); 1247 struct ureg plane = 1248 register_param3(p, STATE_TEXGEN, i, 1249 STATE_TEXGEN_EYE_S + j); 1250 1251 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1252 eye, plane ); 1253 break; 1254 } 1255 case TXG_SPHERE_MAP: 1256 sphere_mask |= WRITEMASK_X << j; 1257 break; 1258 case TXG_REFLECTION_MAP: 1259 reflect_mask |= WRITEMASK_X << j; 1260 break; 1261 case TXG_NORMAL_MAP: 1262 normal_mask |= WRITEMASK_X << j; 1263 break; 1264 case TXG_NONE: 1265 copy_mask |= WRITEMASK_X << j; 1266 } 1267 1268 } 1269 1270 1271 if (sphere_mask) { 1272 build_sphere_texgen(p, out_texgen, sphere_mask); 1273 } 1274 1275 if (reflect_mask) { 1276 build_reflect_texgen(p, out_texgen, reflect_mask); 1277 } 1278 1279 if (normal_mask) { 1280 struct ureg normal = get_eye_normal(p); 1281 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1282 } 1283 1284 if (copy_mask) { 1285 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1286 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1287 } 1288 } 1289 1290 if (texmat_enabled) { 1291 struct ureg texmat[4]; 1292 struct ureg in = (!is_undef(out_texgen) ? 1293 out_texgen : 1294 register_input(p, VERT_ATTRIB_TEX0+i)); 1295 if (PREFER_DP4) { 1296 register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, 1297 0, 3, STATE_MATRIX, texmat ); 1298 emit_matrix_transform_vec4( p, out, texmat, in ); 1299 } 1300 else { 1301 register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, 1302 0, 3, STATE_MATRIX_TRANSPOSE, texmat ); 1303 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1304 } 1305 } 1306 1307 release_temps(p); 1308 } 1309 else { 1310 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); 1311 } 1312 } 1313} 1314 1315 1316static void build_pointsize( struct tnl_program *p ) 1317{ 1318 struct ureg eye = get_eye_position(p); 1319 struct ureg state_size = register_param1(p, STATE_POINT_SIZE); 1320 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1321 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1322 struct ureg ut = get_temp(p); 1323 1324 /* p1 + dist * (p2 + dist * p3); */ 1325 emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)), 1326 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1327 emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)), 1328 ut, swizzle1(state_attenuation, X)); 1329 1330 /* 1 / sqrt(factor) */ 1331 emit_op1(p, OPCODE_RSQ, ut, 0, ut ); 1332 1333#if 1 1334 /* out = pointSize / sqrt(factor) */ 1335 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1336#else 1337 /* not sure, might make sense to do clamping here, 1338 but it's not done in t_vb_points neither */ 1339 emit_op2(p, OPCODE_MUL, ut, 0, ut, state_size); 1340 emit_op2(p, OPCODE_MAX, ut, 0, ut, swizzle1(state_size, Y)); 1341 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1342#endif 1343 1344 release_temp(p, ut); 1345} 1346 1347static void build_tnl_program( struct tnl_program *p ) 1348{ /* Emit the program, starting with modelviewproject: 1349 */ 1350 build_hpos(p); 1351 1352 /* Lighting calculations: 1353 */ 1354 if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { 1355 if (p->state->light_global_enabled) 1356 build_lighting(p); 1357 else { 1358 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) 1359 emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); 1360 1361 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) 1362 emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1); 1363 } 1364 } 1365 1366 if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) || 1367 p->state->fog_mode != FOG_NONE) 1368 build_fog(p); 1369 1370 if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) 1371 build_texture_transform(p); 1372 1373 if (p->state->point_attenuated) 1374 build_pointsize(p); 1375 1376 /* Finish up: 1377 */ 1378 emit_op1(p, OPCODE_END, undef, 0, undef); 1379 1380 /* Disassemble: 1381 */ 1382 if (DISASSEM) { 1383 _mesa_printf ("\n"); 1384 } 1385} 1386 1387 1388static void 1389create_new_program( const struct state_key *key, 1390 struct gl_vertex_program *program, 1391 GLuint max_temps) 1392{ 1393 struct tnl_program p; 1394 1395 _mesa_memset(&p, 0, sizeof(p)); 1396 p.state = key; 1397 p.program = program; 1398 p.eye_position = undef; 1399 p.eye_position_normalized = undef; 1400 p.eye_normal = undef; 1401 p.identity = undef; 1402 p.temp_in_use = 0; 1403 1404 if (max_temps >= sizeof(int) * 8) 1405 p.temp_reserved = 0; 1406 else 1407 p.temp_reserved = ~((1<<max_temps)-1); 1408 1409 p.program->Base.Instructions 1410 = (struct prog_instruction*) MALLOC(sizeof(struct prog_instruction) * MAX_INSN); 1411 p.program->Base.String = 0; 1412 p.program->Base.NumInstructions = 1413 p.program->Base.NumTemporaries = 1414 p.program->Base.NumParameters = 1415 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1416 p.program->Base.Parameters = _mesa_new_parameter_list(); 1417 p.program->Base.InputsRead = 0; 1418 p.program->Base.OutputsWritten = 0; 1419 1420 build_tnl_program( &p ); 1421} 1422 1423static void *search_cache( struct tnl_cache *cache, 1424 GLuint hash, 1425 const void *key, 1426 GLuint keysize) 1427{ 1428 struct tnl_cache_item *c; 1429 1430 for (c = cache->items[hash % cache->size]; c; c = c->next) { 1431 if (c->hash == hash && _mesa_memcmp(c->key, key, keysize) == 0) 1432 return c->data; 1433 } 1434 1435 return NULL; 1436} 1437 1438static void rehash( struct tnl_cache *cache ) 1439{ 1440 struct tnl_cache_item **items; 1441 struct tnl_cache_item *c, *next; 1442 GLuint size, i; 1443 1444 size = cache->size * 3; 1445 items = (struct tnl_cache_item**) _mesa_malloc(size * sizeof(*items)); 1446 _mesa_memset(items, 0, size * sizeof(*items)); 1447 1448 for (i = 0; i < cache->size; i++) 1449 for (c = cache->items[i]; c; c = next) { 1450 next = c->next; 1451 c->next = items[c->hash % size]; 1452 items[c->hash % size] = c; 1453 } 1454 1455 FREE(cache->items); 1456 cache->items = items; 1457 cache->size = size; 1458} 1459 1460static void cache_item( struct tnl_cache *cache, 1461 GLuint hash, 1462 void *key, 1463 void *data ) 1464{ 1465 struct tnl_cache_item *c = (struct tnl_cache_item*) _mesa_malloc(sizeof(*c)); 1466 c->hash = hash; 1467 c->key = key; 1468 c->data = data; 1469 1470 if (++cache->n_items > cache->size * 1.5) 1471 rehash(cache); 1472 1473 c->next = cache->items[hash % cache->size]; 1474 cache->items[hash % cache->size] = c; 1475} 1476 1477static GLuint hash_key( struct state_key *key ) 1478{ 1479 GLuint *ikey = (GLuint *)key; 1480 GLuint hash = 0, i; 1481 1482 /* I'm sure this can be improved on, but speed is important: 1483 */ 1484 for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++) 1485 hash ^= ikey[i]; 1486 1487 return hash; 1488} 1489 1490void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) 1491{ 1492 TNLcontext *tnl = TNL_CONTEXT(ctx); 1493 struct state_key *key; 1494 GLuint hash; 1495 const struct gl_vertex_program *prev = ctx->VertexProgram._Current; 1496 1497 if (ctx->VertexProgram._Enabled == GL_FALSE) { 1498 /* Grab all the relevent state and put it in a single structure: 1499 */ 1500 key = make_state_key(ctx); 1501 hash = hash_key(key); 1502 1503 /* Look for an already-prepared program for this state: 1504 */ 1505 ctx->_TnlProgram = (struct gl_vertex_program *) 1506 search_cache( tnl->vp_cache, hash, key, sizeof(*key) ); 1507 1508 /* OK, we'll have to build a new one: 1509 */ 1510 if (!ctx->_TnlProgram) { 1511 if (0) 1512 _mesa_printf("Build new TNL program\n"); 1513 1514 ctx->_TnlProgram = (struct gl_vertex_program *) 1515 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 1516 1517 create_new_program( key, ctx->_TnlProgram, 1518 ctx->Const.VertexProgram.MaxTemps ); 1519 1520 if (ctx->Driver.ProgramStringNotify) 1521 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1522 &ctx->_TnlProgram->Base ); 1523 1524 cache_item(tnl->vp_cache, hash, key, ctx->_TnlProgram ); 1525 } 1526 else { 1527 FREE(key); 1528 if (0) 1529 _mesa_printf("Found existing TNL program for key %x\n", hash); 1530 } 1531 ctx->VertexProgram._Current = ctx->_TnlProgram; 1532 } 1533 else { 1534 ctx->VertexProgram._Current = ctx->VertexProgram.Current; 1535 } 1536 1537 /* Tell the driver about the change. Could define a new target for 1538 * this? 1539 */ 1540 if (ctx->VertexProgram._Current != prev && 1541 ctx->Driver.BindProgram) 1542 ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, 1543 (struct gl_program *) ctx->VertexProgram._Current); 1544} 1545 1546void _tnl_ProgramCacheInit( GLcontext *ctx ) 1547{ 1548 TNLcontext *tnl = TNL_CONTEXT(ctx); 1549 1550 tnl->vp_cache = (struct tnl_cache *) MALLOC(sizeof(*tnl->vp_cache)); 1551 tnl->vp_cache->size = 17; 1552 tnl->vp_cache->n_items = 0; 1553 tnl->vp_cache->items = (struct tnl_cache_item**) 1554 _mesa_calloc(tnl->vp_cache->size * sizeof(*tnl->vp_cache->items)); 1555} 1556 1557void _tnl_ProgramCacheDestroy( GLcontext *ctx ) 1558{ 1559 TNLcontext *tnl = TNL_CONTEXT(ctx); 1560 struct tnl_cache_item *c, *next; 1561 GLuint i; 1562 1563 for (i = 0; i < tnl->vp_cache->size; i++) 1564 for (c = tnl->vp_cache->items[i]; c; c = next) { 1565 next = c->next; 1566 FREE(c->key); 1567 FREE(c->data); 1568 FREE(c); 1569 } 1570 1571 FREE(tnl->vp_cache->items); 1572 FREE(tnl->vp_cache); 1573} 1574