t_vp_build.c revision 37f2eaa316d507b729ca392b651ae18ef92efcac
1/* 2 * Mesa 3-D graphics library 3 * Version: 6.3.1 4 * 5 * Copyright (C) 2005 Tungsten Graphics All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 21 * WHETHER IN 22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26/** 27 * \file t_vp_build.c 28 * Create a vertex program to execute the current fixed function T&L pipeline. 29 * \author Keith Whitwell 30 */ 31 32 33#include "glheader.h" 34#include "macros.h" 35#include "enums.h" 36#include "t_context.h" 37#include "t_vp_build.h" 38 39#include "shader/program.h" 40#include "shader/nvvertprog.h" 41#include "shader/arbvertparse.h" 42 43struct state_key { 44 unsigned light_global_enabled:1; 45 unsigned light_local_viewer:1; 46 unsigned light_twoside:1; 47 unsigned light_color_material:1; 48 unsigned light_color_material_mask:12; 49 unsigned light_material_mask:12; 50 51 unsigned normalize:1; 52 unsigned rescale_normals:1; 53 unsigned fog_source_is_depth:1; 54 unsigned tnl_do_vertex_fog:1; 55 unsigned separate_specular:1; 56 unsigned fog_enabled:1; 57 unsigned fog_mode:2; 58 unsigned point_attenuated:1; 59 unsigned texture_enabled_global:1; 60 61 struct { 62 unsigned light_enabled:1; 63 unsigned light_eyepos3_is_zero:1; 64 unsigned light_spotcutoff_is_180:1; 65 unsigned light_attenuated:1; 66 unsigned texunit_really_enabled:1; 67 unsigned texmat_enabled:1; 68 unsigned texgen_enabled:4; 69 unsigned texgen_mode0:4; 70 unsigned texgen_mode1:4; 71 unsigned texgen_mode2:4; 72 unsigned texgen_mode3:4; 73 } unit[8]; 74}; 75 76 77 78#define FOG_LINEAR 0 79#define FOG_EXP 1 80#define FOG_EXP2 2 81#define FOG_UNKNOWN 3 82 83static GLuint translate_fog_mode( GLenum mode ) 84{ 85 switch (mode) { 86 case GL_LINEAR: return FOG_LINEAR; 87 case GL_EXP: return FOG_EXP; 88 case GL_EXP2: return FOG_EXP2; 89 default: return FOG_UNKNOWN; 90 } 91} 92 93#define TXG_NONE 0 94#define TXG_OBJ_LINEAR 1 95#define TXG_EYE_LINEAR 2 96#define TXG_SPHERE_MAP 3 97#define TXG_REFLECTION_MAP 4 98#define TXG_NORMAL_MAP 5 99 100static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 101{ 102 if (!enabled) 103 return TXG_NONE; 104 105 switch (mode) { 106 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 107 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 108 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 109 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 110 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 111 default: return TXG_NONE; 112 } 113} 114 115static struct state_key *make_state_key( GLcontext *ctx ) 116{ 117 TNLcontext *tnl = TNL_CONTEXT(ctx); 118 struct vertex_buffer *VB = &tnl->vb; 119 struct state_key *key = CALLOC_STRUCT(state_key); 120 GLuint i; 121 122 key->separate_specular = (ctx->Light.Model.ColorControl == 123 GL_SEPARATE_SPECULAR_COLOR); 124 125 if (ctx->Light.Enabled) { 126 key->light_global_enabled = 1; 127 128 if (ctx->Light.Model.LocalViewer) 129 key->light_local_viewer = 1; 130 131 if (ctx->Light.Model.TwoSide) 132 key->light_twoside = 1; 133 134 if (ctx->Light.ColorMaterialEnabled) { 135 key->light_color_material = 1; 136 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask; 137 } 138 139 for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT ; i < _TNL_ATTRIB_INDEX ; i++) 140 if (VB->AttribPtr[i]->stride) 141 key->light_material_mask |= 1<<(i-_TNL_ATTRIB_MAT_FRONT_AMBIENT); 142 143 for (i = 0; i < MAX_LIGHTS; i++) { 144 struct gl_light *light = &ctx->Light.Light[i]; 145 146 if (light->Enabled) { 147 key->unit[i].light_enabled = 1; 148 149 if (light->EyePosition[3] == 0.0) 150 key->unit[i].light_eyepos3_is_zero = 1; 151 152 if (light->SpotCutoff == 180.0) 153 key->unit[i].light_spotcutoff_is_180 = 1; 154 155 if (light->ConstantAttenuation != 1.0 || 156 light->LinearAttenuation != 0.0 || 157 light->QuadraticAttenuation != 0.0) 158 key->unit[i].light_attenuated = 1; 159 } 160 } 161 } 162 163 if (ctx->Transform.Normalize) 164 key->normalize = 1; 165 166 if (ctx->Transform.RescaleNormals) 167 key->rescale_normals = 1; 168 169 if (ctx->Fog.Enabled) 170 key->fog_enabled = 1; 171 172 if (key->fog_enabled) { 173 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) 174 key->fog_source_is_depth = 1; 175 176 if (tnl->_DoVertexFog) 177 key->tnl_do_vertex_fog = 1; 178 179 key->fog_mode = translate_fog_mode(ctx->Fog.Mode); 180 } 181 182 if (ctx->Point._Attenuated) 183 key->point_attenuated = 1; 184 185 if (ctx->Texture._TexGenEnabled || 186 ctx->Texture._TexMatEnabled || 187 ctx->Texture._EnabledUnits) 188 key->texture_enabled_global = 1; 189 190 for (i = 0; i < MAX_TEXTURE_UNITS; i++) { 191 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 192 193 if (texUnit->_ReallyEnabled) 194 key->unit[i].texunit_really_enabled = 1; 195 196 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 197 key->unit[i].texmat_enabled = 1; 198 199 if (texUnit->TexGenEnabled) { 200 key->unit[i].texgen_enabled = 1; 201 202 key->unit[i].texgen_mode0 = 203 translate_texgen( texUnit->TexGenEnabled & (1<<0), 204 texUnit->GenModeS ); 205 key->unit[i].texgen_mode1 = 206 translate_texgen( texUnit->TexGenEnabled & (1<<1), 207 texUnit->GenModeT ); 208 key->unit[i].texgen_mode2 = 209 translate_texgen( texUnit->TexGenEnabled & (1<<2), 210 texUnit->GenModeR ); 211 key->unit[i].texgen_mode3 = 212 translate_texgen( texUnit->TexGenEnabled & (1<<3), 213 texUnit->GenModeQ ); 214 } 215 } 216 217 return key; 218} 219 220 221 222/* Very useful debugging tool - produces annotated listing of 223 * generated program with line/function references for each 224 * instruction back into this file: 225 */ 226#define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM) 227 228/* Should be tunable by the driver - do we want to do matrix 229 * multiplications with DP4's or with MUL/MAD's? SSE works better 230 * with the latter, drivers may differ. 231 */ 232#define PREFER_DP4 0 233 234#define MAX_INSN 200 235 236/* Use uregs to represent registers internally, translate to Mesa's 237 * expected formats on emit. 238 * 239 * NOTE: These are passed by value extensively in this file rather 240 * than as usual by pointer reference. If this disturbs you, try 241 * remembering they are just 32bits in size. 242 * 243 * GCC is smart enough to deal with these dword-sized structures in 244 * much the same way as if I had defined them as dwords and was using 245 * macros to access and set the fields. This is much nicer and easier 246 * to evolve. 247 */ 248struct ureg { 249 GLuint file:4; 250 GLint idx:8; /* relative addressing may be negative */ 251 GLuint negate:1; 252 GLuint swz:12; 253 GLuint pad:7; 254}; 255 256 257struct tnl_program { 258 const struct state_key *state; 259 struct vertex_program *program; 260 261 GLuint temp_in_use; 262 GLuint temp_reserved; 263 264 struct ureg eye_position; 265 struct ureg eye_position_normalized; 266 struct ureg eye_normal; 267 struct ureg identity; 268 269 GLuint materials; 270 GLuint color_materials; 271}; 272 273 274const static struct ureg undef = { 275 ~0, 276 ~0, 277 0, 278 0, 279 0 280}; 281 282/* Local shorthand: 283 */ 284#define X SWIZZLE_X 285#define Y SWIZZLE_Y 286#define Z SWIZZLE_Z 287#define W SWIZZLE_W 288 289 290/* Construct a ureg: 291 */ 292static struct ureg make_ureg(GLuint file, GLint idx) 293{ 294 struct ureg reg; 295 reg.file = file; 296 reg.idx = idx; 297 reg.negate = 0; 298 reg.swz = SWIZZLE_NOOP; 299 reg.pad = 0; 300 return reg; 301} 302 303 304 305static struct ureg negate( struct ureg reg ) 306{ 307 reg.negate ^= 1; 308 return reg; 309} 310 311 312static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 313{ 314 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 315 GET_SWZ(reg.swz, y), 316 GET_SWZ(reg.swz, z), 317 GET_SWZ(reg.swz, w)); 318 319 return reg; 320} 321 322static struct ureg swizzle1( struct ureg reg, int x ) 323{ 324 return swizzle(reg, x, x, x, x); 325} 326 327static struct ureg get_temp( struct tnl_program *p ) 328{ 329 int bit = _mesa_ffs( ~p->temp_in_use ); 330 if (!bit) { 331 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 332 _mesa_exit(1); 333 } 334 335 p->temp_in_use |= 1<<(bit-1); 336 return make_ureg(PROGRAM_TEMPORARY, bit-1); 337} 338 339static struct ureg reserve_temp( struct tnl_program *p ) 340{ 341 struct ureg temp = get_temp( p ); 342 p->temp_reserved |= 1<<temp.idx; 343 return temp; 344} 345 346static void release_temp( struct tnl_program *p, struct ureg reg ) 347{ 348 if (reg.file == PROGRAM_TEMPORARY) { 349 p->temp_in_use &= ~(1<<reg.idx); 350 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 351 } 352} 353 354static void release_temps( struct tnl_program *p ) 355{ 356 p->temp_in_use = p->temp_reserved; 357} 358 359 360 361static struct ureg register_input( struct tnl_program *p, GLuint input ) 362{ 363 p->program->InputsRead |= (1<<input); 364 return make_ureg(PROGRAM_INPUT, input); 365} 366 367static struct ureg register_output( struct tnl_program *p, GLuint output ) 368{ 369 p->program->OutputsWritten |= (1<<output); 370 return make_ureg(PROGRAM_OUTPUT, output); 371} 372 373static struct ureg register_const4f( struct tnl_program *p, 374 GLfloat s0, 375 GLfloat s1, 376 GLfloat s2, 377 GLfloat s3) 378{ 379 GLfloat values[4]; 380 GLint idx; 381 values[0] = s0; 382 values[1] = s1; 383 values[2] = s2; 384 values[3] = s3; 385 idx = _mesa_add_unnamed_constant( p->program->Parameters, values ); 386 return make_ureg(PROGRAM_STATE_VAR, idx); 387} 388 389#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 390#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 391#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 392#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 393 394static GLboolean is_undef( struct ureg reg ) 395{ 396 return reg.file == 0xf; 397} 398 399static struct ureg get_identity_param( struct tnl_program *p ) 400{ 401 if (is_undef(p->identity)) 402 p->identity = register_const4f(p, 0,0,0,1); 403 404 return p->identity; 405} 406 407static struct ureg register_param6( struct tnl_program *p, 408 GLint s0, 409 GLint s1, 410 GLint s2, 411 GLint s3, 412 GLint s4, 413 GLint s5) 414{ 415 GLint tokens[6]; 416 GLint idx; 417 tokens[0] = s0; 418 tokens[1] = s1; 419 tokens[2] = s2; 420 tokens[3] = s3; 421 tokens[4] = s4; 422 tokens[5] = s5; 423 idx = _mesa_add_state_reference( p->program->Parameters, tokens ); 424 return make_ureg(PROGRAM_STATE_VAR, idx); 425} 426 427 428#define register_param1(p,s0) register_param6(p,s0,0,0,0,0,0) 429#define register_param2(p,s0,s1) register_param6(p,s0,s1,0,0,0,0) 430#define register_param3(p,s0,s1,s2) register_param6(p,s0,s1,s2,0,0,0) 431#define register_param4(p,s0,s1,s2,s3) register_param6(p,s0,s1,s2,s3,0,0) 432 433 434static void register_matrix_param6( struct tnl_program *p, 435 GLint s0, 436 GLint s1, 437 GLint s2, 438 GLint s3, 439 GLint s4, 440 GLint s5, 441 struct ureg *matrix ) 442{ 443 GLuint i; 444 445 /* This is a bit sad as the support is there to pull the whole 446 * matrix out in one go: 447 */ 448 for (i = 0; i <= s4 - s3; i++) 449 matrix[i] = register_param6( p, s0, s1, s2, i, i, s5 ); 450} 451 452 453static void emit_arg( struct vp_src_register *src, 454 struct ureg reg ) 455{ 456 src->File = reg.file; 457 src->Index = reg.idx; 458 src->Swizzle = reg.swz; 459 src->Negate = reg.negate; 460 src->RelAddr = 0; 461 src->pad = 0; 462} 463 464static void emit_dst( struct vp_dst_register *dst, 465 struct ureg reg, GLuint mask ) 466{ 467 dst->File = reg.file; 468 dst->Index = reg.idx; 469 /* allow zero as a shorthand for xyzw */ 470 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 471 dst->pad = 0; 472} 473 474static void debug_insn( struct vp_instruction *inst, const char *fn, 475 GLuint line ) 476{ 477 if (DISASSEM) { 478 static const char *last_fn; 479 480 if (fn != last_fn) { 481 last_fn = fn; 482 _mesa_printf("%s:\n", fn); 483 } 484 485 _mesa_printf("%d:\t", line); 486 _mesa_debug_vp_inst(1, inst); 487 } 488} 489 490 491static void emit_op3fn(struct tnl_program *p, 492 GLuint op, 493 struct ureg dest, 494 GLuint mask, 495 struct ureg src0, 496 struct ureg src1, 497 struct ureg src2, 498 const char *fn, 499 GLuint line) 500{ 501 GLuint nr = p->program->Base.NumInstructions++; 502 struct vp_instruction *inst = &p->program->Instructions[nr]; 503 504 if (p->program->Base.NumInstructions > MAX_INSN) { 505 _mesa_problem(0, "Out of instructions in emit_op3fn\n"); 506 return; 507 } 508 509 inst->Opcode = op; 510 inst->StringPos = 0; 511 inst->Data = 0; 512 513 emit_arg( &inst->SrcReg[0], src0 ); 514 emit_arg( &inst->SrcReg[1], src1 ); 515 emit_arg( &inst->SrcReg[2], src2 ); 516 517 emit_dst( &inst->DstReg, dest, mask ); 518 519 debug_insn(inst, fn, line); 520} 521 522 523#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 524 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 525 526#define emit_op2(p, op, dst, mask, src0, src1) \ 527 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 528 529#define emit_op1(p, op, dst, mask, src0) \ 530 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 531 532 533static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 534{ 535 if (reg.file == PROGRAM_TEMPORARY && 536 !(p->temp_reserved & (1<<reg.idx))) 537 return reg; 538 else { 539 struct ureg temp = get_temp(p); 540 emit_op1(p, VP_OPCODE_MOV, temp, 0, reg); 541 return temp; 542 } 543} 544 545 546/* Currently no tracking performed of input/output/register size or 547 * active elements. Could be used to reduce these operations, as 548 * could the matrix type. 549 */ 550static void emit_matrix_transform_vec4( struct tnl_program *p, 551 struct ureg dest, 552 const struct ureg *mat, 553 struct ureg src) 554{ 555 emit_op2(p, VP_OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 556 emit_op2(p, VP_OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 557 emit_op2(p, VP_OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 558 emit_op2(p, VP_OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 559} 560 561/* This version is much easier to implement if writemasks are not 562 * supported natively on the target or (like SSE), the target doesn't 563 * have a clean/obvious dotproduct implementation. 564 */ 565static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 566 struct ureg dest, 567 const struct ureg *mat, 568 struct ureg src) 569{ 570 struct ureg tmp; 571 572 if (dest.file != PROGRAM_TEMPORARY) 573 tmp = get_temp(p); 574 else 575 tmp = dest; 576 577 emit_op2(p, VP_OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 578 emit_op3(p, VP_OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 579 emit_op3(p, VP_OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 580 emit_op3(p, VP_OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 581 582 if (dest.file != PROGRAM_TEMPORARY) 583 release_temp(p, tmp); 584} 585 586static void emit_matrix_transform_vec3( struct tnl_program *p, 587 struct ureg dest, 588 const struct ureg *mat, 589 struct ureg src) 590{ 591 emit_op2(p, VP_OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 592 emit_op2(p, VP_OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 593 emit_op2(p, VP_OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 594} 595 596 597static void emit_normalize_vec3( struct tnl_program *p, 598 struct ureg dest, 599 struct ureg src ) 600{ 601 struct ureg tmp = get_temp(p); 602 emit_op2(p, VP_OPCODE_DP3, tmp, 0, src, src); 603 emit_op1(p, VP_OPCODE_RSQ, tmp, 0, tmp); 604 emit_op2(p, VP_OPCODE_MUL, dest, 0, src, tmp); 605 release_temp(p, tmp); 606} 607 608static void emit_passthrough( struct tnl_program *p, 609 GLuint input, 610 GLuint output ) 611{ 612 struct ureg out = register_output(p, output); 613 emit_op1(p, VP_OPCODE_MOV, out, 0, register_input(p, input)); 614} 615 616static struct ureg get_eye_position( struct tnl_program *p ) 617{ 618 if (is_undef(p->eye_position)) { 619 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 620 struct ureg modelview[4]; 621 622 p->eye_position = reserve_temp(p); 623 624 if (PREFER_DP4) { 625 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, 626 STATE_MATRIX, modelview ); 627 628 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 629 } 630 else { 631 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 3, 632 STATE_MATRIX_TRANSPOSE, modelview ); 633 634 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 635 } 636 } 637 638 return p->eye_position; 639} 640 641 642static struct ureg get_eye_position_normalized( struct tnl_program *p ) 643{ 644 if (is_undef(p->eye_position_normalized)) { 645 struct ureg eye = get_eye_position(p); 646 p->eye_position_normalized = reserve_temp(p); 647 emit_normalize_vec3(p, p->eye_position_normalized, eye); 648 } 649 650 return p->eye_position_normalized; 651} 652 653 654static struct ureg get_eye_normal( struct tnl_program *p ) 655{ 656 if (is_undef(p->eye_normal)) { 657 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 658 struct ureg mvinv[3]; 659 660 register_matrix_param6( p, STATE_MATRIX, STATE_MODELVIEW, 0, 0, 2, 661 STATE_MATRIX_INVTRANS, mvinv ); 662 663 p->eye_normal = reserve_temp(p); 664 665 /* Transform to eye space: 666 */ 667 emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal ); 668 669 /* Normalize/Rescale: 670 */ 671 if (p->state->normalize) { 672 emit_normalize_vec3( p, p->eye_normal, p->eye_normal ); 673 } 674 else if (p->state->rescale_normals) { 675 struct ureg rescale = register_param2(p, STATE_INTERNAL, 676 STATE_NORMAL_SCALE); 677 678 emit_op2( p, VP_OPCODE_MUL, p->eye_normal, 0, normal, 679 swizzle1(rescale, X)); 680 } 681 } 682 683 return p->eye_normal; 684} 685 686 687 688static void build_hpos( struct tnl_program *p ) 689{ 690 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 691 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 692 struct ureg mvp[4]; 693 694 if (PREFER_DP4) { 695 register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, 696 STATE_MATRIX, mvp ); 697 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 698 } 699 else { 700 register_matrix_param6( p, STATE_MATRIX, STATE_MVP, 0, 0, 3, 701 STATE_MATRIX_TRANSPOSE, mvp ); 702 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 703 } 704} 705 706 707static GLuint material_attrib( GLuint side, GLuint property ) 708{ 709 return ((property - STATE_AMBIENT) * 2 + 710 side); 711} 712 713static void set_material_flags( struct tnl_program *p ) 714{ 715 p->color_materials = 0; 716 p->materials = 0; 717 718 if (p->state->light_color_material) { 719 p->materials = 720 p->color_materials = p->state->light_color_material_mask; 721 } 722 723 p->materials |= p->state->light_material_mask; 724} 725 726 727static struct ureg get_material( struct tnl_program *p, GLuint side, 728 GLuint property ) 729{ 730 GLuint attrib = material_attrib(side, property); 731 732 if (p->color_materials & (1<<attrib)) 733 return register_input(p, VERT_ATTRIB_COLOR0); 734 else if (p->materials & (1<<attrib)) 735 return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT ); 736 else 737 return register_param3( p, STATE_MATERIAL, side, property ); 738} 739 740#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 741 MAT_BIT_FRONT_AMBIENT | \ 742 MAT_BIT_FRONT_DIFFUSE) << (side)) 743 744/* Either return a precalculated constant value or emit code to 745 * calculate these values dynamically in the case where material calls 746 * are present between begin/end pairs. 747 * 748 * Probably want to shift this to the program compilation phase - if 749 * we always emitted the calculation here, a smart compiler could 750 * detect that it was constant (given a certain set of inputs), and 751 * lift it out of the main loop. That way the programs created here 752 * would be independent of the vertex_buffer details. 753 */ 754static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 755{ 756 if (p->materials & SCENE_COLOR_BITS(side)) { 757 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 758 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 759 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 760 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 761 struct ureg tmp = make_temp(p, material_diffuse); 762 emit_op3(p, VP_OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 763 material_ambient, material_emission); 764 return tmp; 765 } 766 else 767 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 768} 769 770 771static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 772 GLuint side, GLuint property ) 773{ 774 GLuint attrib = material_attrib(side, property); 775 if (p->materials & (1<<attrib)) { 776 struct ureg light_value = 777 register_param3(p, STATE_LIGHT, light, property); 778 struct ureg material_value = get_material(p, side, property); 779 struct ureg tmp = get_temp(p); 780 emit_op2(p, VP_OPCODE_MUL, tmp, 0, light_value, material_value); 781 return tmp; 782 } 783 else 784 return register_param4(p, STATE_LIGHTPROD, light, side, property); 785} 786 787static struct ureg calculate_light_attenuation( struct tnl_program *p, 788 GLuint i, 789 struct ureg VPpli, 790 struct ureg dist ) 791{ 792 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 793 STATE_ATTENUATION); 794 struct ureg att = get_temp(p); 795 796 /* Calculate spot attenuation: 797 */ 798 if (!p->state->unit[i].light_spotcutoff_is_180) { 799 struct ureg spot_dir = register_param3(p, STATE_LIGHT, i, 800 STATE_SPOT_DIRECTION); 801 struct ureg spot = get_temp(p); 802 struct ureg slt = get_temp(p); 803 804 emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! */ 805 emit_op2(p, VP_OPCODE_DP3, spot, 0, negate(VPpli), spot); 806 emit_op2(p, VP_OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot); 807 emit_op2(p, VP_OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 808 emit_op2(p, VP_OPCODE_MUL, att, 0, slt, spot); 809 810 release_temp(p, spot); 811 release_temp(p, slt); 812 } 813 814 /* Calculate distance attenuation: 815 */ 816 if (p->state->unit[i].light_attenuated) { 817 818 /* 1/d,d,d,1/d */ 819 emit_op1(p, VP_OPCODE_RCP, dist, WRITEMASK_YZ, dist); 820 /* 1,d,d*d,1/d */ 821 emit_op2(p, VP_OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 822 /* 1/dist-atten */ 823 emit_op2(p, VP_OPCODE_DP3, dist, 0, attenuation, dist); 824 825 if (!p->state->unit[i].light_spotcutoff_is_180) { 826 /* dist-atten */ 827 emit_op1(p, VP_OPCODE_RCP, dist, 0, dist); 828 /* spot-atten * dist-atten */ 829 emit_op2(p, VP_OPCODE_MUL, att, 0, dist, att); 830 } else { 831 /* dist-atten */ 832 emit_op1(p, VP_OPCODE_RCP, att, 0, dist); 833 } 834 } 835 836 return att; 837} 838 839 840 841 842 843/* Need to add some addtional parameters to allow lighting in object 844 * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye 845 * space lighting. 846 */ 847static void build_lighting( struct tnl_program *p ) 848{ 849 const GLboolean twoside = p->state->light_twoside; 850 const GLboolean separate = p->state->separate_specular; 851 GLuint nr_lights = 0, count = 0; 852 struct ureg normal = get_eye_normal(p); 853 struct ureg lit = get_temp(p); 854 struct ureg dots = get_temp(p); 855 struct ureg _col0 = undef, _col1 = undef; 856 struct ureg _bfc0 = undef, _bfc1 = undef; 857 GLuint i; 858 859 for (i = 0; i < MAX_LIGHTS; i++) 860 if (p->state->unit[i].light_enabled) 861 nr_lights++; 862 863 set_material_flags(p); 864 865 { 866 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 867 emit_op1(p, VP_OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 868 release_temp(p, shininess); 869 870 _col0 = make_temp(p, get_scenecolor(p, 0)); 871 if (separate) 872 _col1 = make_temp(p, get_identity_param(p)); 873 else 874 _col1 = _col0; 875 876 } 877 878 if (twoside) { 879 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 880 emit_op1(p, VP_OPCODE_MOV, dots, WRITEMASK_Z, 881 negate(swizzle1(shininess,X))); 882 release_temp(p, shininess); 883 884 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 885 if (separate) 886 _bfc1 = make_temp(p, get_identity_param(p)); 887 else 888 _bfc1 = _bfc0; 889 } 890 891 892 /* If no lights, still need to emit the scenecolor. 893 */ 894 { 895 struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); 896 emit_op1(p, VP_OPCODE_MOV, res0, 0, _col0); 897 } 898 899 if (separate) { 900 struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); 901 emit_op1(p, VP_OPCODE_MOV, res1, 0, _col1); 902 } 903 904 if (twoside) { 905 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); 906 emit_op1(p, VP_OPCODE_MOV, res0, 0, _bfc0); 907 } 908 909 if (twoside && separate) { 910 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); 911 emit_op1(p, VP_OPCODE_MOV, res1, 0, _bfc1); 912 } 913 914 if (nr_lights == 0) { 915 release_temps(p); 916 return; 917 } 918 919 920 for (i = 0; i < MAX_LIGHTS; i++) { 921 if (p->state->unit[i].light_enabled) { 922 struct ureg half = undef; 923 struct ureg att = undef, VPpli = undef; 924 925 count++; 926 927 if (p->state->unit[i].light_eyepos3_is_zero) { 928 /* Can used precomputed constants in this case. 929 * Attenuation never applies to infinite lights. 930 */ 931 VPpli = register_param3(p, STATE_LIGHT, i, 932 STATE_POSITION_NORMALIZED); 933 half = register_param3(p, STATE_LIGHT, i, STATE_HALF); 934 } 935 else { 936 struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 937 STATE_POSITION); 938 struct ureg V = get_eye_position(p); 939 struct ureg dist = get_temp(p); 940 941 VPpli = get_temp(p); 942 half = get_temp(p); 943 944 /* Calulate VPpli vector 945 */ 946 emit_op2(p, VP_OPCODE_SUB, VPpli, 0, Ppli, V); 947 948 /* Normalize VPpli. The dist value also used in 949 * attenuation below. 950 */ 951 emit_op2(p, VP_OPCODE_DP3, dist, 0, VPpli, VPpli); 952 emit_op1(p, VP_OPCODE_RSQ, dist, 0, dist); 953 emit_op2(p, VP_OPCODE_MUL, VPpli, 0, VPpli, dist); 954 955 956 /* Calculate attenuation: 957 */ 958 if (!p->state->unit[i].light_spotcutoff_is_180 || 959 p->state->unit[i].light_attenuated) { 960 att = calculate_light_attenuation(p, i, VPpli, dist); 961 } 962 963 964 /* Calculate viewer direction, or use infinite viewer: 965 */ 966 if (p->state->light_local_viewer) { 967 struct ureg eye_hat = get_eye_position_normalized(p); 968 emit_op2(p, VP_OPCODE_SUB, half, 0, VPpli, eye_hat); 969 } 970 else { 971 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 972 emit_op2(p, VP_OPCODE_ADD, half, 0, VPpli, z_dir); 973 } 974 975 emit_normalize_vec3(p, half, half); 976 977 release_temp(p, dist); 978 } 979 980 /* Calculate dot products: 981 */ 982 emit_op2(p, VP_OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 983 emit_op2(p, VP_OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 984 985 986 /* Front face lighting: 987 */ 988 { 989 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 990 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 991 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 992 struct ureg res0, res1; 993 GLuint mask0, mask1; 994 995 emit_op1(p, VP_OPCODE_LIT, lit, 0, dots); 996 997 if (!is_undef(att)) 998 emit_op2(p, VP_OPCODE_MUL, lit, 0, lit, att); 999 1000 1001 if (count == nr_lights) { 1002 if (separate) { 1003 mask0 = WRITEMASK_XYZ; 1004 mask1 = WRITEMASK_XYZ; 1005 res0 = register_output( p, VERT_RESULT_COL0 ); 1006 res1 = register_output( p, VERT_RESULT_COL1 ); 1007 } 1008 else { 1009 mask0 = 0; 1010 mask1 = WRITEMASK_XYZ; 1011 res0 = _col0; 1012 res1 = register_output( p, VERT_RESULT_COL0 ); 1013 } 1014 } else { 1015 mask0 = 0; 1016 mask1 = 0; 1017 res0 = _col0; 1018 res1 = _col1; 1019 } 1020 1021 emit_op3(p, VP_OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1022 emit_op3(p, VP_OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1023 emit_op3(p, VP_OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1024 1025 release_temp(p, ambient); 1026 release_temp(p, diffuse); 1027 release_temp(p, specular); 1028 } 1029 1030 /* Back face lighting: 1031 */ 1032 if (twoside) { 1033 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1034 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1035 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1036 struct ureg res0, res1; 1037 GLuint mask0, mask1; 1038 1039 emit_op1(p, VP_OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z))); 1040 1041 if (!is_undef(att)) 1042 emit_op2(p, VP_OPCODE_MUL, lit, 0, lit, att); 1043 1044 if (count == nr_lights) { 1045 if (separate) { 1046 mask0 = WRITEMASK_XYZ; 1047 mask1 = WRITEMASK_XYZ; 1048 res0 = register_output( p, VERT_RESULT_BFC0 ); 1049 res1 = register_output( p, VERT_RESULT_BFC1 ); 1050 } 1051 else { 1052 mask0 = 0; 1053 mask1 = WRITEMASK_XYZ; 1054 res0 = _bfc0; 1055 res1 = register_output( p, VERT_RESULT_BFC0 ); 1056 } 1057 } else { 1058 res0 = _bfc0; 1059 res1 = _bfc1; 1060 mask0 = 0; 1061 mask1 = 0; 1062 } 1063 1064 emit_op3(p, VP_OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1065 emit_op3(p, VP_OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1066 emit_op3(p, VP_OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1067 1068 release_temp(p, ambient); 1069 release_temp(p, diffuse); 1070 release_temp(p, specular); 1071 } 1072 1073 release_temp(p, half); 1074 release_temp(p, VPpli); 1075 release_temp(p, att); 1076 } 1077 } 1078 1079 release_temps( p ); 1080} 1081 1082 1083static void build_fog( struct tnl_program *p ) 1084{ 1085 struct ureg fog = register_output(p, VERT_RESULT_FOGC); 1086 struct ureg input; 1087 1088 if (p->state->fog_source_is_depth) { 1089 input = swizzle1(get_eye_position(p), Z); 1090 } 1091 else { 1092 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1093 } 1094 1095 if (p->state->tnl_do_vertex_fog) { 1096 struct ureg params = register_param1(p, STATE_FOG_PARAMS); 1097 struct ureg tmp = get_temp(p); 1098 1099 switch (p->state->fog_mode) { 1100 case FOG_LINEAR: { 1101 struct ureg id = get_identity_param(p); 1102 emit_op2(p, VP_OPCODE_SUB, tmp, 0, swizzle1(params,Z), input); 1103 emit_op2(p, VP_OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); 1104 emit_op2(p, VP_OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ 1105 emit_op2(p, VP_OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); 1106 break; 1107 } 1108 case FOG_EXP: 1109 emit_op1(p, VP_OPCODE_ABS, tmp, 0, input); 1110 emit_op2(p, VP_OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); 1111 emit_op2(p, VP_OPCODE_POW, fog, WRITEMASK_X, 1112 register_const1f(p, M_E), negate(tmp)); 1113 break; 1114 case FOG_EXP2: 1115 emit_op2(p, VP_OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); 1116 emit_op2(p, VP_OPCODE_MUL, tmp, 0, tmp, tmp); 1117 emit_op2(p, VP_OPCODE_POW, fog, WRITEMASK_X, 1118 register_const1f(p, M_E), negate(tmp)); 1119 break; 1120 } 1121 1122 release_temp(p, tmp); 1123 } 1124 else { 1125 /* results = incoming fog coords (compute fog per-fragment later) 1126 * 1127 * KW: Is it really necessary to do anything in this case? 1128 */ 1129 emit_op1(p, VP_OPCODE_MOV, fog, WRITEMASK_X, input); 1130 } 1131} 1132 1133static void build_reflect_texgen( struct tnl_program *p, 1134 struct ureg dest, 1135 GLuint writemask ) 1136{ 1137 struct ureg normal = get_eye_normal(p); 1138 struct ureg eye_hat = get_eye_position_normalized(p); 1139 struct ureg tmp = get_temp(p); 1140 1141 /* n.u */ 1142 emit_op2(p, VP_OPCODE_DP3, tmp, 0, normal, eye_hat); 1143 /* 2n.u */ 1144 emit_op2(p, VP_OPCODE_ADD, tmp, 0, tmp, tmp); 1145 /* (-2n.u)n + u */ 1146 emit_op3(p, VP_OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1147} 1148 1149static void build_sphere_texgen( struct tnl_program *p, 1150 struct ureg dest, 1151 GLuint writemask ) 1152{ 1153 struct ureg normal = get_eye_normal(p); 1154 struct ureg eye_hat = get_eye_position_normalized(p); 1155 struct ureg tmp = get_temp(p); 1156 struct ureg half = register_scalar_const(p, .5); 1157 struct ureg r = get_temp(p); 1158 struct ureg inv_m = get_temp(p); 1159 struct ureg id = get_identity_param(p); 1160 1161 /* Could share the above calculations, but it would be 1162 * a fairly odd state for someone to set (both sphere and 1163 * reflection active for different texture coordinate 1164 * components. Of course - if two texture units enable 1165 * reflect and/or sphere, things start to tilt in favour 1166 * of seperating this out: 1167 */ 1168 1169 /* n.u */ 1170 emit_op2(p, VP_OPCODE_DP3, tmp, 0, normal, eye_hat); 1171 /* 2n.u */ 1172 emit_op2(p, VP_OPCODE_ADD, tmp, 0, tmp, tmp); 1173 /* (-2n.u)n + u */ 1174 emit_op3(p, VP_OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1175 /* r + 0,0,1 */ 1176 emit_op2(p, VP_OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1177 /* rx^2 + ry^2 + (rz+1)^2 */ 1178 emit_op2(p, VP_OPCODE_DP3, tmp, 0, tmp, tmp); 1179 /* 2/m */ 1180 emit_op1(p, VP_OPCODE_RSQ, tmp, 0, tmp); 1181 /* 1/m */ 1182 emit_op2(p, VP_OPCODE_MUL, inv_m, 0, tmp, half); 1183 /* r/m + 1/2 */ 1184 emit_op3(p, VP_OPCODE_MAD, dest, writemask, r, inv_m, half); 1185 1186 release_temp(p, tmp); 1187 release_temp(p, r); 1188 release_temp(p, inv_m); 1189} 1190 1191 1192static void build_texture_transform( struct tnl_program *p ) 1193{ 1194 GLuint i, j; 1195 1196 for (i = 0; i < MAX_TEXTURE_UNITS; i++) { 1197 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1198 1199 if (p->state->unit[i].texgen_enabled || texmat_enabled) { 1200 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 1201 struct ureg out_texgen = undef; 1202 1203 if (p->state->unit[i].texgen_enabled) { 1204 GLuint copy_mask = 0; 1205 GLuint sphere_mask = 0; 1206 GLuint reflect_mask = 0; 1207 GLuint normal_mask = 0; 1208 GLuint modes[4]; 1209 1210 if (texmat_enabled) 1211 out_texgen = get_temp(p); 1212 else 1213 out_texgen = out; 1214 1215 modes[0] = p->state->unit[i].texgen_mode0; 1216 modes[1] = p->state->unit[i].texgen_mode1; 1217 modes[2] = p->state->unit[i].texgen_mode2; 1218 modes[3] = p->state->unit[i].texgen_mode3; 1219 1220 for (j = 0; j < 4; j++) { 1221 switch (modes[j]) { 1222 case TXG_OBJ_LINEAR: { 1223 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1224 struct ureg plane = 1225 register_param3(p, STATE_TEXGEN, i, 1226 STATE_TEXGEN_OBJECT_S + j); 1227 1228 emit_op2(p, VP_OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1229 obj, plane ); 1230 break; 1231 } 1232 case TXG_EYE_LINEAR: { 1233 struct ureg eye = get_eye_position(p); 1234 struct ureg plane = 1235 register_param3(p, STATE_TEXGEN, i, 1236 STATE_TEXGEN_EYE_S + j); 1237 1238 emit_op2(p, VP_OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1239 eye, plane ); 1240 break; 1241 } 1242 case TXG_SPHERE_MAP: 1243 sphere_mask |= WRITEMASK_X << j; 1244 break; 1245 case TXG_REFLECTION_MAP: 1246 reflect_mask |= WRITEMASK_X << j; 1247 break; 1248 case TXG_NORMAL_MAP: 1249 normal_mask |= WRITEMASK_X << j; 1250 break; 1251 case TXG_NONE: 1252 copy_mask |= WRITEMASK_X << j; 1253 } 1254 1255 } 1256 1257 1258 if (sphere_mask) { 1259 build_sphere_texgen(p, out_texgen, sphere_mask); 1260 } 1261 1262 if (reflect_mask) { 1263 build_reflect_texgen(p, out_texgen, reflect_mask); 1264 } 1265 1266 if (normal_mask) { 1267 struct ureg normal = get_eye_normal(p); 1268 emit_op1(p, VP_OPCODE_MOV, out_texgen, normal_mask, normal ); 1269 } 1270 1271 if (copy_mask) { 1272 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1273 emit_op1(p, VP_OPCODE_MOV, out_texgen, copy_mask, in ); 1274 } 1275 } 1276 1277 if (texmat_enabled) { 1278 struct ureg texmat[4]; 1279 struct ureg in = (!is_undef(out_texgen) ? 1280 out_texgen : 1281 register_input(p, VERT_ATTRIB_TEX0+i)); 1282 if (PREFER_DP4) { 1283 register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, 1284 0, 3, STATE_MATRIX, texmat ); 1285 emit_matrix_transform_vec4( p, out, texmat, in ); 1286 } 1287 else { 1288 register_matrix_param6( p, STATE_MATRIX, STATE_TEXTURE, i, 1289 0, 3, STATE_MATRIX_TRANSPOSE, texmat ); 1290 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1291 } 1292 } 1293 1294 release_temps(p); 1295 } 1296 else if (p->state->unit[i].texunit_really_enabled) { 1297 /* KW: _ReallyEnabled isn't sufficient? Need to know whether 1298 * this texture unit is referenced by the fragment shader. 1299 */ 1300 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); 1301 } 1302 } 1303} 1304 1305 1306/* Seems like it could be tighter: 1307 */ 1308static void build_pointsize( struct tnl_program *p ) 1309{ 1310 struct ureg eye = get_eye_position(p); 1311 struct ureg state_size = register_param1(p, STATE_POINT_SIZE); 1312 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1313 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1314 struct ureg ut = get_temp(p); 1315 1316 /* 1, -Z, Z * Z, 1 */ 1317 emit_op1(p, VP_OPCODE_MOV, ut, 0, swizzle1(get_identity_param(p), W)); 1318 emit_op2(p, VP_OPCODE_MUL, ut, WRITEMASK_YZ, ut, negate(swizzle1(eye, Z))); 1319 emit_op2(p, VP_OPCODE_MUL, ut, WRITEMASK_Z, ut, negate(swizzle1(eye, Z))); 1320 1321 1322 /* p1 + p2 * dist + p3 * dist * dist, 0 */ 1323 emit_op2(p, VP_OPCODE_DP3, ut, 0, ut, state_attenuation); 1324 1325 /* 1 / factor */ 1326 emit_op1(p, VP_OPCODE_RCP, ut, 0, ut ); 1327 1328 /* out = pointSize / factor */ 1329 emit_op2(p, VP_OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1330 1331 release_temp(p, ut); 1332} 1333 1334static void build_tnl_program( struct tnl_program *p ) 1335{ /* Emit the program, starting with modelviewproject: 1336 */ 1337 build_hpos(p); 1338 1339 /* Lighting calculations: 1340 */ 1341 if (p->state->light_global_enabled) 1342 build_lighting(p); 1343 else 1344 emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); 1345 1346 if (p->state->fog_enabled) 1347 build_fog(p); 1348 1349 if (p->state->texture_enabled_global) 1350 build_texture_transform(p); 1351 1352 if (p->state->point_attenuated) 1353 build_pointsize(p); 1354 1355 /* Finish up: 1356 */ 1357 emit_op1(p, VP_OPCODE_END, undef, 0, undef); 1358 1359 /* Disassemble: 1360 */ 1361 if (DISASSEM) { 1362 _mesa_printf ("\n"); 1363 } 1364} 1365 1366 1367static void 1368create_new_program( const struct state_key *key, 1369 struct vertex_program *program, 1370 GLuint max_temps) 1371{ 1372 struct tnl_program p; 1373 1374 _mesa_memset(&p, 0, sizeof(p)); 1375 p.state = key; 1376 p.program = program; 1377 p.eye_position = undef; 1378 p.eye_position_normalized = undef; 1379 p.eye_normal = undef; 1380 p.identity = undef; 1381 p.temp_in_use = 0; 1382 1383 if (max_temps >= sizeof(int) * 8) 1384 p.temp_reserved = 0; 1385 else 1386 p.temp_reserved = ~((1<<max_temps)-1); 1387 1388 p.program->Instructions = MALLOC(sizeof(struct vp_instruction) * MAX_INSN); 1389 p.program->Base.String = 0; 1390 p.program->Base.NumInstructions = 1391 p.program->Base.NumTemporaries = 1392 p.program->Base.NumParameters = 1393 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1394 p.program->Parameters = _mesa_new_parameter_list(); 1395 p.program->InputsRead = 0; 1396 p.program->OutputsWritten = 0; 1397 1398 build_tnl_program( &p ); 1399} 1400 1401static void *search_cache( struct tnl_cache *cache, 1402 GLuint hash, 1403 const void *key, 1404 GLuint keysize) 1405{ 1406 struct tnl_cache_item *c; 1407 1408 for (c = cache->items[hash % cache->size]; c; c = c->next) { 1409 if (c->hash == hash && _mesa_memcmp(c->key, key, keysize) == 0) 1410 return c->data; 1411 } 1412 1413 return NULL; 1414} 1415 1416static void rehash( struct tnl_cache *cache ) 1417{ 1418 struct tnl_cache_item **items; 1419 struct tnl_cache_item *c, *next; 1420 GLuint size, i; 1421 1422 size = cache->size * 3; 1423 items = MALLOC(size * sizeof(*items)); 1424 _mesa_memset(items, 0, size * sizeof(*items)); 1425 1426 for (i = 0; i < cache->size; i++) 1427 for (c = cache->items[i]; c; c = next) { 1428 next = c->next; 1429 c->next = items[c->hash % size]; 1430 items[c->hash % size] = c; 1431 } 1432 1433 FREE(cache->items); 1434 cache->items = items; 1435 cache->size = size; 1436} 1437 1438static void cache_item( struct tnl_cache *cache, 1439 GLuint hash, 1440 void *key, 1441 void *data ) 1442{ 1443 struct tnl_cache_item *c = MALLOC(sizeof(*c)); 1444 c->hash = hash; 1445 c->key = key; 1446 c->data = data; 1447 1448 if (++cache->n_items > cache->size * 1.5) 1449 rehash(cache); 1450 1451 c->next = cache->items[hash % cache->size]; 1452 cache->items[hash % cache->size] = c; 1453} 1454 1455static GLuint hash_key( struct state_key *key ) 1456{ 1457 GLuint *ikey = (GLuint *)key; 1458 GLuint hash = 0, i; 1459 1460 /* I'm sure this can be improved on, but speed is important: 1461 */ 1462 for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++) 1463 hash ^= ikey[i]; 1464 1465 return hash; 1466} 1467 1468void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) 1469{ 1470 TNLcontext *tnl = TNL_CONTEXT(ctx); 1471 struct state_key *key; 1472 GLuint hash; 1473 1474 if (ctx->VertexProgram._Enabled) 1475 return; 1476 1477 /* Grab all the relevent state and put it in a single structure: 1478 */ 1479 key = make_state_key(ctx); 1480 hash = hash_key(key); 1481 1482 if (tnl->vp_cache == NULL) { 1483 tnl->vp_cache = MALLOC(sizeof(*tnl->vp_cache)); 1484 tnl->vp_cache->size = 5; 1485 tnl->vp_cache->n_items = 0; 1486 tnl->vp_cache->items = MALLOC(tnl->vp_cache->size * 1487 sizeof(*tnl->vp_cache->items)); 1488 _mesa_memset(tnl->vp_cache->items, 0, tnl->vp_cache->size * 1489 sizeof(*tnl->vp_cache->items)); 1490 } 1491 1492 /* Look for an already-prepared program for this state: 1493 */ 1494 ctx->_TnlProgram = (struct vertex_program *) 1495 search_cache( tnl->vp_cache, hash, key, sizeof(*key) ); 1496 1497 /* OK, we'll have to build a new one: 1498 */ 1499 if (!ctx->_TnlProgram) { 1500 if (0) 1501 _mesa_printf("Build new TNL program\n"); 1502 1503 ctx->_TnlProgram = (struct vertex_program *) 1504 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 1505 1506 create_new_program( key, ctx->_TnlProgram, 1507 ctx->Const.MaxVertexProgramTemps ); 1508 1509 cache_item(tnl->vp_cache, hash, key, ctx->_TnlProgram ); 1510 } 1511 else { 1512 FREE(key); 1513 if (0) 1514 _mesa_printf("Found existing TNL program for key %x\n", hash); 1515 } 1516 1517 /* Need a BindProgram callback for the driver? 1518 */ 1519} 1520 1521 1522void _tnl_ProgramCacheDestroy( GLcontext *ctx ) 1523{ 1524 TNLcontext *tnl = TNL_CONTEXT(ctx); 1525 struct tnl_cache_item *c, *next; 1526 GLuint i; 1527 1528 for (i = 0; i < tnl->vp_cache->size; i++) 1529 for (c = tnl->vp_cache->items[i]; c; c = next) { 1530 next = c->next; 1531 FREE(c->key); 1532 FREE(c->data); 1533 FREE(c); 1534 } 1535 1536 FREE(tnl->vp_cache->items); 1537 FREE(tnl->vp_cache); 1538} 1539