t_vp_build.c revision 6138ee9de0330b9a2bf300bc0d52b471191dd1ed
1/* 2 * Mesa 3-D graphics library 3 * Version: 7.1 4 * 5 * Copyright (C) 2007 Tungsten Graphics All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * TUNGSTEN GRAPHICS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 21 * WHETHER IN 22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26/** 27 * \file t_vp_build.c 28 * Create a vertex program to execute the current fixed function T&L pipeline. 29 * \author Keith Whitwell 30 */ 31 32 33#include "glheader.h" 34#include "macros.h" 35#include "enums.h" 36#include "shader/program.h" 37#include "shader/prog_instruction.h" 38#include "shader/prog_parameter.h" 39#include "shader/prog_print.h" 40#include "shader/prog_statevars.h" 41#include "t_context.h" /* NOTE: very light dependency on this */ 42#include "t_vp_build.h" 43 44 45struct state_key { 46 unsigned light_global_enabled:1; 47 unsigned light_local_viewer:1; 48 unsigned light_twoside:1; 49 unsigned light_color_material:1; 50 unsigned light_color_material_mask:12; 51 unsigned light_material_mask:12; 52 53 unsigned normalize:1; 54 unsigned rescale_normals:1; 55 unsigned fog_source_is_depth:1; 56 unsigned tnl_do_vertex_fog:1; 57 unsigned separate_specular:1; 58 unsigned fog_mode:2; 59 unsigned point_attenuated:1; 60 unsigned texture_enabled_global:1; 61 unsigned fragprog_inputs_read:12; 62 63 struct { 64 unsigned light_enabled:1; 65 unsigned light_eyepos3_is_zero:1; 66 unsigned light_spotcutoff_is_180:1; 67 unsigned light_attenuated:1; 68 unsigned texunit_really_enabled:1; 69 unsigned texmat_enabled:1; 70 unsigned texgen_enabled:4; 71 unsigned texgen_mode0:4; 72 unsigned texgen_mode1:4; 73 unsigned texgen_mode2:4; 74 unsigned texgen_mode3:4; 75 } unit[8]; 76}; 77 78 79 80#define FOG_NONE 0 81#define FOG_LINEAR 1 82#define FOG_EXP 2 83#define FOG_EXP2 3 84 85static GLuint translate_fog_mode( GLenum mode ) 86{ 87 switch (mode) { 88 case GL_LINEAR: return FOG_LINEAR; 89 case GL_EXP: return FOG_EXP; 90 case GL_EXP2: return FOG_EXP2; 91 default: return FOG_NONE; 92 } 93} 94 95#define TXG_NONE 0 96#define TXG_OBJ_LINEAR 1 97#define TXG_EYE_LINEAR 2 98#define TXG_SPHERE_MAP 3 99#define TXG_REFLECTION_MAP 4 100#define TXG_NORMAL_MAP 5 101 102static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 103{ 104 if (!enabled) 105 return TXG_NONE; 106 107 switch (mode) { 108 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 109 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 110 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 111 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 112 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 113 default: return TXG_NONE; 114 } 115} 116 117static struct state_key *make_state_key( GLcontext *ctx ) 118{ 119 TNLcontext *tnl = TNL_CONTEXT(ctx); 120 struct vertex_buffer *VB = &tnl->vb; 121 const struct gl_fragment_program *fp = ctx->FragmentProgram._Current; 122 struct state_key *key = CALLOC_STRUCT(state_key); 123 GLuint i; 124 125 /* This now relies on texenvprogram.c being active: 126 */ 127 assert(fp); 128 129 key->fragprog_inputs_read = fp->Base.InputsRead; 130 131 key->separate_specular = (ctx->Light.Model.ColorControl == 132 GL_SEPARATE_SPECULAR_COLOR); 133 134 if (ctx->Light.Enabled) { 135 key->light_global_enabled = 1; 136 137 if (ctx->Light.Model.LocalViewer) 138 key->light_local_viewer = 1; 139 140 if (ctx->Light.Model.TwoSide) 141 key->light_twoside = 1; 142 143 if (ctx->Light.ColorMaterialEnabled) { 144 key->light_color_material = 1; 145 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask; 146 } 147 148 for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) 149 if (VB->AttribPtr[i]->stride) 150 key->light_material_mask |= 1<<(i-_TNL_ATTRIB_MAT_FRONT_AMBIENT); 151 152 for (i = 0; i < MAX_LIGHTS; i++) { 153 struct gl_light *light = &ctx->Light.Light[i]; 154 155 if (light->Enabled) { 156 key->unit[i].light_enabled = 1; 157 158 if (light->EyePosition[3] == 0.0) 159 key->unit[i].light_eyepos3_is_zero = 1; 160 161 if (light->SpotCutoff == 180.0) 162 key->unit[i].light_spotcutoff_is_180 = 1; 163 164 if (light->ConstantAttenuation != 1.0 || 165 light->LinearAttenuation != 0.0 || 166 light->QuadraticAttenuation != 0.0) 167 key->unit[i].light_attenuated = 1; 168 } 169 } 170 } 171 172 if (ctx->Transform.Normalize) 173 key->normalize = 1; 174 175 if (ctx->Transform.RescaleNormals) 176 key->rescale_normals = 1; 177 178 key->fog_mode = translate_fog_mode(fp->FogOption); 179 180 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) 181 key->fog_source_is_depth = 1; 182 183 if (tnl->_DoVertexFog) 184 key->tnl_do_vertex_fog = 1; 185 186 if (ctx->Point._Attenuated) 187 key->point_attenuated = 1; 188 189 if (ctx->Texture._TexGenEnabled || 190 ctx->Texture._TexMatEnabled || 191 ctx->Texture._EnabledUnits) 192 key->texture_enabled_global = 1; 193 194 for (i = 0; i < MAX_TEXTURE_UNITS; i++) { 195 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 196 197 if (texUnit->_ReallyEnabled) 198 key->unit[i].texunit_really_enabled = 1; 199 200 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 201 key->unit[i].texmat_enabled = 1; 202 203 if (texUnit->TexGenEnabled) { 204 key->unit[i].texgen_enabled = 1; 205 206 key->unit[i].texgen_mode0 = 207 translate_texgen( texUnit->TexGenEnabled & (1<<0), 208 texUnit->GenModeS ); 209 key->unit[i].texgen_mode1 = 210 translate_texgen( texUnit->TexGenEnabled & (1<<1), 211 texUnit->GenModeT ); 212 key->unit[i].texgen_mode2 = 213 translate_texgen( texUnit->TexGenEnabled & (1<<2), 214 texUnit->GenModeR ); 215 key->unit[i].texgen_mode3 = 216 translate_texgen( texUnit->TexGenEnabled & (1<<3), 217 texUnit->GenModeQ ); 218 } 219 } 220 221 return key; 222} 223 224 225 226/* Very useful debugging tool - produces annotated listing of 227 * generated program with line/function references for each 228 * instruction back into this file: 229 */ 230#define DISASSEM (MESA_VERBOSE&VERBOSE_DISASSEM) 231 232/* Should be tunable by the driver - do we want to do matrix 233 * multiplications with DP4's or with MUL/MAD's? SSE works better 234 * with the latter, drivers may differ. 235 */ 236#define PREFER_DP4 0 237 238#define MAX_INSN 350 239 240/* Use uregs to represent registers internally, translate to Mesa's 241 * expected formats on emit. 242 * 243 * NOTE: These are passed by value extensively in this file rather 244 * than as usual by pointer reference. If this disturbs you, try 245 * remembering they are just 32bits in size. 246 * 247 * GCC is smart enough to deal with these dword-sized structures in 248 * much the same way as if I had defined them as dwords and was using 249 * macros to access and set the fields. This is much nicer and easier 250 * to evolve. 251 */ 252struct ureg { 253 GLuint file:4; 254 GLint idx:8; /* relative addressing may be negative */ 255 GLuint negate:1; 256 GLuint swz:12; 257 GLuint pad:7; 258}; 259 260 261struct tnl_program { 262 const struct state_key *state; 263 struct gl_vertex_program *program; 264 265 GLuint temp_in_use; 266 GLuint temp_reserved; 267 268 struct ureg eye_position; 269 struct ureg eye_position_normalized; 270 struct ureg eye_normal; 271 struct ureg identity; 272 273 GLuint materials; 274 GLuint color_materials; 275}; 276 277 278static const struct ureg undef = { 279 PROGRAM_UNDEFINED, 280 ~0, 281 0, 282 0, 283 0 284}; 285 286/* Local shorthand: 287 */ 288#define X SWIZZLE_X 289#define Y SWIZZLE_Y 290#define Z SWIZZLE_Z 291#define W SWIZZLE_W 292 293 294/* Construct a ureg: 295 */ 296static struct ureg make_ureg(GLuint file, GLint idx) 297{ 298 struct ureg reg; 299 reg.file = file; 300 reg.idx = idx; 301 reg.negate = 0; 302 reg.swz = SWIZZLE_NOOP; 303 reg.pad = 0; 304 return reg; 305} 306 307 308 309static struct ureg negate( struct ureg reg ) 310{ 311 reg.negate ^= 1; 312 return reg; 313} 314 315 316static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 317{ 318 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 319 GET_SWZ(reg.swz, y), 320 GET_SWZ(reg.swz, z), 321 GET_SWZ(reg.swz, w)); 322 323 return reg; 324} 325 326static struct ureg swizzle1( struct ureg reg, int x ) 327{ 328 return swizzle(reg, x, x, x, x); 329} 330 331static struct ureg get_temp( struct tnl_program *p ) 332{ 333 int bit = _mesa_ffs( ~p->temp_in_use ); 334 if (!bit) { 335 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 336 _mesa_exit(1); 337 } 338 339 if ((GLuint) bit > p->program->Base.NumTemporaries) 340 p->program->Base.NumTemporaries = bit; 341 342 p->temp_in_use |= 1<<(bit-1); 343 return make_ureg(PROGRAM_TEMPORARY, bit-1); 344} 345 346static struct ureg reserve_temp( struct tnl_program *p ) 347{ 348 struct ureg temp = get_temp( p ); 349 p->temp_reserved |= 1<<temp.idx; 350 return temp; 351} 352 353static void release_temp( struct tnl_program *p, struct ureg reg ) 354{ 355 if (reg.file == PROGRAM_TEMPORARY) { 356 p->temp_in_use &= ~(1<<reg.idx); 357 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 358 } 359} 360 361static void release_temps( struct tnl_program *p ) 362{ 363 p->temp_in_use = p->temp_reserved; 364} 365 366 367 368static struct ureg register_input( struct tnl_program *p, GLuint input ) 369{ 370 p->program->Base.InputsRead |= (1<<input); 371 return make_ureg(PROGRAM_INPUT, input); 372} 373 374static struct ureg register_output( struct tnl_program *p, GLuint output ) 375{ 376 p->program->Base.OutputsWritten |= (1<<output); 377 return make_ureg(PROGRAM_OUTPUT, output); 378} 379 380static struct ureg register_const4f( struct tnl_program *p, 381 GLfloat s0, 382 GLfloat s1, 383 GLfloat s2, 384 GLfloat s3) 385{ 386 GLfloat values[4]; 387 GLint idx; 388 GLuint swizzle; 389 values[0] = s0; 390 values[1] = s1; 391 values[2] = s2; 392 values[3] = s3; 393 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, 394 &swizzle ); 395 ASSERT(swizzle == SWIZZLE_NOOP); 396 return make_ureg(PROGRAM_STATE_VAR, idx); 397} 398 399#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 400#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 401#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 402#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 403 404static GLboolean is_undef( struct ureg reg ) 405{ 406 return reg.file == PROGRAM_UNDEFINED; 407} 408 409static struct ureg get_identity_param( struct tnl_program *p ) 410{ 411 if (is_undef(p->identity)) 412 p->identity = register_const4f(p, 0,0,0,1); 413 414 return p->identity; 415} 416 417static struct ureg register_param5(struct tnl_program *p, 418 GLint s0, 419 GLint s1, 420 GLint s2, 421 GLint s3, 422 GLint s4) 423{ 424 gl_state_index tokens[STATE_LENGTH]; 425 GLint idx; 426 tokens[0] = s0; 427 tokens[1] = s1; 428 tokens[2] = s2; 429 tokens[3] = s3; 430 tokens[4] = s4; 431 idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); 432 return make_ureg(PROGRAM_STATE_VAR, idx); 433} 434 435 436#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 437#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 438#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 439#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 440 441 442static void register_matrix_param5( struct tnl_program *p, 443 GLint s0, /* modelview, projection, etc */ 444 GLint s1, /* texture matrix number */ 445 GLint s2, /* first row */ 446 GLint s3, /* last row */ 447 GLint s4, /* inverse, transpose, etc */ 448 struct ureg *matrix ) 449{ 450 GLint i; 451 452 /* This is a bit sad as the support is there to pull the whole 453 * matrix out in one go: 454 */ 455 for (i = 0; i <= s3 - s2; i++) 456 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 457} 458 459 460/** 461 * Convert a ureg source register to a prog_src_register. 462 */ 463static void emit_arg( struct prog_src_register *src, 464 struct ureg reg ) 465{ 466 assert(reg.file != PROGRAM_OUTPUT); 467 src->File = reg.file; 468 src->Index = reg.idx; 469 src->Swizzle = reg.swz; 470 src->NegateBase = reg.negate ? NEGATE_XYZW : 0; 471 src->Abs = 0; 472 src->NegateAbs = 0; 473 src->RelAddr = 0; 474} 475 476/** 477 * Convert a ureg dest register to a prog_dst_register. 478 */ 479static void emit_dst( struct prog_dst_register *dst, 480 struct ureg reg, GLuint mask ) 481{ 482 /* Check for legal output register type. UNDEFINED will occur in 483 * instruction that don't produce a result (like END). 484 */ 485 assert(reg.file == PROGRAM_TEMPORARY || 486 reg.file == PROGRAM_OUTPUT || 487 reg.file == PROGRAM_UNDEFINED); 488 dst->File = reg.file; 489 dst->Index = reg.idx; 490 /* allow zero as a shorthand for xyzw */ 491 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 492 dst->CondMask = COND_TR; /* always pass cond test */ 493 dst->CondSwizzle = SWIZZLE_NOOP; 494 dst->CondSrc = 0; 495 dst->pad = 0; 496} 497 498static void debug_insn( struct prog_instruction *inst, const char *fn, 499 GLuint line ) 500{ 501 if (DISASSEM) { 502 static const char *last_fn; 503 504 if (fn != last_fn) { 505 last_fn = fn; 506 _mesa_printf("%s:\n", fn); 507 } 508 509 _mesa_printf("%d:\t", line); 510 _mesa_print_instruction(inst); 511 } 512} 513 514 515static void emit_op3fn(struct tnl_program *p, 516 enum prog_opcode op, 517 struct ureg dest, 518 GLuint mask, 519 struct ureg src0, 520 struct ureg src1, 521 struct ureg src2, 522 const char *fn, 523 GLuint line) 524{ 525 GLuint nr = p->program->Base.NumInstructions++; 526 struct prog_instruction *inst = &p->program->Base.Instructions[nr]; 527 528 if (p->program->Base.NumInstructions > MAX_INSN) { 529 _mesa_problem(0, "Out of instructions in emit_op3fn\n"); 530 return; 531 } 532 533 inst->Opcode = (enum prog_opcode) op; 534 inst->StringPos = 0; 535 inst->Data = 0; 536 537 emit_arg( &inst->SrcReg[0], src0 ); 538 emit_arg( &inst->SrcReg[1], src1 ); 539 emit_arg( &inst->SrcReg[2], src2 ); 540 541 emit_dst( &inst->DstReg, dest, mask ); 542 543 debug_insn(inst, fn, line); 544} 545 546 547#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 548 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 549 550#define emit_op2(p, op, dst, mask, src0, src1) \ 551 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 552 553#define emit_op1(p, op, dst, mask, src0) \ 554 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 555 556 557static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 558{ 559 if (reg.file == PROGRAM_TEMPORARY && 560 !(p->temp_reserved & (1<<reg.idx))) 561 return reg; 562 else { 563 struct ureg temp = get_temp(p); 564 emit_op1(p, OPCODE_MOV, temp, 0, reg); 565 return temp; 566 } 567} 568 569 570/* Currently no tracking performed of input/output/register size or 571 * active elements. Could be used to reduce these operations, as 572 * could the matrix type. 573 */ 574static void emit_matrix_transform_vec4( struct tnl_program *p, 575 struct ureg dest, 576 const struct ureg *mat, 577 struct ureg src) 578{ 579 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 580 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 581 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 582 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 583} 584 585/* This version is much easier to implement if writemasks are not 586 * supported natively on the target or (like SSE), the target doesn't 587 * have a clean/obvious dotproduct implementation. 588 */ 589static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 590 struct ureg dest, 591 const struct ureg *mat, 592 struct ureg src) 593{ 594 struct ureg tmp; 595 596 if (dest.file != PROGRAM_TEMPORARY) 597 tmp = get_temp(p); 598 else 599 tmp = dest; 600 601 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 602 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 603 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 604 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 605 606 if (dest.file != PROGRAM_TEMPORARY) 607 release_temp(p, tmp); 608} 609 610static void emit_matrix_transform_vec3( struct tnl_program *p, 611 struct ureg dest, 612 const struct ureg *mat, 613 struct ureg src) 614{ 615 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 616 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 617 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 618} 619 620 621static void emit_normalize_vec3( struct tnl_program *p, 622 struct ureg dest, 623 struct ureg src ) 624{ 625 struct ureg tmp = get_temp(p); 626 emit_op2(p, OPCODE_DP3, tmp, 0, src, src); 627 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 628 emit_op2(p, OPCODE_MUL, dest, 0, src, tmp); 629 release_temp(p, tmp); 630} 631 632static void emit_passthrough( struct tnl_program *p, 633 GLuint input, 634 GLuint output ) 635{ 636 struct ureg out = register_output(p, output); 637 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 638} 639 640static struct ureg get_eye_position( struct tnl_program *p ) 641{ 642 if (is_undef(p->eye_position)) { 643 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 644 struct ureg modelview[4]; 645 646 p->eye_position = reserve_temp(p); 647 648 if (PREFER_DP4) { 649 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 650 0, modelview ); 651 652 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 653 } 654 else { 655 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 656 STATE_MATRIX_TRANSPOSE, modelview ); 657 658 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 659 } 660 } 661 662 return p->eye_position; 663} 664 665 666static struct ureg get_eye_position_normalized( struct tnl_program *p ) 667{ 668 if (is_undef(p->eye_position_normalized)) { 669 struct ureg eye = get_eye_position(p); 670 p->eye_position_normalized = reserve_temp(p); 671 emit_normalize_vec3(p, p->eye_position_normalized, eye); 672 } 673 674 return p->eye_position_normalized; 675} 676 677 678static struct ureg get_eye_normal( struct tnl_program *p ) 679{ 680 if (is_undef(p->eye_normal)) { 681 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 682 struct ureg mvinv[3]; 683 684 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 685 STATE_MATRIX_INVTRANS, mvinv ); 686 687 p->eye_normal = reserve_temp(p); 688 689 /* Transform to eye space: 690 */ 691 emit_matrix_transform_vec3( p, p->eye_normal, mvinv, normal ); 692 693 /* Normalize/Rescale: 694 */ 695 if (p->state->normalize) { 696 emit_normalize_vec3( p, p->eye_normal, p->eye_normal ); 697 } 698 else if (p->state->rescale_normals) { 699 struct ureg rescale = register_param2(p, STATE_INTERNAL, 700 STATE_NORMAL_SCALE); 701 702 emit_op2( p, OPCODE_MUL, p->eye_normal, 0, p->eye_normal, 703 swizzle1(rescale, X)); 704 } 705 } 706 707 return p->eye_normal; 708} 709 710 711 712static void build_hpos( struct tnl_program *p ) 713{ 714 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 715 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 716 struct ureg mvp[4]; 717 718 if (PREFER_DP4) { 719 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 720 0, mvp ); 721 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 722 } 723 else { 724 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 725 STATE_MATRIX_TRANSPOSE, mvp ); 726 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 727 } 728} 729 730 731static GLuint material_attrib( GLuint side, GLuint property ) 732{ 733 return ((property - STATE_AMBIENT) * 2 + 734 side); 735} 736 737/* Get a bitmask of which material values vary on a per-vertex basis. 738 */ 739static void set_material_flags( struct tnl_program *p ) 740{ 741 p->color_materials = 0; 742 p->materials = 0; 743 744 if (p->state->light_color_material) { 745 p->materials = 746 p->color_materials = p->state->light_color_material_mask; 747 } 748 749 p->materials |= p->state->light_material_mask; 750} 751 752 753static struct ureg get_material( struct tnl_program *p, GLuint side, 754 GLuint property ) 755{ 756 GLuint attrib = material_attrib(side, property); 757 758 if (p->color_materials & (1<<attrib)) 759 return register_input(p, VERT_ATTRIB_COLOR0); 760 else if (p->materials & (1<<attrib)) 761 return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT ); 762 else 763 return register_param3( p, STATE_MATERIAL, side, property ); 764} 765 766#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 767 MAT_BIT_FRONT_AMBIENT | \ 768 MAT_BIT_FRONT_DIFFUSE) << (side)) 769 770/* Either return a precalculated constant value or emit code to 771 * calculate these values dynamically in the case where material calls 772 * are present between begin/end pairs. 773 * 774 * Probably want to shift this to the program compilation phase - if 775 * we always emitted the calculation here, a smart compiler could 776 * detect that it was constant (given a certain set of inputs), and 777 * lift it out of the main loop. That way the programs created here 778 * would be independent of the vertex_buffer details. 779 */ 780static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 781{ 782 if (p->materials & SCENE_COLOR_BITS(side)) { 783 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 784 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 785 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 786 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 787 struct ureg tmp = make_temp(p, material_diffuse); 788 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 789 material_ambient, material_emission); 790 return tmp; 791 } 792 else 793 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 794} 795 796 797static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 798 GLuint side, GLuint property ) 799{ 800 GLuint attrib = material_attrib(side, property); 801 if (p->materials & (1<<attrib)) { 802 struct ureg light_value = 803 register_param3(p, STATE_LIGHT, light, property); 804 struct ureg material_value = get_material(p, side, property); 805 struct ureg tmp = get_temp(p); 806 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 807 return tmp; 808 } 809 else 810 return register_param4(p, STATE_LIGHTPROD, light, side, property); 811} 812 813static struct ureg calculate_light_attenuation( struct tnl_program *p, 814 GLuint i, 815 struct ureg VPpli, 816 struct ureg dist ) 817{ 818 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 819 STATE_ATTENUATION); 820 struct ureg att = get_temp(p); 821 822 /* Calculate spot attenuation: 823 */ 824 if (!p->state->unit[i].light_spotcutoff_is_180) { 825 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 826 STATE_SPOT_DIR_NORMALIZED, i); 827 struct ureg spot = get_temp(p); 828 struct ureg slt = get_temp(p); 829 830 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 831 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 832 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 833 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 834 835 release_temp(p, spot); 836 release_temp(p, slt); 837 } 838 839 /* Calculate distance attenuation: 840 */ 841 if (p->state->unit[i].light_attenuated) { 842 843 /* 1/d,d,d,1/d */ 844 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 845 /* 1,d,d*d,1/d */ 846 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 847 /* 1/dist-atten */ 848 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 849 850 if (!p->state->unit[i].light_spotcutoff_is_180) { 851 /* dist-atten */ 852 emit_op1(p, OPCODE_RCP, dist, 0, dist); 853 /* spot-atten * dist-atten */ 854 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 855 } else { 856 /* dist-atten */ 857 emit_op1(p, OPCODE_RCP, att, 0, dist); 858 } 859 } 860 861 return att; 862} 863 864 865 866 867 868/* Need to add some addtional parameters to allow lighting in object 869 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 870 * space lighting. 871 */ 872static void build_lighting( struct tnl_program *p ) 873{ 874 const GLboolean twoside = p->state->light_twoside; 875 const GLboolean separate = p->state->separate_specular; 876 GLuint nr_lights = 0, count = 0; 877 struct ureg normal = get_eye_normal(p); 878 struct ureg lit = get_temp(p); 879 struct ureg dots = get_temp(p); 880 struct ureg _col0 = undef, _col1 = undef; 881 struct ureg _bfc0 = undef, _bfc1 = undef; 882 GLuint i; 883 884 for (i = 0; i < MAX_LIGHTS; i++) 885 if (p->state->unit[i].light_enabled) 886 nr_lights++; 887 888 set_material_flags(p); 889 890 { 891 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 892 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 893 894 _col0 = make_temp(p, get_scenecolor(p, 0)); 895 if (separate) 896 _col1 = make_temp(p, get_identity_param(p)); 897 else 898 _col1 = _col0; 899 900 } 901 902 if (twoside) { 903 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 904 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 905 negate(swizzle1(shininess,X))); 906 907 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 908 if (separate) 909 _bfc1 = make_temp(p, get_identity_param(p)); 910 else 911 _bfc1 = _bfc0; 912 } 913 914 915 /* If no lights, still need to emit the scenecolor. 916 */ 917 { 918 struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); 919 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 920 } 921 922 if (separate) { 923 struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); 924 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 925 } 926 927 if (twoside) { 928 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); 929 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 930 } 931 932 if (twoside && separate) { 933 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); 934 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 935 } 936 937 if (nr_lights == 0) { 938 release_temps(p); 939 return; 940 } 941 942 943 for (i = 0; i < MAX_LIGHTS; i++) { 944 if (p->state->unit[i].light_enabled) { 945 struct ureg half = undef; 946 struct ureg att = undef, VPpli = undef; 947 948 count++; 949 950 if (p->state->unit[i].light_eyepos3_is_zero) { 951 /* Can used precomputed constants in this case. 952 * Attenuation never applies to infinite lights. 953 */ 954 VPpli = register_param3(p, STATE_LIGHT, i, 955 STATE_POSITION_NORMALIZED); 956 if (p->state->light_local_viewer) { 957 struct ureg eye_hat = get_eye_position_normalized(p); 958 half = get_temp(p); 959 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 960 emit_normalize_vec3(p, half, half); 961 } else { 962 half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR); 963 } 964 } 965 else { 966 struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 967 STATE_POSITION); 968 struct ureg V = get_eye_position(p); 969 struct ureg dist = get_temp(p); 970 struct ureg tmpPpli = get_temp(p); 971 972 VPpli = get_temp(p); 973 974 /* In homogeneous object coordinates 975 */ 976 emit_op1(p, OPCODE_RCP, dist, 0, swizzle1(Ppli, W)); 977 emit_op2(p, OPCODE_MUL, tmpPpli, 0, Ppli, dist); 978 979 /* Calculate VPpli vector 980 */ 981 emit_op2(p, OPCODE_SUB, VPpli, 0, tmpPpli, V); 982 983 /* we're done with tmpPpli now */ 984 release_temp(p, tmpPpli); 985 986 /* Normalize VPpli. The dist value also used in 987 * attenuation below. 988 */ 989 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 990 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 991 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 992 993 994 /* Calculate attenuation: 995 */ 996 if (!p->state->unit[i].light_spotcutoff_is_180 || 997 p->state->unit[i].light_attenuated) { 998 att = calculate_light_attenuation(p, i, VPpli, dist); 999 } 1000 1001 /* We're done with dist now */ 1002 release_temp(p, dist); 1003 1004 1005 /* Calculate viewer direction, or use infinite viewer: 1006 */ 1007 half = get_temp(p); 1008 if (p->state->light_local_viewer) { 1009 struct ureg eye_hat = get_eye_position_normalized(p); 1010 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1011 } 1012 else { 1013 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1014 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1015 } 1016 1017 emit_normalize_vec3(p, half, half); 1018 } 1019 1020 /* Calculate dot products: 1021 */ 1022 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1023 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1024 1025 /* we're done with VPpli and half now, so free them as to not drive up 1026 our temp usage unnecessary */ 1027 release_temp(p, VPpli); 1028 release_temp(p, half); 1029 1030 /* Front face lighting: 1031 */ 1032 { 1033 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1034 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1035 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1036 struct ureg res0, res1; 1037 GLuint mask0, mask1; 1038 1039 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1040 1041 if (!is_undef(att)) 1042 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1043 1044 1045 if (count == nr_lights) { 1046 if (separate) { 1047 mask0 = WRITEMASK_XYZ; 1048 mask1 = WRITEMASK_XYZ; 1049 res0 = register_output( p, VERT_RESULT_COL0 ); 1050 res1 = register_output( p, VERT_RESULT_COL1 ); 1051 } 1052 else { 1053 mask0 = 0; 1054 mask1 = WRITEMASK_XYZ; 1055 res0 = _col0; 1056 res1 = register_output( p, VERT_RESULT_COL0 ); 1057 } 1058 } else { 1059 mask0 = 0; 1060 mask1 = 0; 1061 res0 = _col0; 1062 res1 = _col1; 1063 } 1064 1065 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1066 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1067 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1068 1069 release_temp(p, ambient); 1070 release_temp(p, diffuse); 1071 release_temp(p, specular); 1072 } 1073 1074 /* Back face lighting: 1075 */ 1076 if (twoside) { 1077 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1078 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1079 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1080 struct ureg res0, res1; 1081 GLuint mask0, mask1; 1082 1083 emit_op1(p, OPCODE_LIT, lit, 0, negate(swizzle(dots,X,Y,W,Z))); 1084 1085 if (!is_undef(att)) 1086 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1087 1088 if (count == nr_lights) { 1089 if (separate) { 1090 mask0 = WRITEMASK_XYZ; 1091 mask1 = WRITEMASK_XYZ; 1092 res0 = register_output( p, VERT_RESULT_BFC0 ); 1093 res1 = register_output( p, VERT_RESULT_BFC1 ); 1094 } 1095 else { 1096 mask0 = 0; 1097 mask1 = WRITEMASK_XYZ; 1098 res0 = _bfc0; 1099 res1 = register_output( p, VERT_RESULT_BFC0 ); 1100 } 1101 } else { 1102 res0 = _bfc0; 1103 res1 = _bfc1; 1104 mask0 = 0; 1105 mask1 = 0; 1106 } 1107 1108 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1109 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1110 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1111 1112 release_temp(p, ambient); 1113 release_temp(p, diffuse); 1114 release_temp(p, specular); 1115 } 1116 1117 release_temp(p, att); 1118 } 1119 } 1120 1121 release_temps( p ); 1122} 1123 1124 1125static void build_fog( struct tnl_program *p ) 1126{ 1127 struct ureg fog = register_output(p, VERT_RESULT_FOGC); 1128 struct ureg input; 1129 1130 if (p->state->fog_source_is_depth) { 1131 input = swizzle1(get_eye_position(p), Z); 1132 } 1133 else { 1134 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1135 } 1136 1137 if (p->state->fog_mode && p->state->tnl_do_vertex_fog) { 1138 struct ureg params = register_param2(p, STATE_INTERNAL, 1139 STATE_FOG_PARAMS_OPTIMIZED); 1140 struct ureg tmp = get_temp(p); 1141 GLboolean useabs = (p->state->fog_mode != FOG_EXP2); 1142 1143 if (useabs) { 1144 emit_op1(p, OPCODE_ABS, tmp, 0, input); 1145 } 1146 1147 switch (p->state->fog_mode) { 1148 case FOG_LINEAR: { 1149 struct ureg id = get_identity_param(p); 1150 emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input, 1151 swizzle1(params,X), swizzle1(params,Y)); 1152 emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ 1153 emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); 1154 break; 1155 } 1156 case FOG_EXP: 1157 emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input, 1158 swizzle1(params,Z)); 1159 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp)); 1160 break; 1161 case FOG_EXP2: 1162 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W)); 1163 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 1164 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp)); 1165 break; 1166 } 1167 1168 release_temp(p, tmp); 1169 } 1170 else { 1171 /* results = incoming fog coords (compute fog per-fragment later) 1172 * 1173 * KW: Is it really necessary to do anything in this case? 1174 * BP: Yes, we always need to compute the absolute value, unless 1175 * we want to push that down into the fragment program... 1176 */ 1177 GLboolean useabs = GL_TRUE; 1178 emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, WRITEMASK_X, input); 1179 } 1180} 1181 1182static void build_reflect_texgen( struct tnl_program *p, 1183 struct ureg dest, 1184 GLuint writemask ) 1185{ 1186 struct ureg normal = get_eye_normal(p); 1187 struct ureg eye_hat = get_eye_position_normalized(p); 1188 struct ureg tmp = get_temp(p); 1189 1190 /* n.u */ 1191 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1192 /* 2n.u */ 1193 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1194 /* (-2n.u)n + u */ 1195 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1196 1197 release_temp(p, tmp); 1198} 1199 1200static void build_sphere_texgen( struct tnl_program *p, 1201 struct ureg dest, 1202 GLuint writemask ) 1203{ 1204 struct ureg normal = get_eye_normal(p); 1205 struct ureg eye_hat = get_eye_position_normalized(p); 1206 struct ureg tmp = get_temp(p); 1207 struct ureg half = register_scalar_const(p, .5); 1208 struct ureg r = get_temp(p); 1209 struct ureg inv_m = get_temp(p); 1210 struct ureg id = get_identity_param(p); 1211 1212 /* Could share the above calculations, but it would be 1213 * a fairly odd state for someone to set (both sphere and 1214 * reflection active for different texture coordinate 1215 * components. Of course - if two texture units enable 1216 * reflect and/or sphere, things start to tilt in favour 1217 * of seperating this out: 1218 */ 1219 1220 /* n.u */ 1221 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1222 /* 2n.u */ 1223 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1224 /* (-2n.u)n + u */ 1225 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1226 /* r + 0,0,1 */ 1227 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1228 /* rx^2 + ry^2 + (rz+1)^2 */ 1229 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1230 /* 2/m */ 1231 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1232 /* 1/m */ 1233 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1234 /* r/m + 1/2 */ 1235 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1236 1237 release_temp(p, tmp); 1238 release_temp(p, r); 1239 release_temp(p, inv_m); 1240} 1241 1242 1243static void build_texture_transform( struct tnl_program *p ) 1244{ 1245 GLuint i, j; 1246 1247 for (i = 0; i < MAX_TEXTURE_UNITS; i++) { 1248 1249 if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i))) 1250 continue; 1251 1252 if (p->state->unit[i].texgen_enabled || 1253 p->state->unit[i].texmat_enabled) { 1254 1255 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1256 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 1257 struct ureg out_texgen = undef; 1258 1259 if (p->state->unit[i].texgen_enabled) { 1260 GLuint copy_mask = 0; 1261 GLuint sphere_mask = 0; 1262 GLuint reflect_mask = 0; 1263 GLuint normal_mask = 0; 1264 GLuint modes[4]; 1265 1266 if (texmat_enabled) 1267 out_texgen = get_temp(p); 1268 else 1269 out_texgen = out; 1270 1271 modes[0] = p->state->unit[i].texgen_mode0; 1272 modes[1] = p->state->unit[i].texgen_mode1; 1273 modes[2] = p->state->unit[i].texgen_mode2; 1274 modes[3] = p->state->unit[i].texgen_mode3; 1275 1276 for (j = 0; j < 4; j++) { 1277 switch (modes[j]) { 1278 case TXG_OBJ_LINEAR: { 1279 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1280 struct ureg plane = 1281 register_param3(p, STATE_TEXGEN, i, 1282 STATE_TEXGEN_OBJECT_S + j); 1283 1284 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1285 obj, plane ); 1286 break; 1287 } 1288 case TXG_EYE_LINEAR: { 1289 struct ureg eye = get_eye_position(p); 1290 struct ureg plane = 1291 register_param3(p, STATE_TEXGEN, i, 1292 STATE_TEXGEN_EYE_S + j); 1293 1294 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1295 eye, plane ); 1296 break; 1297 } 1298 case TXG_SPHERE_MAP: 1299 sphere_mask |= WRITEMASK_X << j; 1300 break; 1301 case TXG_REFLECTION_MAP: 1302 reflect_mask |= WRITEMASK_X << j; 1303 break; 1304 case TXG_NORMAL_MAP: 1305 normal_mask |= WRITEMASK_X << j; 1306 break; 1307 case TXG_NONE: 1308 copy_mask |= WRITEMASK_X << j; 1309 } 1310 1311 } 1312 1313 1314 if (sphere_mask) { 1315 build_sphere_texgen(p, out_texgen, sphere_mask); 1316 } 1317 1318 if (reflect_mask) { 1319 build_reflect_texgen(p, out_texgen, reflect_mask); 1320 } 1321 1322 if (normal_mask) { 1323 struct ureg normal = get_eye_normal(p); 1324 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1325 } 1326 1327 if (copy_mask) { 1328 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1329 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1330 } 1331 } 1332 1333 if (texmat_enabled) { 1334 struct ureg texmat[4]; 1335 struct ureg in = (!is_undef(out_texgen) ? 1336 out_texgen : 1337 register_input(p, VERT_ATTRIB_TEX0+i)); 1338 if (PREFER_DP4) { 1339 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1340 0, texmat ); 1341 emit_matrix_transform_vec4( p, out, texmat, in ); 1342 } 1343 else { 1344 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1345 STATE_MATRIX_TRANSPOSE, texmat ); 1346 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1347 } 1348 } 1349 1350 release_temps(p); 1351 } 1352 else { 1353 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); 1354 } 1355 } 1356} 1357 1358 1359static void build_pointsize( struct tnl_program *p ) 1360{ 1361 struct ureg eye = get_eye_position(p); 1362 struct ureg state_size = register_param1(p, STATE_POINT_SIZE); 1363 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1364 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1365 struct ureg ut = get_temp(p); 1366 1367 /* dist = |eyez| */ 1368 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1369 /* p1 + dist * (p2 + dist * p3); */ 1370 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1371 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1372 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1373 ut, swizzle1(state_attenuation, X)); 1374 1375 /* 1 / sqrt(factor) */ 1376 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1377 1378#if 1 1379 /* out = pointSize / sqrt(factor) */ 1380 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1381#else 1382 /* not sure, might make sense to do clamping here, 1383 but it's not done in t_vb_points neither */ 1384 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1385 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1386 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1387#endif 1388 1389 release_temp(p, ut); 1390} 1391 1392static void build_tnl_program( struct tnl_program *p ) 1393{ /* Emit the program, starting with modelviewproject: 1394 */ 1395 build_hpos(p); 1396 1397 /* Lighting calculations: 1398 */ 1399 if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { 1400 if (p->state->light_global_enabled) 1401 build_lighting(p); 1402 else { 1403 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) 1404 emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); 1405 1406 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) 1407 emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1); 1408 } 1409 } 1410 1411 if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) || 1412 p->state->fog_mode != FOG_NONE) 1413 build_fog(p); 1414 1415 if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) 1416 build_texture_transform(p); 1417 1418 if (p->state->point_attenuated) 1419 build_pointsize(p); 1420 1421 /* Finish up: 1422 */ 1423 emit_op1(p, OPCODE_END, undef, 0, undef); 1424 1425 /* Disassemble: 1426 */ 1427 if (DISASSEM) { 1428 _mesa_printf ("\n"); 1429 } 1430} 1431 1432 1433static void 1434create_new_program( const struct state_key *key, 1435 struct gl_vertex_program *program, 1436 GLuint max_temps) 1437{ 1438 struct tnl_program p; 1439 1440 _mesa_memset(&p, 0, sizeof(p)); 1441 p.state = key; 1442 p.program = program; 1443 p.eye_position = undef; 1444 p.eye_position_normalized = undef; 1445 p.eye_normal = undef; 1446 p.identity = undef; 1447 p.temp_in_use = 0; 1448 1449 if (max_temps >= sizeof(int) * 8) 1450 p.temp_reserved = 0; 1451 else 1452 p.temp_reserved = ~((1<<max_temps)-1); 1453 1454 p.program->Base.Instructions = _mesa_alloc_instructions(MAX_INSN); 1455 p.program->Base.String = NULL; 1456 p.program->Base.NumInstructions = 1457 p.program->Base.NumTemporaries = 1458 p.program->Base.NumParameters = 1459 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1460 p.program->Base.Parameters = _mesa_new_parameter_list(); 1461 p.program->Base.InputsRead = 0; 1462 p.program->Base.OutputsWritten = 0; 1463 1464 build_tnl_program( &p ); 1465} 1466 1467 1468static struct gl_vertex_program * 1469search_cache(struct tnl_cache *cache, GLuint hash, 1470 const void *key, GLuint keysize) 1471{ 1472 struct tnl_cache_item *c; 1473 1474 for (c = cache->items[hash % cache->size]; c; c = c->next) { 1475 if (c->hash == hash && _mesa_memcmp(c->key, key, keysize) == 0) 1476 return c->prog; 1477 } 1478 1479 return NULL; 1480} 1481 1482 1483static void rehash( struct tnl_cache *cache ) 1484{ 1485 struct tnl_cache_item **items; 1486 struct tnl_cache_item *c, *next; 1487 GLuint size, i; 1488 1489 size = cache->size * 3; 1490 items = (struct tnl_cache_item**) _mesa_malloc(size * sizeof(*items)); 1491 _mesa_memset(items, 0, size * sizeof(*items)); 1492 1493 for (i = 0; i < cache->size; i++) 1494 for (c = cache->items[i]; c; c = next) { 1495 next = c->next; 1496 c->next = items[c->hash % size]; 1497 items[c->hash % size] = c; 1498 } 1499 1500 FREE(cache->items); 1501 cache->items = items; 1502 cache->size = size; 1503} 1504 1505static void cache_item( GLcontext *ctx, 1506 struct tnl_cache *cache, 1507 GLuint hash, 1508 void *key, 1509 struct gl_vertex_program *prog ) 1510{ 1511 struct tnl_cache_item *c = CALLOC_STRUCT(tnl_cache_item); 1512 c->hash = hash; 1513 c->key = key; 1514 1515 c->prog = prog; 1516 1517 if (++cache->n_items > cache->size * 1.5) 1518 rehash(cache); 1519 1520 c->next = cache->items[hash % cache->size]; 1521 cache->items[hash % cache->size] = c; 1522} 1523 1524static GLuint hash_key( struct state_key *key ) 1525{ 1526 GLuint *ikey = (GLuint *)key; 1527 GLuint hash = 0, i; 1528 1529 /* I'm sure this can be improved on, but speed is important: 1530 */ 1531 for (i = 0; i < sizeof(*key)/sizeof(GLuint); i++) 1532 hash ^= ikey[i]; 1533 1534 return hash; 1535} 1536 1537void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ) 1538{ 1539 TNLcontext *tnl = TNL_CONTEXT(ctx); 1540 struct state_key *key; 1541 GLuint hash; 1542 const struct gl_vertex_program *prev = ctx->VertexProgram._Current; 1543 1544 if (!ctx->VertexProgram._Current || 1545 ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) { 1546 struct gl_vertex_program *newProg; 1547 1548 /* Grab all the relevent state and put it in a single structure: 1549 */ 1550 key = make_state_key(ctx); 1551 hash = hash_key(key); 1552 1553 /* Look for an already-prepared program for this state: 1554 */ 1555 newProg = search_cache( tnl->vp_cache, hash, key, sizeof(*key)); 1556 1557 /* OK, we'll have to build a new one: 1558 */ 1559 if (!newProg) { 1560 1561 if (0) 1562 _mesa_printf("Build new TNL program\n"); 1563 1564 newProg = (struct gl_vertex_program *) 1565 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 1566 1567 create_new_program( key, newProg, ctx->Const.VertexProgram.MaxTemps ); 1568 1569 if (ctx->Driver.ProgramStringNotify) 1570 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1571 &newProg->Base ); 1572 1573 /* Our ownership of newProg is transferred to the cache */ 1574 cache_item(ctx, tnl->vp_cache, hash, key, newProg); 1575 } 1576 else { 1577 FREE(key); 1578 } 1579 1580 _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, newProg); 1581 _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, newProg); 1582 } 1583 1584 /* Tell the driver about the change. Could define a new target for 1585 * this? 1586 */ 1587 if (ctx->VertexProgram._Current != prev && ctx->Driver.BindProgram) { 1588 ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, 1589 (struct gl_program *) ctx->VertexProgram._Current); 1590 } 1591} 1592 1593void _tnl_ProgramCacheInit( GLcontext *ctx ) 1594{ 1595 TNLcontext *tnl = TNL_CONTEXT(ctx); 1596 1597 tnl->vp_cache = (struct tnl_cache *) MALLOC(sizeof(*tnl->vp_cache)); 1598 tnl->vp_cache->size = 17; 1599 tnl->vp_cache->n_items = 0; 1600 tnl->vp_cache->items = (struct tnl_cache_item**) 1601 _mesa_calloc(tnl->vp_cache->size * sizeof(*tnl->vp_cache->items)); 1602} 1603 1604void _tnl_ProgramCacheDestroy( GLcontext *ctx ) 1605{ 1606 TNLcontext *tnl = TNL_CONTEXT(ctx); 1607 struct tnl_cache_item *c, *next; 1608 GLuint i; 1609 1610 for (i = 0; i < tnl->vp_cache->size; i++) 1611 for (c = tnl->vp_cache->items[i]; c; c = next) { 1612 next = c->next; 1613 FREE(c->key); 1614 _mesa_reference_vertprog(ctx, &c->prog, NULL); 1615 FREE(c); 1616 } 1617 1618 FREE(tnl->vp_cache->items); 1619 FREE(tnl->vp_cache); 1620} 1621