/* programopt.c revision b6e8256899a9a93c665c34e10efcc918f2fcc095 */
190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)/* 290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Mesa 3-D graphics library 390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Version: 6.5.3 490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * 590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * 75d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * Permission is hereby granted, free of charge, to any person obtaining a 890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * copy of this software and associated documentation files (the "Software"), 990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * to deal in the Software without restriction, including without limitation 1090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * the rights to use, copy, modify, merge, publish, distribute, sublicense, 1190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * and/or sell copies of the Software, and to permit persons to whom the 12eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch * Software is furnished to do so, subject to the following conditions: 13eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch * 1490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * The above copyright notice and this permission notice shall be included 15a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) * in all copies or substantial portions of the Software. 
161e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * 1790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 1890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 2390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) */ 2490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 2590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)/** 2690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * \file programopt.c 2790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Vertex/Fragment program optimizations and transformations for program 2890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * options, etc. 
2990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * 3090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * \author Brian Paul 31eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch */ 323551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 333551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 343551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#include "main/glheader.h" 353551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#include "main/context.h" 363551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#include "prog_parameter.h" 373551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)#include "prog_statevars.h" 3890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "program.h" 3990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "programopt.h" 4090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "prog_instruction.h" 4190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 4290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 4390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)/** 44cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) * This function inserts instructions for coordinate modelview * projection 45cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) * into a vertex program. 46cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) * May be used to implement the position_invariant option. 
47cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) */ 48cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)static void 4990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)_mesa_insert_mvp_dp4_code(GLcontext *ctx, struct gl_vertex_program *vprog) 5090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles){ 5190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) struct prog_instruction *newInst; 52cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) const GLuint origLen = vprog->Base.NumInstructions; 5390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) const GLuint newLen = origLen + 4; 545d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) GLuint i; 555d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* 5790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Setup state references for the modelview/projection matrix. 585d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) * XXX we should check if these state vars are already declared. 
596d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) */ 6090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) static const gl_state_index mvpState[4][STATE_LENGTH] = { 6190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) { STATE_MVP_MATRIX, 0, 0, 0, 0 }, /* state.matrix.mvp.row[0] */ 625d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) { STATE_MVP_MATRIX, 0, 1, 1, 0 }, /* state.matrix.mvp.row[1] */ 633551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) { STATE_MVP_MATRIX, 0, 2, 2, 0 }, /* state.matrix.mvp.row[2] */ 643551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) { STATE_MVP_MATRIX, 0, 3, 3, 0 }, /* state.matrix.mvp.row[3] */ 6590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) }; 665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) GLint mvpRef[4]; 6790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 6890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) for (i = 0; i < 4; i++) { 6990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) mvpRef[i] = _mesa_add_state_reference(vprog->Base.Parameters, 7090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) mvpState[i]); 71cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) } 7290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 7390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* Alloc storage for new instructions */ 7490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst = _mesa_alloc_instructions(newLen); 7590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) if (!newInst) { 763551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) _mesa_error(ctx, GL_OUT_OF_MEMORY, 773551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) "glProgramString(inserting position_invariant code)"); 783551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return; 793551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) } 
8090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 8190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* 8290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Generated instructions: 83a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) * newInst[0] = DP4 result.position.x, mvp.row[0], vertex.position; 8490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * newInst[1] = DP4 result.position.y, mvp.row[1], vertex.position; 85a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) * newInst[2] = DP4 result.position.z, mvp.row[2], vertex.position; 8690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * newInst[3] = DP4 result.position.w, mvp.row[3], vertex.position; 8790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) */ 8890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) _mesa_init_instructions(newInst, 4); 8990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) for (i = 0; i < 4; i++) { 9090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].Opcode = OPCODE_DP4; 9190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].DstReg.File = PROGRAM_OUTPUT; 9290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].DstReg.Index = VERT_RESULT_HPOS; 9390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].DstReg.WriteMask = (WRITEMASK_X << i); 9490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[0].File = PROGRAM_STATE_VAR; 9590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[0].Index = mvpRef[i]; 9690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[0].Swizzle = SWIZZLE_NOOP; 9790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[1].File = PROGRAM_INPUT; 983551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) newInst[i].SrcReg[1].Index = VERT_ATTRIB_POS; 
9990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[1].Swizzle = SWIZZLE_NOOP; 10090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 10190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 10290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* Append original instructions after new instructions */ 10390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) _mesa_copy_instructions (newInst + 4, vprog->Base.Instructions, origLen); 10490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 10590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* free old instructions */ 10690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) _mesa_free_instructions(vprog->Base.Instructions, origLen); 10790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 10890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* install new instructions */ 10990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) vprog->Base.Instructions = newInst; 11090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) vprog->Base.NumInstructions = newLen; 11190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) vprog->Base.InputsRead |= VERT_BIT_POS; 11290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) vprog->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS); 11390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)} 11490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 11590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 11690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)static void 1175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)_mesa_insert_mvp_mad_code(GLcontext *ctx, struct gl_vertex_program *vprog) 1185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles){ 11990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) struct prog_instruction *newInst; 
12090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) const GLuint origLen = vprog->Base.NumInstructions; 12190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) const GLuint newLen = origLen + 4; 12290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) GLuint hposTemp; 12390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) GLuint i; 1245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) /* 12690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Setup state references for the modelview/projection matrix. 12790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * XXX we should check if these state vars are already declared. 12890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) */ 12990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) static const gl_state_index mvpState[4][STATE_LENGTH] = { 13090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) { STATE_MVP_MATRIX, 0, 0, 0, STATE_MATRIX_TRANSPOSE }, 13190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) { STATE_MVP_MATRIX, 0, 1, 1, STATE_MATRIX_TRANSPOSE }, 13290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) { STATE_MVP_MATRIX, 0, 2, 2, STATE_MATRIX_TRANSPOSE }, 13390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) { STATE_MVP_MATRIX, 0, 3, 3, STATE_MATRIX_TRANSPOSE }, 13490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) }; 13590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) GLint mvpRef[4]; 13690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 13790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) for (i = 0; i < 4; i++) { 13890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) mvpRef[i] = _mesa_add_state_reference(vprog->Base.Parameters, 13990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) mvpState[i]); 14090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne 
(Richard Coles) } 14190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 14290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* Alloc storage for new instructions */ 14390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst = _mesa_alloc_instructions(newLen); 14490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) if (!newInst) { 14590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) _mesa_error(ctx, GL_OUT_OF_MEMORY, 14690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) "glProgramString(inserting position_invariant code)"); 14790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) return; 14890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 14990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 15090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* TEMP hposTemp; */ 15190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) hposTemp = vprog->Base.NumTemporaries++; 15290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 15390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) /* 15490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * Generated instructions: 15590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 15690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 15790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 1581e9bf3e0803691d0a228da41fc608347b6db4340Torne (Richard Coles) * emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 15990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) */ 16090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) _mesa_init_instructions(newInst, 4); 16190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 
16290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[0].Opcode = OPCODE_MUL; 16390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[0].DstReg.File = PROGRAM_TEMPORARY; 16490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[0].DstReg.Index = hposTemp; 16590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[0].DstReg.WriteMask = WRITEMASK_XYZW; 16690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[0].SrcReg[0].File = PROGRAM_INPUT; 167eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch newInst[0].SrcReg[0].Index = VERT_ATTRIB_POS; 1683551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) newInst[0].SrcReg[0].Swizzle = SWIZZLE_XXXX; 16990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR; 17090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[0].SrcReg[1].Index = mvpRef[0]; 17190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[0].SrcReg[1].Swizzle = SWIZZLE_NOOP; 17290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 17390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) for (i = 1; i <= 2; i++) { 174c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch newInst[i].Opcode = OPCODE_MAD; 175c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch newInst[i].DstReg.File = PROGRAM_TEMPORARY; 1763551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) newInst[i].DstReg.Index = hposTemp; 17790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].DstReg.WriteMask = WRITEMASK_XYZW; 17890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[0].File = PROGRAM_INPUT; 17990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[0].Index = VERT_ATTRIB_POS; 18090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(i,i,i,i); 
18190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[1].File = PROGRAM_STATE_VAR; 182a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) newInst[i].SrcReg[1].Index = mvpRef[i]; 18390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[1].Swizzle = SWIZZLE_NOOP; 18490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[2].File = PROGRAM_TEMPORARY; 18590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[i].SrcReg[2].Index = hposTemp; 1863551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) newInst[1].SrcReg[2].Swizzle = SWIZZLE_NOOP; 18790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 18890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 18990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) newInst[3].Opcode = OPCODE_MAD; 190 newInst[3].DstReg.File = PROGRAM_OUTPUT; 191 newInst[3].DstReg.Index = VERT_RESULT_HPOS; 192 newInst[3].DstReg.WriteMask = WRITEMASK_XYZW; 193 newInst[3].SrcReg[0].File = PROGRAM_INPUT; 194 newInst[3].SrcReg[0].Index = VERT_ATTRIB_POS; 195 newInst[3].SrcReg[0].Swizzle = SWIZZLE_WWWW; 196 newInst[3].SrcReg[1].File = PROGRAM_STATE_VAR; 197 newInst[3].SrcReg[1].Index = mvpRef[3]; 198 newInst[3].SrcReg[1].Swizzle = SWIZZLE_NOOP; 199 newInst[3].SrcReg[2].File = PROGRAM_TEMPORARY; 200 newInst[3].SrcReg[2].Index = hposTemp; 201 newInst[3].SrcReg[2].Swizzle = SWIZZLE_NOOP; 202 203 204 /* Append original instructions after new instructions */ 205 _mesa_copy_instructions (newInst + 4, vprog->Base.Instructions, origLen); 206 207 /* free old instructions */ 208 _mesa_free_instructions(vprog->Base.Instructions, origLen); 209 210 /* install new instructions */ 211 vprog->Base.Instructions = newInst; 212 vprog->Base.NumInstructions = newLen; 213 vprog->Base.InputsRead |= VERT_BIT_POS; 214 vprog->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS); 215} 216 217 218void 219_mesa_insert_mvp_code(GLcontext *ctx, struct 
gl_vertex_program *vprog) 220{ 221 if (ctx->mvp_with_dp4) 222 _mesa_insert_mvp_dp4_code( ctx, vprog ); 223 else 224 _mesa_insert_mvp_mad_code( ctx, vprog ); 225} 226 227 228 229 230 231 232/** 233 * Append extra instructions onto the given fragment program to implement 234 * the fog mode specified by fprog->FogOption. 235 * The fragment.fogcoord input is used to compute the fog blend factor. 236 * 237 * XXX with a little work, this function could be adapted to add fog code 238 * to vertex programs too. 239 */ 240void 241_mesa_append_fog_code(GLcontext *ctx, struct gl_fragment_program *fprog) 242{ 243 static const gl_state_index fogPStateOpt[STATE_LENGTH] 244 = { STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0 }; 245 static const gl_state_index fogColorState[STATE_LENGTH] 246 = { STATE_FOG_COLOR, 0, 0, 0, 0}; 247 struct prog_instruction *newInst, *inst; 248 const GLuint origLen = fprog->Base.NumInstructions; 249 const GLuint newLen = origLen + 5; 250 GLuint i; 251 GLint fogPRefOpt, fogColorRef; /* state references */ 252 GLuint colorTemp, fogFactorTemp; /* temporary registerss */ 253 254 if (fprog->FogOption == GL_NONE) { 255 _mesa_problem(ctx, "_mesa_append_fog_code() called for fragment program" 256 " with FogOption == GL_NONE"); 257 return; 258 } 259 260 /* Alloc storage for new instructions */ 261 newInst = _mesa_alloc_instructions(newLen); 262 if (!newInst) { 263 _mesa_error(ctx, GL_OUT_OF_MEMORY, 264 "glProgramString(inserting fog_option code)"); 265 return; 266 } 267 268 /* Copy orig instructions into new instruction buffer */ 269 _mesa_copy_instructions(newInst, fprog->Base.Instructions, origLen); 270 271 /* PARAM fogParamsRefOpt = internal optimized fog params; */ 272 fogPRefOpt 273 = _mesa_add_state_reference(fprog->Base.Parameters, fogPStateOpt); 274 /* PARAM fogColorRef = state.fog.color; */ 275 fogColorRef 276 = _mesa_add_state_reference(fprog->Base.Parameters, fogColorState); 277 278 /* TEMP colorTemp; */ 279 colorTemp = 
fprog->Base.NumTemporaries++; 280 /* TEMP fogFactorTemp; */ 281 fogFactorTemp = fprog->Base.NumTemporaries++; 282 283 /* Scan program to find where result.color is written */ 284 inst = newInst; 285 for (i = 0; i < fprog->Base.NumInstructions; i++) { 286 if (inst->Opcode == OPCODE_END) 287 break; 288 if (inst->DstReg.File == PROGRAM_OUTPUT && 289 inst->DstReg.Index == FRAG_RESULT_COLOR) { 290 /* change the instruction to write to colorTemp w/ clamping */ 291 inst->DstReg.File = PROGRAM_TEMPORARY; 292 inst->DstReg.Index = colorTemp; 293 inst->SaturateMode = SATURATE_ZERO_ONE; 294 /* don't break (may be several writes to result.color) */ 295 } 296 inst++; 297 } 298 assert(inst->Opcode == OPCODE_END); /* we'll overwrite this inst */ 299 300 _mesa_init_instructions(inst, 5); 301 302 /* emit instructions to compute fog blending factor */ 303 if (fprog->FogOption == GL_LINEAR) { 304 /* MAD fogFactorTemp.x, fragment.fogcoord.x, fogPRefOpt.x, fogPRefOpt.y; */ 305 inst->Opcode = OPCODE_MAD; 306 inst->DstReg.File = PROGRAM_TEMPORARY; 307 inst->DstReg.Index = fogFactorTemp; 308 inst->DstReg.WriteMask = WRITEMASK_X; 309 inst->SrcReg[0].File = PROGRAM_INPUT; 310 inst->SrcReg[0].Index = FRAG_ATTRIB_FOGC; 311 inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; 312 inst->SrcReg[1].File = PROGRAM_STATE_VAR; 313 inst->SrcReg[1].Index = fogPRefOpt; 314 inst->SrcReg[1].Swizzle = SWIZZLE_XXXX; 315 inst->SrcReg[2].File = PROGRAM_STATE_VAR; 316 inst->SrcReg[2].Index = fogPRefOpt; 317 inst->SrcReg[2].Swizzle = SWIZZLE_YYYY; 318 inst->SaturateMode = SATURATE_ZERO_ONE; 319 inst++; 320 } 321 else { 322 ASSERT(fprog->FogOption == GL_EXP || fprog->FogOption == GL_EXP2); 323 /* fogPRefOpt.z = d/ln(2), fogPRefOpt.w = d/sqrt(ln(2) */ 324 /* EXP: MUL fogFactorTemp.x, fogPRefOpt.z, fragment.fogcoord.x; */ 325 /* EXP2: MUL fogFactorTemp.x, fogPRefOpt.w, fragment.fogcoord.x; */ 326 inst->Opcode = OPCODE_MUL; 327 inst->DstReg.File = PROGRAM_TEMPORARY; 328 inst->DstReg.Index = fogFactorTemp; 329 
inst->DstReg.WriteMask = WRITEMASK_X; 330 inst->SrcReg[0].File = PROGRAM_STATE_VAR; 331 inst->SrcReg[0].Index = fogPRefOpt; 332 inst->SrcReg[0].Swizzle 333 = (fprog->FogOption == GL_EXP) ? SWIZZLE_ZZZZ : SWIZZLE_WWWW; 334 inst->SrcReg[1].File = PROGRAM_INPUT; 335 inst->SrcReg[1].Index = FRAG_ATTRIB_FOGC; 336 inst->SrcReg[1].Swizzle = SWIZZLE_XXXX; 337 inst++; 338 if (fprog->FogOption == GL_EXP2) { 339 /* MUL fogFactorTemp.x, fogFactorTemp.x, fogFactorTemp.x; */ 340 inst->Opcode = OPCODE_MUL; 341 inst->DstReg.File = PROGRAM_TEMPORARY; 342 inst->DstReg.Index = fogFactorTemp; 343 inst->DstReg.WriteMask = WRITEMASK_X; 344 inst->SrcReg[0].File = PROGRAM_TEMPORARY; 345 inst->SrcReg[0].Index = fogFactorTemp; 346 inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; 347 inst->SrcReg[1].File = PROGRAM_TEMPORARY; 348 inst->SrcReg[1].Index = fogFactorTemp; 349 inst->SrcReg[1].Swizzle = SWIZZLE_XXXX; 350 inst++; 351 } 352 /* EX2_SAT fogFactorTemp.x, -fogFactorTemp.x; */ 353 inst->Opcode = OPCODE_EX2; 354 inst->DstReg.File = PROGRAM_TEMPORARY; 355 inst->DstReg.Index = fogFactorTemp; 356 inst->DstReg.WriteMask = WRITEMASK_X; 357 inst->SrcReg[0].File = PROGRAM_TEMPORARY; 358 inst->SrcReg[0].Index = fogFactorTemp; 359 inst->SrcReg[0].Negate = NEGATE_XYZW; 360 inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; 361 inst->SaturateMode = SATURATE_ZERO_ONE; 362 inst++; 363 } 364 /* LRP result.color.xyz, fogFactorTemp.xxxx, colorTemp, fogColorRef; */ 365 inst->Opcode = OPCODE_LRP; 366 inst->DstReg.File = PROGRAM_OUTPUT; 367 inst->DstReg.Index = FRAG_RESULT_COLOR; 368 inst->DstReg.WriteMask = WRITEMASK_XYZ; 369 inst->SrcReg[0].File = PROGRAM_TEMPORARY; 370 inst->SrcReg[0].Index = fogFactorTemp; 371 inst->SrcReg[0].Swizzle = SWIZZLE_XXXX; 372 inst->SrcReg[1].File = PROGRAM_TEMPORARY; 373 inst->SrcReg[1].Index = colorTemp; 374 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 375 inst->SrcReg[2].File = PROGRAM_STATE_VAR; 376 inst->SrcReg[2].Index = fogColorRef; 377 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; 378 inst++; 379 /* 
MOV result.color.w, colorTemp.x; # copy alpha */ 380 inst->Opcode = OPCODE_MOV; 381 inst->DstReg.File = PROGRAM_OUTPUT; 382 inst->DstReg.Index = FRAG_RESULT_COLOR; 383 inst->DstReg.WriteMask = WRITEMASK_W; 384 inst->SrcReg[0].File = PROGRAM_TEMPORARY; 385 inst->SrcReg[0].Index = colorTemp; 386 inst->SrcReg[0].Swizzle = SWIZZLE_NOOP; 387 inst++; 388 /* END; */ 389 inst->Opcode = OPCODE_END; 390 inst++; 391 392 /* free old instructions */ 393 _mesa_free_instructions(fprog->Base.Instructions, origLen); 394 395 /* install new instructions */ 396 fprog->Base.Instructions = newInst; 397 fprog->Base.NumInstructions = inst - newInst; 398 fprog->Base.InputsRead |= FRAG_BIT_FOGC; 399 /* XXX do this? fprog->FogOption = GL_NONE; */ 400} 401 402 403 404static GLboolean 405is_texture_instruction(const struct prog_instruction *inst) 406{ 407 switch (inst->Opcode) { 408 case OPCODE_TEX: 409 case OPCODE_TXB: 410 case OPCODE_TXD: 411 case OPCODE_TXL: 412 case OPCODE_TXP: 413 case OPCODE_TXP_NV: 414 return GL_TRUE; 415 default: 416 return GL_FALSE; 417 } 418} 419 420 421/** 422 * Count the number of texure indirections in the given program. 423 * The program's NumTexIndirections field will be updated. 424 * See the GL_ARB_fragment_program spec (issue 24) for details. 425 * XXX we count texture indirections in texenvprogram.c (maybe use this code 426 * instead and elsewhere). 
427 */ 428void 429_mesa_count_texture_indirections(struct gl_program *prog) 430{ 431 GLuint indirections = 1; 432 GLbitfield tempsOutput = 0x0; 433 GLbitfield aluTemps = 0x0; 434 GLuint i; 435 436 for (i = 0; i < prog->NumInstructions; i++) { 437 const struct prog_instruction *inst = prog->Instructions + i; 438 439 if (is_texture_instruction(inst)) { 440 if (((inst->SrcReg[0].File == PROGRAM_TEMPORARY) && 441 (tempsOutput & (1 << inst->SrcReg[0].Index))) || 442 ((inst->Opcode != OPCODE_KIL) && 443 (inst->DstReg.File == PROGRAM_TEMPORARY) && 444 (aluTemps & (1 << inst->DstReg.Index)))) 445 { 446 indirections++; 447 tempsOutput = 0x0; 448 aluTemps = 0x0; 449 } 450 } 451 else { 452 GLuint j; 453 for (j = 0; j < 3; j++) { 454 if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) 455 aluTemps |= (1 << inst->SrcReg[j].Index); 456 } 457 if (inst->DstReg.File == PROGRAM_TEMPORARY) 458 aluTemps |= (1 << inst->DstReg.Index); 459 } 460 461 if ((inst->Opcode != OPCODE_KIL) && (inst->DstReg.File == PROGRAM_TEMPORARY)) 462 tempsOutput |= (1 << inst->DstReg.Index); 463 } 464 465 prog->NumTexIndirections = indirections; 466} 467 468 469/** 470 * Count number of texture instructions in given program and update the 471 * program's NumTexInstructions field. 472 */ 473void 474_mesa_count_texture_instructions(struct gl_program *prog) 475{ 476 GLuint i; 477 prog->NumTexInstructions = 0; 478 for (i = 0; i < prog->NumInstructions; i++) { 479 prog->NumTexInstructions += is_texture_instruction(prog->Instructions + i); 480 } 481} 482 483 484/** 485 * Scan/rewrite program to remove reads of custom (output) registers. 486 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING 487 * (for vertex shaders). 488 * In GLSL shaders, varying vars can be read and written. 489 * On some hardware, trying to read an output register causes trouble. 490 * So, rewrite the program to use a temporary register in this case. 
491 */ 492void 493_mesa_remove_output_reads(struct gl_program *prog, gl_register_file type) 494{ 495 GLuint i; 496 GLint outputMap[VERT_RESULT_MAX]; 497 GLuint numVaryingReads = 0; 498 499 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); 500 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING); 501 502 for (i = 0; i < VERT_RESULT_MAX; i++) 503 outputMap[i] = -1; 504 505 /* look for instructions which read from varying vars */ 506 for (i = 0; i < prog->NumInstructions; i++) { 507 struct prog_instruction *inst = prog->Instructions + i; 508 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); 509 GLuint j; 510 for (j = 0; j < numSrc; j++) { 511 if (inst->SrcReg[j].File == type) { 512 /* replace the read with a temp reg */ 513 const GLuint var = inst->SrcReg[j].Index; 514 if (outputMap[var] == -1) { 515 numVaryingReads++; 516 outputMap[var] = _mesa_find_free_register(prog, 517 PROGRAM_TEMPORARY); 518 } 519 inst->SrcReg[j].File = PROGRAM_TEMPORARY; 520 inst->SrcReg[j].Index = outputMap[var]; 521 } 522 } 523 } 524 525 if (numVaryingReads == 0) 526 return; /* nothing to be done */ 527 528 /* look for instructions which write to the varying vars identified above */ 529 for (i = 0; i < prog->NumInstructions; i++) { 530 struct prog_instruction *inst = prog->Instructions + i; 531 const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); 532 GLuint j; 533 for (j = 0; j < numSrc; j++) { 534 if (inst->DstReg.File == type && 535 outputMap[inst->DstReg.Index] >= 0) { 536 /* change inst to write to the temp reg, instead of the varying */ 537 inst->DstReg.File = PROGRAM_TEMPORARY; 538 inst->DstReg.Index = outputMap[inst->DstReg.Index]; 539 } 540 } 541 } 542 543 /* insert new instructions to copy the temp vars to the varying vars */ 544 { 545 struct prog_instruction *inst; 546 GLint endPos, var; 547 548 /* Look for END instruction and insert the new varying writes */ 549 endPos = -1; 550 for (i = 0; i < prog->NumInstructions; i++) { 551 struct 
prog_instruction *inst = prog->Instructions + i; 552 if (inst->Opcode == OPCODE_END) { 553 endPos = i; 554 _mesa_insert_instructions(prog, i, numVaryingReads); 555 break; 556 } 557 } 558 559 assert(endPos >= 0); 560 561 /* insert new MOV instructions here */ 562 inst = prog->Instructions + endPos; 563 for (var = 0; var < VERT_RESULT_MAX; var++) { 564 if (outputMap[var] >= 0) { 565 /* MOV VAR[var], TEMP[tmp]; */ 566 inst->Opcode = OPCODE_MOV; 567 inst->DstReg.File = type; 568 inst->DstReg.Index = var; 569 inst->SrcReg[0].File = PROGRAM_TEMPORARY; 570 inst->SrcReg[0].Index = outputMap[var]; 571 inst++; 572 } 573 } 574 } 575} 576