r200_vertprog.c revision fc606f7db9072d4f40081aea8f92f1d4489a5115
1/************************************************************************** 2 3Copyright (C) 2005 Aapo Tahkola. 4 5All Rights Reserved. 6 7Permission is hereby granted, free of charge, to any person obtaining a 8copy of this software and associated documentation files (the "Software"), 9to deal in the Software without restriction, including without limitation 10on the rights to use, copy, modify, merge, publish, distribute, sub 11license, and/or sell copies of the Software, and to permit persons to whom 12the Software is furnished to do so, subject to the following conditions: 13 14The above copyright notice and this permission notice (including the next 15paragraph) shall be included in all copies or substantial portions of the 16Software. 17 18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 22DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26**************************************************************************/ 27 28/* 29 * Authors: 30 * Aapo Tahkola <aet@rasterburn.org> 31 * Roland Scheidegger <rscheidegger_lists@hispeed.ch> 32 */ 33#include "glheader.h" 34#include "macros.h" 35#include "enums.h" 36#include "program.h" 37 38#include "r200_context.h" 39#include "r200_vertprog.h" 40#include "r200_ioctl.h" 41#include "r200_tcl.h" 42#include "program_instruction.h" 43#include "tnl/tnl.h" 44 45#if SWIZZLE_X != VSF_IN_COMPONENT_X || \ 46 SWIZZLE_Y != VSF_IN_COMPONENT_Y || \ 47 SWIZZLE_Z != VSF_IN_COMPONENT_Z || \ 48 SWIZZLE_W != VSF_IN_COMPONENT_W || \ 49 SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \ 50 SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \ 51 WRITEMASK_X != VSF_FLAG_X || \ 52 WRITEMASK_Y != VSF_FLAG_Y || \ 53 WRITEMASK_Z != VSF_FLAG_Z || \ 54 WRITEMASK_W != VSF_FLAG_W 55#error Cannot change these! 56#endif 57 58#define SCALAR_FLAG (1<<31) 59#define FLAG_MASK (1<<31) 60#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ 61#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} 62 63static struct{ 64 char *name; 65 int opcode; 66 unsigned long ip; /* number of input operands and flags */ 67}op_names[]={ 68 OPN(ABS, 1), 69 OPN(ADD, 2), 70 OPN(ARL, 1|SCALAR_FLAG), 71 OPN(DP3, 2), 72 OPN(DP4, 2), 73 OPN(DPH, 2), 74 OPN(DST, 2), 75 OPN(EX2, 1|SCALAR_FLAG), 76 OPN(EXP, 1|SCALAR_FLAG), 77 OPN(FLR, 1), 78 OPN(FRC, 1), 79 OPN(LG2, 1|SCALAR_FLAG), 80 OPN(LIT, 1), 81 OPN(LOG, 1|SCALAR_FLAG), 82 OPN(MAD, 3), 83 OPN(MAX, 2), 84 OPN(MIN, 2), 85 OPN(MOV, 1), 86 OPN(MUL, 2), 87 OPN(POW, 2|SCALAR_FLAG), 88 OPN(RCP, 1|SCALAR_FLAG), 89 OPN(RSQ, 1|SCALAR_FLAG), 90 OPN(SGE, 2), 91 OPN(SLT, 2), 92 OPN(SUB, 2), 93 OPN(SWZ, 1), 94 OPN(XPD, 2), 95 OPN(PRINT, 0), 96 OPN(END, 0), 97}; 98#undef OPN 99 100static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp) 101{ 102 r200ContextPtr rmesa = R200_CONTEXT( ctx ); 103 GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1]; 104 int pi; 105 struct gl_vertex_program *mesa_vp = &vp->mesa_program; 106 struct gl_program_parameter_list *paramList; 107 drm_radeon_cmd_header_t tmp; 108 109 R200_STATECHANGE( rmesa, vpp[0] ); 110 R200_STATECHANGE( rmesa, vpp[1] ); 111 assert(mesa_vp->Base.Parameters); 112 _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters); 113 paramList = mesa_vp->Base.Parameters; 114 115 if(paramList->NumParameters > R200_VSF_MAX_PARAM){ 116 fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__); 117 return GL_FALSE; 118 } 119 120 for(pi = 0; pi < paramList->NumParameters; pi++) { 121 switch(paramList->Parameters[pi].Type) { 122 case PROGRAM_STATE_VAR: 123 case PROGRAM_NAMED_PARAM: 124 //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name); 125 case PROGRAM_CONSTANT: 126 *fcmd++ = paramList->ParameterValues[pi][0]; 127 *fcmd++ = paramList->ParameterValues[pi][1]; 128 *fcmd++ = paramList->ParameterValues[pi][2]; 129 *fcmd++ = paramList->ParameterValues[pi][3]; 130 break; 131 default: 132 _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); 133 break; 134 } 135 if (pi == 95) { 136 fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1]; 137 } 138 } 139 /* hack up the cmd_size so not the whole state atom is emitted always. */ 140 rmesa->hw.vpp[0].cmd_size = 141 1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters); 142 tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0]; 143 tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters; 144 rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i; 145 if (paramList->NumParameters > 96) { 146 rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96); 147 tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0]; 148 tmp.veclinear.count = paramList->NumParameters - 96; 149 rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i; 150 } 151 return GL_TRUE; 152} 153 154static __inline unsigned long t_dst_mask(GLuint mask) 155{ 156 /* WRITEMASK_* is equivalent to VSF_FLAG_* */ 157 return mask & VSF_FLAG_ALL; 158} 159 160static unsigned long t_dst(struct prog_dst_register *dst) 161{ 162 switch(dst->File) { 163 case PROGRAM_TEMPORARY: 164 return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT) 165 | R200_VSF_OUT_CLASS_TMP); 166 case PROGRAM_OUTPUT: 167 switch (dst->Index) { 168 case VERT_RESULT_HPOS: 169 return R200_VSF_OUT_CLASS_RESULT_POS; 170 case VERT_RESULT_COL0: 171 return R200_VSF_OUT_CLASS_RESULT_COLOR; 172 case VERT_RESULT_COL1: 173 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT) 174 | R200_VSF_OUT_CLASS_RESULT_COLOR); 175 case VERT_RESULT_FOGC: 176 return R200_VSF_OUT_CLASS_RESULT_FOGC; 177 case VERT_RESULT_TEX0: 178 case VERT_RESULT_TEX1: 179 case VERT_RESULT_TEX2: 180 case VERT_RESULT_TEX3: 181 case VERT_RESULT_TEX4: 182 case VERT_RESULT_TEX5: 183 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT) 184 | R200_VSF_OUT_CLASS_RESULT_TEXC); 185 case VERT_RESULT_PSIZ: 186 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE; 187 default: 188 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index); 189 exit(0); 190 return 0; 191 } 192 case PROGRAM_ADDRESS: 193 assert (dst->Index == 0); 194 return R200_VSF_OUT_CLASS_ADDR; 195 default: 196 fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File); 197 exit(0); 198 return 0; 199 } 200} 201 202static unsigned long t_src_class(enum register_file file) 203{ 204 205 switch(file){ 206 case PROGRAM_TEMPORARY: 207 return VSF_IN_CLASS_TMP; 208 209 case PROGRAM_INPUT: 210 return VSF_IN_CLASS_ATTR; 211 212 case PROGRAM_LOCAL_PARAM: 213 case PROGRAM_ENV_PARAM: 214 case PROGRAM_NAMED_PARAM: 215 case PROGRAM_STATE_VAR: 216 return VSF_IN_CLASS_PARAM; 217 /* 218 case PROGRAM_OUTPUT: 219 case PROGRAM_WRITE_ONLY: 220 case PROGRAM_ADDRESS: 221 */ 222 default: 223 fprintf(stderr, "problem in %s", __FUNCTION__); 224 exit(0); 225 } 226} 227 228static __inline unsigned long t_swizzle(GLubyte swizzle) 229{ 230/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ 231 return swizzle; 232} 233 234#if 0 235static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller) 236{ 237 int i; 238 239 if(vp == NULL){ 240 fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller); 241 return ; 242 } 243 244 fprintf(stderr, "%s:<", caller); 245 for(i=0; i < VERT_ATTRIB_MAX; i++) 246 fprintf(stderr, "%d ", vp->inputs[i]); 247 fprintf(stderr, ">\n"); 248 249} 250#endif 251 252static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src) 253{ 254/* 255 int i; 256 int max_reg = -1; 257*/ 258 if(src->File == PROGRAM_INPUT){ 259/* if(vp->inputs[src->Index] != -1) 260 return vp->inputs[src->Index]; 261 262 for(i=0; i < VERT_ATTRIB_MAX; i++) 263 if(vp->inputs[i] > max_reg) 264 max_reg = vp->inputs[i]; 265 266 vp->inputs[src->Index] = max_reg+1;*/ 267 268 //vp_dump_inputs(vp, __FUNCTION__); 269 assert(vp->inputs[src->Index] != -1); 270 return vp->inputs[src->Index]; 271 } else { 272 if (src->Index < 0) { 273 fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n"); 274 return 0; 275 } 276 return src->Index; 277 } 278} 279 280static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src) 281{ 282 283 return MAKE_VSF_SOURCE(t_src_index(vp, src), 284 t_swizzle(GET_SWZ(src->Swizzle, 0)), 285 t_swizzle(GET_SWZ(src->Swizzle, 1)), 286 t_swizzle(GET_SWZ(src->Swizzle, 2)), 287 t_swizzle(GET_SWZ(src->Swizzle, 3)), 288 t_src_class(src->File), 289 src->NegateBase) | (src->RelAddr << 4); 290} 291 292static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src) 293{ 294 295 return MAKE_VSF_SOURCE(t_src_index(vp, src), 296 t_swizzle(GET_SWZ(src->Swizzle, 0)), 297 t_swizzle(GET_SWZ(src->Swizzle, 0)), 298 t_swizzle(GET_SWZ(src->Swizzle, 0)), 299 t_swizzle(GET_SWZ(src->Swizzle, 0)), 300 t_src_class(src->File), 301 src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); 302} 303 304static unsigned long t_opcode(enum prog_opcode opcode) 305{ 306 307 switch(opcode){ 308 case OPCODE_ADD: return R200_VPI_OUT_OP_ADD; 309 /* FIXME: ARL works fine, but negative offsets won't work - fglrx just 310 * seems to ignore neg offsets which isn't quite correct... 311 */ 312 case OPCODE_ARL: return R200_VPI_OUT_OP_ARL; 313 case OPCODE_DP4: return R200_VPI_OUT_OP_DOT; 314 case OPCODE_DST: return R200_VPI_OUT_OP_DST; 315 case OPCODE_EX2: return R200_VPI_OUT_OP_EX2; 316 case OPCODE_EXP: return R200_VPI_OUT_OP_EXP; 317 case OPCODE_FRC: return R200_VPI_OUT_OP_FRC; 318 case OPCODE_LG2: return R200_VPI_OUT_OP_LG2; 319 case OPCODE_LIT: return R200_VPI_OUT_OP_LIT; 320 case OPCODE_LOG: return R200_VPI_OUT_OP_LOG; 321 case OPCODE_MAX: return R200_VPI_OUT_OP_MAX; 322 case OPCODE_MIN: return R200_VPI_OUT_OP_MIN; 323 case OPCODE_MUL: return R200_VPI_OUT_OP_MUL; 324 case OPCODE_RCP: return R200_VPI_OUT_OP_RCP; 325 case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ; 326 case OPCODE_SGE: return R200_VPI_OUT_OP_SGE; 327 case OPCODE_SLT: return R200_VPI_OUT_OP_SLT; 328 329 default: 330 fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode); 331 } 332 exit(-1); 333 return 0; 334} 335 336static unsigned long op_operands(enum prog_opcode opcode) 337{ 338 int i; 339 340 /* Can we trust mesas opcodes to be in order ? */ 341 for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++) 342 if(op_names[i].opcode == opcode) 343 return op_names[i].ip; 344 345 fprintf(stderr, "op %d not found in op_names\n", opcode); 346 exit(-1); 347 return 0; 348} 349 350/* TODO: Get rid of t_src_class call */ 351#define CMP_SRCS(a, b) (((a.RelAddr != b.RelAddr) || (a.Index != b.Index)) && \ 352 ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ 353 t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ 354 (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ 355 t_src_class(b.File) == VSF_IN_CLASS_ATTR))) \ 356 357/* fglrx on rv250 codes up unused sources as follows: 358 unused but necessary sources are same as previous source, zero-ed out. 359 unnecessary sources are same as previous source but with VSF_IN_CLASS_NONE set. 360 i.e. an add (2 args) has its 2nd arg (if you use it as mov) zero-ed out, and 3rd arg 361 set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */ 362 363/* use these simpler definitions. Must obviously not be used with not yet set up regs. 364 Those are NOT semantically equivalent to the r300 ones, requires code changes */ 365#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ 366 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ 367 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ 368 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ 369 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) 370 371#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ 372 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ 373 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ 374 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ 375 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) 376 377#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \ 378 | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \ 379 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \ 380 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \ 381 | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT)))) 382 383#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9) 384 385#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9) 386 387#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9) 388 389 390/* DP4 version seems to trigger some hw peculiarity - fglrx does this on r200 however */ 391#define PREFER_DP4 392 393 394/** 395 * Generate an R200 vertex program from Mesa's internal representation. 396 * 397 * \return GL_TRUE for success, GL_FALSE for failure. 398 */ 399static GLboolean r200_translate_vertex_program(struct r200_vertex_program *vp, GLenum fogmode) 400{ 401 struct gl_vertex_program *mesa_vp = &vp->mesa_program; 402 struct prog_instruction *vpi; 403 int i; 404 VERTEX_SHADER_INSTRUCTION *o_inst; 405 unsigned long operands; 406 int are_srcs_scalar; 407 unsigned long hw_op; 408 int dofogfix = 0; 409 int fog_temp_i = 0; 410 411 vp->native = GL_FALSE; 412 vp->translated = GL_TRUE; 413 vp->fogmode = fogmode; 414 415 if (mesa_vp->Base.NumInstructions == 0) 416 return GL_FALSE; 417 418 if ((mesa_vp->Base.InputsRead & 419 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 | 420 VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 | 421 VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) { 422 if (R200_DEBUG & DEBUG_FALLBACKS) { 423 fprintf(stderr, "can't handle vert prog inputs 0x%x\n", 424 mesa_vp->Base.InputsRead); 425 } 426 return GL_FALSE; 427 } 428 429 if ((mesa_vp->Base.OutputsWritten & 430 ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) | 431 (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) | 432 (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) | 433 (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) { 434 if (R200_DEBUG & DEBUG_FALLBACKS) { 435 fprintf(stderr, "can't handle vert prog outputs 0x%x\n", 436 mesa_vp->Base.OutputsWritten); 437 } 438 return GL_FALSE; 439 } 440 441 if (mesa_vp->IsNVProgram) { 442 /* subtle differences in spec like guaranteed initialized regs could cause 443 headaches. Might want to remove the driconf option to enable it completely */ 444 return GL_FALSE; 445 } 446 /* Initial value should be last tmp reg that hw supports. 447 Strangely enough r300 doesnt mind even though these would be out of range. 448 Smart enough to realize that it doesnt need it? */ 449 int u_temp_i = R200_VSF_MAX_TEMPS - 1; 450 struct prog_src_register src[3]; 451 struct prog_dst_register dst; 452 453/* FIXME: is changing the prog safe to do here? */ 454 if (mesa_vp->IsPositionInvariant && 455 /* make sure we only do this once */ 456 !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) { 457 struct gl_program_parameter_list *paramList; 458 GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX }; 459 460#ifdef PREFER_DP4 461 tokens[5] = STATE_MATRIX; 462#else 463 tokens[5] = STATE_MATRIX_TRANSPOSE; 464#endif 465 paramList = mesa_vp->Base.Parameters; 466 467 vpi = malloc((mesa_vp->Base.NumInstructions + 4) * sizeof(struct prog_instruction)); 468 memset(vpi, 0, 4 * sizeof(struct prog_instruction)); 469 470 /* emit four dot product instructions to do MVP transformation */ 471 for (i=0; i < 4; i++) { 472 GLint idx; 473 tokens[3] = tokens[4] = i; 474 idx = _mesa_add_state_reference(paramList, tokens); 475#ifdef PREFER_DP4 476 vpi[i].Opcode = OPCODE_DP4; 477 vpi[i].StringPos = 0; 478 vpi[i].Data = 0; 479 480 vpi[i].DstReg.File = PROGRAM_OUTPUT; 481 vpi[i].DstReg.Index = VERT_RESULT_HPOS; 482 vpi[i].DstReg.WriteMask = 1 << i; 483 vpi[i].DstReg.CondMask = COND_TR; 484 485 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; 486 vpi[i].SrcReg[0].Index = idx; 487 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); 488 489 vpi[i].SrcReg[1].File = PROGRAM_INPUT; 490 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; 491 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); 492#else 493 if (i == 0) 494 vpi[i].Opcode = OPCODE_MUL; 495 else 496 vpi[i].Opcode = OPCODE_MAD; 497 498 vpi[i].StringPos = 0; 499 vpi[i].Data = 0; 500 501 if (i == 3) 502 vpi[i].DstReg.File = PROGRAM_OUTPUT; 503 else 504 vpi[i].DstReg.File = PROGRAM_TEMPORARY; 505 vpi[i].DstReg.Index = 0; 506 vpi[i].DstReg.WriteMask = 0xf; 507 vpi[i].DstReg.CondMask = COND_TR; 508 509 vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR; 510 vpi[i].SrcReg[0].Index = idx; 511 vpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); 512 513 vpi[i].SrcReg[1].File = PROGRAM_INPUT; 514 vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS; 515 vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i); 516 517 if (i > 0) { 518 vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY; 519 vpi[i].SrcReg[2].Index = 0; 520 vpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W); 521 } 522#endif 523 } 524 525 /* now append original program after our new instructions */ 526 memcpy(&vpi[i], mesa_vp->Base.Instructions, mesa_vp->Base.NumInstructions * sizeof(struct prog_instruction)); 527 528 /* deallocate original program */ 529 free(mesa_vp->Base.Instructions); 530 531 /* install new program */ 532 mesa_vp->Base.Instructions = vpi; 533 534 mesa_vp->Base.NumInstructions += 4; 535 vpi = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions-1]; 536 537 assert(vpi->Opcode == OPCODE_END); 538 539 mesa_vp->Base.InputsRead |= (1 << VERT_ATTRIB_POS); 540 mesa_vp->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS); 541 542 //fprintf(stderr, "IsPositionInvariant is set!\n"); 543 //_mesa_print_program(&mesa_vp->Base); 544 } 545 546 /* for fogc, can't change mesa_vp, as it would hose swtnl 547 maybe should just copy whole prog ? */ 548 if (mesa_vp->Base.OutputsWritten & VERT_RESULT_FOGC && !vp->fogpidx) { 549 struct gl_program_parameter_list *paramList; 550 GLint tokens[6] = { STATE_FOG_PARAMS, 0, 0, 0, 0, 0 }; 551 paramList = mesa_vp->Base.Parameters; 552 vp->fogpidx = _mesa_add_state_reference(paramList, tokens); 553 } 554 555 vp->pos_end = 0; 556 mesa_vp->Base.NumNativeInstructions = 0; 557 if (mesa_vp->Base.Parameters) 558 mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters; 559 else 560 mesa_vp->Base.NumNativeParameters = 0; 561 562 for(i=0; i < VERT_ATTRIB_MAX; i++) 563 vp->inputs[i] = -1; 564/* fglrx uses fixed inputs as follows for conventional attribs. 565 generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available. 566 There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog. 567 attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to 568 vertex normal/weight) 569 attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0 570 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0) 571 attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1) 572 attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0) 573 generic attribs would require some more work (dma regions, renaming). */ 574 575/* may look different when using idx buf / input_route instead of se_vtx_fmt? */ 576 vp->inputs[VERT_ATTRIB_POS] = 0; 577 vp->inputs[VERT_ATTRIB_WEIGHT] = 12; 578 vp->inputs[VERT_ATTRIB_NORMAL] = 1; 579 vp->inputs[VERT_ATTRIB_COLOR0] = 2; 580 vp->inputs[VERT_ATTRIB_COLOR1] = 3; 581 vp->inputs[VERT_ATTRIB_FOG] = 15; 582 vp->inputs[VERT_ATTRIB_TEX0] = 6; 583 vp->inputs[VERT_ATTRIB_TEX1] = 7; 584 vp->inputs[VERT_ATTRIB_TEX2] = 8; 585 vp->inputs[VERT_ATTRIB_TEX3] = 9; 586 vp->inputs[VERT_ATTRIB_TEX4] = 10; 587 vp->inputs[VERT_ATTRIB_TEX5] = 11; 588/* attr 4,5 and 13 are only used with generic attribs. 589 Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is 590 not possibe to use with vertex progs as it is lacking in vert prog specification) */ 591 592 if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) { 593 if (R200_DEBUG & DEBUG_FALLBACKS) { 594 fprintf(stderr, "can't handle vert prog without position output\n"); 595 } 596 return GL_FALSE; 597 } 598 599 o_inst = vp->instr; 600 for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){ 601 operands = op_operands(vpi->Opcode); 602 are_srcs_scalar = operands & SCALAR_FLAG; 603 operands &= OP_MASK; 604 605 for(i = 0; i < operands; i++) { 606 src[i] = vpi->SrcReg[i]; 607 /* hack up default attrib values as per spec as swizzling. 608 normal, fog, secondary color. Crazy? 609 May need more if we don't submit vec4 elements? */ 610 if (src[i].File == PROGRAM_INPUT) { 611 if (src[i].Index == VERT_ATTRIB_NORMAL) { 612 int j; 613 for (j = 0; j < 4; j++) { 614 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { 615 src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); 616 src[i].Swizzle |= SWIZZLE_ONE << (j*3); 617 } 618 } 619 } 620 else if (src[i].Index == VERT_ATTRIB_COLOR1) { 621 int j; 622 for (j = 0; j < 4; j++) { 623 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { 624 src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); 625 src[i].Swizzle |= SWIZZLE_ZERO << (j*3); 626 } 627 } 628 } 629 else if (src[i].Index == VERT_ATTRIB_FOG) { 630 int j; 631 for (j = 0; j < 4; j++) { 632 if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) { 633 src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); 634 src[i].Swizzle |= SWIZZLE_ONE << (j*3); 635 } 636 else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) || 637 GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) { 638 src[i].Swizzle &= ~(SWIZZLE_W << (j*3)); 639 src[i].Swizzle |= SWIZZLE_ZERO << (j*3); 640 } 641 } 642 } 643 } 644 } 645 646 if(operands == 3){ 647 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){ 648 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, 649 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 650 VSF_FLAG_ALL); 651 652 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), 653 SWIZZLE_X, SWIZZLE_Y, 654 SWIZZLE_Z, SWIZZLE_W, 655 t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4); 656 657 o_inst->src1 = ZERO_SRC_0; 658 o_inst->src2 = UNUSED_SRC_1; 659 o_inst++; 660 661 src[2].File = PROGRAM_TEMPORARY; 662 src[2].Index = u_temp_i; 663 src[2].RelAddr = 0; 664 u_temp_i--; 665 } 666 } 667 668 if(operands >= 2){ 669 if( CMP_SRCS(src[1], src[0]) ){ 670 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, 671 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 672 VSF_FLAG_ALL); 673 674 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), 675 SWIZZLE_X, SWIZZLE_Y, 676 SWIZZLE_Z, SWIZZLE_W, 677 t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4); 678 679 o_inst->src1 = ZERO_SRC_0; 680 o_inst->src2 = UNUSED_SRC_1; 681 o_inst++; 682 683 src[0].File = PROGRAM_TEMPORARY; 684 src[0].Index = u_temp_i; 685 src[0].RelAddr = 0; 686 u_temp_i--; 687 } 688 } 689 690 dst = vpi->DstReg; 691 if (dst.File == PROGRAM_OUTPUT && 692 dst.Index == VERT_RESULT_FOGC && 693 dst.WriteMask & WRITEMASK_X) { 694 fog_temp_i = u_temp_i; 695 dst.File = PROGRAM_TEMPORARY; 696 dst.Index = fog_temp_i; 697 dofogfix = 1; 698 u_temp_i--; 699 } 700 701 /* These ops need special handling. */ 702 switch(vpi->Opcode){ 703 case OPCODE_POW: 704/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter). 705 So may need to insert additional instruction */ 706 if ((src[0].File == src[1].File) && 707 (src[0].Index == src[1].Index)) { 708 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst), 709 t_dst_mask(dst.WriteMask)); 710 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), 711 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), 712 SWIZZLE_ZERO, 713 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), 714 SWIZZLE_ZERO, 715 t_src_class(src[0].File), 716 src[0].NegateBase) | (src[0].RelAddr << 4); 717 o_inst->src1 = UNUSED_SRC_0; 718 o_inst->src2 = UNUSED_SRC_0; 719 } 720 else { 721 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, 722 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 723 VSF_FLAG_ALL); 724 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), 725 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), 726 SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, 727 t_src_class(src[0].File), 728 src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); 729 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), 730 SWIZZLE_ZERO, SWIZZLE_ZERO, 731 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, 732 t_src_class(src[1].File), 733 src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); 734 o_inst->src2 = UNUSED_SRC_1; 735 o_inst++; 736 737 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst), 738 t_dst_mask(dst.WriteMask)); 739 o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i, 740 VSF_IN_COMPONENT_X, 741 VSF_IN_COMPONENT_Y, 742 VSF_IN_COMPONENT_Z, 743 VSF_IN_COMPONENT_W, 744 VSF_IN_CLASS_TMP, 745 VSF_FLAG_NONE); 746 o_inst->src1 = UNUSED_SRC_0; 747 o_inst->src2 = UNUSED_SRC_0; 748 u_temp_i--; 749 } 750 goto next; 751 752 case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} 753 case OPCODE_SWZ: 754 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), 755 t_dst_mask(dst.WriteMask)); 756 o_inst->src0 = t_src(vp, &src[0]); 757 o_inst->src1 = ZERO_SRC_0; 758 o_inst->src2 = UNUSED_SRC_1; 759 goto next; 760 761 case OPCODE_MAD: 762 hw_op=(src[0].File == PROGRAM_TEMPORARY && 763 src[1].File == PROGRAM_TEMPORARY && 764 src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD; 765 766 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst), 767 t_dst_mask(dst.WriteMask)); 768 o_inst->src0 = t_src(vp, &src[0]); 769#if 0 770if ((o_inst - vp->instr) == 31) { 771/* fix up the broken vertex program of quake4 demo... */ 772o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), 773 SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, 774 t_src_class(src[1].File), 775 src[1].NegateBase) | (src[1].RelAddr << 4); 776o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), 777 SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, 778 t_src_class(src[1].File), 779 src[1].NegateBase) | (src[1].RelAddr << 4); 780} 781else { 782 o_inst->src1 = t_src(vp, &src[1]); 783 o_inst->src2 = t_src(vp, &src[2]); 784} 785#else 786 o_inst->src1 = t_src(vp, &src[1]); 787 o_inst->src2 = t_src(vp, &src[2]); 788#endif 789 goto next; 790 791 case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} 792 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst), 793 t_dst_mask(dst.WriteMask)); 794 795 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), 796 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), 797 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), 798 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), 799 SWIZZLE_ZERO, 800 t_src_class(src[0].File), 801 src[0].NegateBase) | (src[0].RelAddr << 4); 802 803 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), 804 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), 805 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), 806 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), 807 SWIZZLE_ZERO, 808 t_src_class(src[1].File), 809 src[1].NegateBase) | (src[1].RelAddr << 4); 810 811 o_inst->src2 = UNUSED_SRC_1; 812 goto next; 813 814 case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} 815 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst), 816 t_dst_mask(dst.WriteMask)); 817 818 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), 819 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), 820 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), 821 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), 822 VSF_IN_COMPONENT_ONE, 823 t_src_class(src[0].File), 824 src[0].NegateBase) | (src[0].RelAddr << 4); 825 o_inst->src1 = t_src(vp, &src[1]); 826 o_inst->src2 = UNUSED_SRC_1; 827 goto next; 828 829 case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W 830 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), 831 t_dst_mask(dst.WriteMask)); 832 833 o_inst->src0 = t_src(vp, &src[0]); 834 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), 835 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), 836 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), 837 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), 838 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), 839 t_src_class(src[1].File), 840 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); 841 o_inst->src2 = UNUSED_SRC_1; 842 goto next; 843 844 case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W 845 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst), 846 t_dst_mask(dst.WriteMask)); 847 848 o_inst->src0=t_src(vp, &src[0]); 849 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), 850 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), 851 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), 852 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), 853 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), 854 t_src_class(src[0].File), 855 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); 856 o_inst->src2 = UNUSED_SRC_1; 857 goto next; 858 859 case OPCODE_FLR: 860 /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} 861 ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ 862 863 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC, 864 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 865 t_dst_mask(dst.WriteMask)); 866 867 o_inst->src0 = t_src(vp, &src[0]); 868 o_inst->src1 = UNUSED_SRC_0; 869 o_inst->src2 = UNUSED_SRC_1; 870 o_inst++; 871 872 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst), 873 t_dst_mask(dst.WriteMask)); 874 875 o_inst->src0 = t_src(vp, &src[0]); 876 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i, 877 VSF_IN_COMPONENT_X, 878 VSF_IN_COMPONENT_Y, 879 VSF_IN_COMPONENT_Z, 880 VSF_IN_COMPONENT_W, 881 VSF_IN_CLASS_TMP, 882 /* Not 100% sure about this */ 883 (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); 884 885 o_inst->src2 = UNUSED_SRC_0; 886 u_temp_i--; 887 goto next; 888 889 case OPCODE_XPD: 890 /* mul r0, r1.yzxw, r2.zxyw 891 mad r0, -r2.yzxw, r1.zxyw, r0 892 NOTE: might need MAD_2 893 */ 894 895 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, 896 (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 897 t_dst_mask(dst.WriteMask)); 898 899 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), 900 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y 901 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z 902 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x 903 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w 904 t_src_class(src[0].File), 905 src[0].NegateBase) | (src[0].RelAddr << 4); 906 907 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), 908 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z 909 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x 910 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y 911 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w 912 t_src_class(src[1].File), 913 src[1].NegateBase) | (src[1].RelAddr << 4); 914 915 o_inst->src2 = UNUSED_SRC_1; 916 o_inst++; 917 u_temp_i--; 918 919 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst), 920 t_dst_mask(dst.WriteMask)); 921 922 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), 923 t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y 924 t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z 925 t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x 926 t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w 927 t_src_class(src[1].File), 928 (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); 929 930 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), 931 t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z 932 t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x 933 t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y 934 t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w 935 t_src_class(src[0].File), 936 src[0].NegateBase) | (src[0].RelAddr << 4); 937 938 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1, 939 VSF_IN_COMPONENT_X, 940 VSF_IN_COMPONENT_Y, 941 VSF_IN_COMPONENT_Z, 942 VSF_IN_COMPONENT_W, 943 VSF_IN_CLASS_TMP, 944 VSF_FLAG_NONE); 945 goto next; 946 947 case OPCODE_END: 948 assert(0); 949 default: 950 break; 951 } 952 953 o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst), 954 t_dst_mask(dst.WriteMask)); 955 956 if(are_srcs_scalar){ 957 switch(operands){ 958 case 1: 959 o_inst->src0 = t_src_scalar(vp, &src[0]); 960 o_inst->src1 = UNUSED_SRC_0; 961 o_inst->src2 = UNUSED_SRC_1; 962 break; 963 964 case 2: 965 o_inst->src0 = t_src_scalar(vp, &src[0]); 966 o_inst->src1 = t_src_scalar(vp, &src[1]); 967 o_inst->src2 = UNUSED_SRC_1; 968 break; 969 970 case 3: 971 o_inst->src0 = t_src_scalar(vp, &src[0]); 972 o_inst->src1 = t_src_scalar(vp, &src[1]); 973 o_inst->src2 = t_src_scalar(vp, &src[2]); 974 break; 975 976 default: 977 fprintf(stderr, "illegal number of operands %lu\n", operands); 978 exit(-1); 979 break; 980 } 981 } else { 982 switch(operands){ 983 case 1: 984 o_inst->src0 = t_src(vp, &src[0]); 985 o_inst->src1 = UNUSED_SRC_0; 986 o_inst->src2 = UNUSED_SRC_1; 987 break; 988 989 case 2: 990 o_inst->src0 = t_src(vp, &src[0]); 991 o_inst->src1 = t_src(vp, &src[1]); 992 o_inst->src2 = UNUSED_SRC_1; 993 break; 994 995 case 3: 996 o_inst->src0 = t_src(vp, &src[0]); 997 o_inst->src1 = t_src(vp, &src[1]); 998 o_inst->src2 = t_src(vp, &src[2]); 999 break; 1000 1001 default: 1002 fprintf(stderr, "illegal number of operands %lu\n", operands); 1003 exit(-1); 1004 break; 1005 } 1006 } 1007 next: 1008 1009 if (dofogfix) { 1010 o_inst++; 1011 if (vp->fogmode == GL_EXP) { 1012 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, 1013 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 1014 VSF_FLAG_X); 1015 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); 1016 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE); 1017 o_inst->src2 = UNUSED_SRC_1; 1018 o_inst++; 1019 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_FOG, 1020 R200_VSF_OUT_CLASS_RESULT_FOGC, 1021 VSF_FLAG_X); 1022 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); 1023 o_inst->src1 = UNUSED_SRC_0; 1024 o_inst->src2 = UNUSED_SRC_1; 1025 } 1026 else if (vp->fogmode == GL_EXP2) { 1027 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, 1028 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 1029 VSF_FLAG_X); 1030 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); 1031 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE); 1032 o_inst->src2 = UNUSED_SRC_1; 1033 o_inst++; 1034 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, 1035 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 1036 VSF_FLAG_X); 1037 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); 1038 o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); 1039 o_inst->src2 = UNUSED_SRC_1; 1040 o_inst++; 1041 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_FOG, 1042 R200_VSF_OUT_CLASS_RESULT_FOGC, 1043 VSF_FLAG_X); 1044 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); 1045 o_inst->src1 = UNUSED_SRC_0; 1046 o_inst->src2 = UNUSED_SRC_1; 1047 } 1048 else { /* fogmode == GL_LINEAR */ 1049 /* could do that with single op (dot) if using params like 1050 with fixed function pipeline fog */ 1051 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, 1052 (fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP, 1053 VSF_FLAG_X); 1054 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL); 1055 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE); 1056 o_inst->src2 = UNUSED_SRC_1; 1057 o_inst++; 1058 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL, 1059 R200_VSF_OUT_CLASS_RESULT_FOGC, 1060 VSF_FLAG_X); 1061 o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE); 1062 o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE); 1063 o_inst->src2 = UNUSED_SRC_1; 1064 1065 } 1066 dofogfix = 0; 1067 } 1068 1069 if (mesa_vp->Base.NumNativeTemporaries < 1070 (mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i))) { 1071 mesa_vp->Base.NumNativeTemporaries = 1072 mesa_vp->Base.NumTemporaries + (R200_VSF_MAX_TEMPS - 1 - u_temp_i); 1073 } 1074 if (u_temp_i < mesa_vp->Base.NumTemporaries) { 1075 if (R200_DEBUG & DEBUG_FALLBACKS) { 1076 fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_i); 1077 } 1078 return GL_FALSE; 1079 } 1080 u_temp_i = R200_VSF_MAX_TEMPS - 1; 1081 if(o_inst - vp->instr >= R200_VSF_MAX_INST) { 1082 mesa_vp->Base.NumNativeInstructions = 129; 1083 if (R200_DEBUG & DEBUG_FALLBACKS) { 1084 fprintf(stderr, "more than 128 native instructions\n"); 1085 } 1086 return GL_FALSE; 1087 } 1088 if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) { 1089 vp->pos_end = (o_inst - vp->instr); 1090 } 1091 } 1092 1093 vp->native = GL_TRUE; 1094 mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr); 1095#if 0 1096 fprintf(stderr, "hw program:\n"); 1097 for(i=0; i < vp->program.length; i++) 1098 fprintf(stderr, "%08x\n", vp->instr[i]); 1099#endif 1100 return GL_TRUE; 1101} 1102 1103void r200SetupVertexProg( GLcontext *ctx ) { 1104 r200ContextPtr rmesa = R200_CONTEXT(ctx); 1105 struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current; 1106 GLboolean fallback; 1107 GLint i; 1108 1109 if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) { 1110 rmesa->curr_vp_hw = NULL; 1111 r200_translate_vertex_program(vp, ctx->Fog.Mode); 1112 } 1113 /* could optimize setting up vertex progs away for non-tcl hw */ 1114 fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) && 1115 rmesa->r200Screen->drmSupportsVertexProgram); 1116 TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback); 1117 if (rmesa->TclFallback) return; 1118 1119 R200_STATECHANGE( rmesa, vap ); 1120 /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it? 1121 maybe only when using more than 64 inst / 96 param? */ 1122 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/; 1123 1124 R200_STATECHANGE( rmesa, pvs ); 1125 1126 rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) | 1127 ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) | 1128 (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT); 1129 rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | 1130 (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT); 1131 1132 /* maybe user clip planes just work with vertex progs... untested */ 1133 if (ctx->Transform.ClipPlanesEnabled) { 1134 R200_STATECHANGE( rmesa, tcl ); 1135 if (vp->mesa_program.IsPositionInvariant) { 1136 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2); 1137 } 1138 else { 1139 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc); 1140 } 1141 } 1142 1143 if (vp != rmesa->curr_vp_hw) { 1144 GLuint count = vp->mesa_program.Base.NumNativeInstructions; 1145 drm_radeon_cmd_header_t tmp; 1146 1147 R200_STATECHANGE( rmesa, vpi[0] ); 1148 R200_STATECHANGE( rmesa, vpi[1] ); 1149 1150 /* FIXME: what about using a memcopy... */ 1151 for (i = 0; (i < 64) && i < count; i++) { 1152 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op; 1153 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0; 1154 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1; 1155 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2; 1156 } 1157 /* hack up the cmd_size so not the whole state atom is emitted always. 1158 This may require some more thought, we may emit half progs on lost state, but 1159 hopefully it won't matter? 1160 WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected) 1161 packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */ 1162 rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count); 1163 tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0]; 1164 tmp.veclinear.count = (count > 64) ? 64 : count; 1165 rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i; 1166 if (count > 64) { 1167 for (i = 0; i < (count - 64); i++) { 1168 rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op; 1169 rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0; 1170 rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1; 1171 rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2; 1172 } 1173 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64); 1174 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0]; 1175 tmp.veclinear.count = count - 64; 1176 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i; 1177 } 1178 rmesa->curr_vp_hw = vp; 1179 } 1180} 1181 1182 1183static void 1184r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) 1185{ 1186 r200ContextPtr rmesa = R200_CONTEXT(ctx); 1187 1188 switch(target){ 1189 case GL_VERTEX_PROGRAM_ARB: 1190 rmesa->curr_vp_hw = NULL; 1191 break; 1192 default: 1193 _mesa_problem(ctx, "Target not supported yet!"); 1194 break; 1195 } 1196} 1197 1198static struct gl_program * 1199r200NewProgram(GLcontext *ctx, GLenum target, GLuint id) 1200{ 1201 struct r200_vertex_program *vp; 1202 1203 switch(target){ 1204 case GL_VERTEX_PROGRAM_ARB: 1205 vp = CALLOC_STRUCT(r200_vertex_program); 1206 return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); 1207 case GL_FRAGMENT_PROGRAM_ARB: 1208 case GL_FRAGMENT_PROGRAM_NV: 1209 return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id ); 1210 default: 1211 _mesa_problem(ctx, "Bad target in r200NewProgram"); 1212 } 1213 return NULL; 1214} 1215 1216 1217static void 1218r200DeleteProgram(GLcontext *ctx, struct gl_program *prog) 1219{ 1220 _mesa_delete_program(ctx, prog); 1221} 1222 1223static void 1224r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog) 1225{ 1226 struct r200_vertex_program *vp = (void *)prog; 1227 r200ContextPtr rmesa = R200_CONTEXT(ctx); 1228 1229 switch(target) { 1230 case GL_VERTEX_PROGRAM_ARB: 1231 vp->translated = GL_FALSE; 1232 vp->fogpidx = 0; 1233/* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/ 1234 r200_translate_vertex_program(vp, ctx->Fog.Mode); 1235 rmesa->curr_vp_hw = NULL; 1236 break; 1237 } 1238 /* need this for tcl fallbacks */ 1239 _tnl_program_string(ctx, target, prog); 1240} 1241 1242static GLboolean 1243r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog) 1244{ 1245 struct r200_vertex_program *vp = (void *)prog; 1246 1247 switch(target){ 1248 case GL_VERTEX_STATE_PROGRAM_NV: 1249 case GL_VERTEX_PROGRAM_ARB: 1250 if (!vp->translated) { 1251 r200_translate_vertex_program(vp, ctx->Fog.Mode); 1252 } 1253 /* does not take parameters etc. into account */ 1254 return vp->native; 1255 default: 1256 _mesa_problem(ctx, "Bad target in r200NewProgram"); 1257 } 1258 return 0; 1259} 1260 1261void r200InitShaderFuncs(struct dd_function_table *functions) 1262{ 1263 functions->NewProgram = r200NewProgram; 1264 functions->BindProgram = r200BindProgram; 1265 functions->DeleteProgram = r200DeleteProgram; 1266 functions->ProgramStringNotify = r200ProgramStringNotify; 1267 functions->IsProgramNative = r200IsProgramNative; 1268} 1269