i915_fragprog.c revision 594c3f67ac8fceb061e47b090ec4d149c55a1940
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "glheader.h" 29#include "macros.h" 30#include "enums.h" 31 32#include "tnl/t_context.h" 33#include "intel_batchbuffer.h" 34 35#include "i915_reg.h" 36#include "i915_context.h" 37#include "i915_program.h" 38 39#include "nvfragprog.h" 40#include "program.h" 41#include "arbfragparse.h" 42 43 44 45 46#define PI 3.141592 47 48 49/* 1, -1/3!, 1/5!, -1/7! */ 50static const GLfloat sin_constants[4] = { 1.0, 51 -1.0/(3*2*1), 52 1.0/(5*4*3*2*1), 53 -1.0/(7*6*5*4*3*2*1) }; 54 55/* 1, -1/2!, 1/4!, -1/6! */ 56static const GLfloat cos_constants[4] = { 1.0, 57 -1.0/(2*1), 58 1.0/(4*3*2*1), 59 -1.0/(6*5*4*3*2*1) }; 60 61/** 62 * Retrieve a ureg for the given source register. Will emit 63 * constants, apply swizzling and negation as needed. 64 */ 65static GLuint src_vector( struct i915_fragment_program *p, 66 const struct fp_src_register *source, 67 const struct fragment_program *program ) 68{ 69 GLuint src; 70 71 switch (source->File) { 72 73 /* Registers: 74 */ 75 case PROGRAM_TEMPORARY: 76 if (source->Index >= I915_MAX_TEMPORARY) { 77 i915_program_error( p, "Exceeded max temporary reg" ); 78 return 0; 79 } 80 src = UREG( REG_TYPE_R, source->Index ); 81 break; 82 case PROGRAM_INPUT: 83 switch (source->Index) { 84 case FRAG_ATTRIB_WPOS: 85 src = i915_emit_decl( p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL ); 86 break; 87 case FRAG_ATTRIB_COL0: 88 src = i915_emit_decl( p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL ); 89 break; 90 case FRAG_ATTRIB_COL1: 91 src = i915_emit_decl( p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ ); 92 src = swizzle( src, X, Y, Z, ONE ); 93 break; 94 case FRAG_ATTRIB_FOGC: 95 src = i915_emit_decl( p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W ); 96 src = swizzle( src, W, W, W, W ); 97 break; 98 case FRAG_ATTRIB_TEX0: 99 case FRAG_ATTRIB_TEX1: 100 case FRAG_ATTRIB_TEX2: 101 case FRAG_ATTRIB_TEX3: 102 case FRAG_ATTRIB_TEX4: 103 case FRAG_ATTRIB_TEX5: 104 case FRAG_ATTRIB_TEX6: 105 case FRAG_ATTRIB_TEX7: 106 src = i915_emit_decl( p, REG_TYPE_T, 107 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), 108 D0_CHANNEL_ALL ); 109 break; 110 111 default: 112 i915_program_error( p, "Bad source->Index" ); 113 return 0; 114 } 115 break; 116 117 /* Various paramters and env values. All emitted to 118 * hardware as program constants. 119 */ 120 case PROGRAM_LOCAL_PARAM: 121 src = i915_emit_param4fv( 122 p, program->Base.LocalParams[source->Index]); 123 break; 124 125 case PROGRAM_ENV_PARAM: 126 src = i915_emit_param4fv( 127 p, p->ctx->FragmentProgram.Parameters[source->Index]); 128 break; 129 130 case PROGRAM_STATE_VAR: 131 case PROGRAM_NAMED_PARAM: 132 src = i915_emit_param4fv( 133 p, program->Parameters->Parameters[source->Index].Values ); 134 break; 135 136 default: 137 i915_program_error( p, "Bad source->File" ); 138 return 0; 139 } 140 141 src = swizzle(src, 142 source->Swizzle[0], 143 source->Swizzle[1], 144 source->Swizzle[2], 145 source->Swizzle[3]); 146 147 if (source->NegateBase) 148 src = negate( src, 1,1,1,1 ); 149 150 return src; 151} 152 153 154static GLuint get_result_vector( struct i915_fragment_program *p, 155 const struct fp_instruction *inst ) 156{ 157 switch (inst->DstReg.File) { 158 case PROGRAM_OUTPUT: 159 switch (inst->DstReg.Index) { 160 case 0: 161 return UREG(REG_TYPE_OC, 0); 162 case 1: 163 p->depth_written = 1; 164 return UREG(REG_TYPE_OD, 0); 165 default: 166 i915_program_error( p, "Bad inst->DstReg.Index" ); 167 return 0; 168 } 169 case PROGRAM_TEMPORARY: 170 return UREG(REG_TYPE_R, inst->DstReg.Index); 171 default: 172 i915_program_error( p, "Bad inst->DstReg.File" ); 173 return 0; 174 } 175} 176 177static GLuint get_result_flags( const struct fp_instruction *inst ) 178{ 179 GLuint flags = 0; 180 181 if (inst->Saturate) flags |= A0_DEST_SATURATE; 182 if (inst->DstReg.WriteMask[0]) flags |= A0_DEST_CHANNEL_X; 183 if (inst->DstReg.WriteMask[1]) flags |= A0_DEST_CHANNEL_Y; 184 if (inst->DstReg.WriteMask[2]) flags |= A0_DEST_CHANNEL_Z; 185 if (inst->DstReg.WriteMask[3]) flags |= A0_DEST_CHANNEL_W; 186 187 return flags; 188} 189 190static GLuint translate_tex_src_bit( struct i915_fragment_program *p, 191 GLubyte bit ) 192{ 193 switch (bit) { 194 case TEXTURE_1D_BIT: return D0_SAMPLE_TYPE_2D; 195 case TEXTURE_2D_BIT: return D0_SAMPLE_TYPE_2D; 196 case TEXTURE_RECT_BIT: return D0_SAMPLE_TYPE_2D; 197 case TEXTURE_3D_BIT: return D0_SAMPLE_TYPE_VOLUME; 198 case TEXTURE_CUBE_BIT: return D0_SAMPLE_TYPE_CUBE; 199 default: i915_program_error(p, "TexSrcBit"); return 0; 200 } 201} 202 203#define EMIT_TEX( OP ) \ 204do { \ 205 GLuint dim = translate_tex_src_bit( p, inst->TexSrcBit ); \ 206 GLuint sampler = i915_emit_decl(p, REG_TYPE_S, \ 207 inst->TexSrcUnit, dim); \ 208 GLuint coord = src_vector( p, &inst->SrcReg[0], program); \ 209 /* Texel lookup */ \ 210 \ 211 i915_emit_texld( p, \ 212 get_result_vector( p, inst ), \ 213 get_result_flags( inst ), \ 214 sampler, \ 215 coord, \ 216 OP); \ 217} while (0) 218 219#define EMIT_ARITH( OP, N ) \ 220do { \ 221 i915_emit_arith( p, \ 222 OP, \ 223 get_result_vector( p, inst ), \ 224 get_result_flags( inst ), 0, \ 225 (N<1)?0:src_vector( p, &inst->SrcReg[0], program), \ 226 (N<2)?0:src_vector( p, &inst->SrcReg[1], program), \ 227 (N<3)?0:src_vector( p, &inst->SrcReg[2], program)); \ 228} while (0) 229 230#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 ) 231#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 ) 232#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 ) 233 234 235/* Possible concerns: 236 * 237 * SIN, COS -- could use another taylor step? 238 * LIT -- results seem a little different to sw mesa 239 * LOG -- different to mesa on negative numbers, but this is conformant. 240 * 241 * Parse failures -- Mesa doesn't currently give a good indication 242 * internally whether a particular program string parsed or not. This 243 * can lead to confusion -- hopefully we cope with it ok now. 244 * 245 */ 246static void upload_program( struct i915_fragment_program *p ) 247{ 248 const struct fragment_program *program = p->ctx->FragmentProgram.Current; 249 const struct fp_instruction *inst = program->Instructions; 250 251/* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */ 252 253 /* Is this a parse-failed program? Ensure a valid program is 254 * loaded, as the flagging of an error isn't sufficient to stop 255 * this being uploaded to hardware. 256 */ 257 if (inst[0].Opcode == FP_OPCODE_END) { 258 GLuint tmp = i915_get_utemp( p ); 259 i915_emit_arith( p, 260 A0_MOV, 261 UREG(REG_TYPE_OC, 0), 262 A0_DEST_CHANNEL_ALL, 0, 263 swizzle(tmp,ONE,ZERO,ONE,ONE), 0, 0); 264 return; 265 } 266 267 while (1) { 268 GLuint src0, src1, src2, flags; 269 GLuint tmp = 0; 270 271 switch (inst->Opcode) { 272 case FP_OPCODE_ABS: 273 src0 = src_vector( p, &inst->SrcReg[0], program); 274 i915_emit_arith( p, 275 A0_MAX, 276 get_result_vector( p, inst ), 277 get_result_flags( inst ), 0, 278 src0, negate(src0, 1,1,1,1), 0); 279 break; 280 281 case FP_OPCODE_ADD: 282 EMIT_2ARG_ARITH( A0_ADD ); 283 break; 284 285 case FP_OPCODE_CMP: 286 src0 = src_vector( p, &inst->SrcReg[0], program); 287 src1 = src_vector( p, &inst->SrcReg[1], program); 288 src2 = src_vector( p, &inst->SrcReg[2], program); 289 i915_emit_arith( p, 290 A0_CMP, 291 get_result_vector( p, inst ), 292 get_result_flags( inst ), 0, 293 src0, src2, src1); /* NOTE: order of src2, src1 */ 294 break; 295 296 case FP_OPCODE_COS: 297 src0 = src_vector( p, &inst->SrcReg[0], program); 298 tmp = i915_get_utemp( p ); 299 300 i915_emit_arith( p, 301 A0_MUL, 302 tmp, A0_DEST_CHANNEL_X, 0, 303 src0, 304 i915_emit_const1f(p, 1.0/(PI * 2)), 305 0); 306 307 i915_emit_arith( p, 308 A0_MOD, 309 tmp, A0_DEST_CHANNEL_X, 0, 310 tmp, 311 0, 0 ); 312 313 /* By choosing different taylor constants, could get rid of this mul: 314 */ 315 i915_emit_arith( p, 316 A0_MUL, 317 tmp, A0_DEST_CHANNEL_X, 0, 318 tmp, 319 i915_emit_const1f(p, (PI * 2)), 320 0); 321 322 /* 323 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 324 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 325 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 326 * result = DP4 t0, cos_constants 327 */ 328 i915_emit_arith( p, 329 A0_MUL, 330 tmp, A0_DEST_CHANNEL_XY, 0, 331 swizzle(tmp, X,X,ONE,ONE), 332 swizzle(tmp, X,ONE,ONE,ONE), 0); 333 334 i915_emit_arith( p, 335 A0_MUL, 336 tmp, A0_DEST_CHANNEL_XYZ, 0, 337 swizzle(tmp, X,Y,X,ONE), 338 swizzle(tmp, X,X,ONE,ONE), 0); 339 340 i915_emit_arith( p, 341 A0_MUL, 342 tmp, A0_DEST_CHANNEL_XYZ, 0, 343 swizzle(tmp, X,X,Z,ONE), 344 swizzle(tmp, Z,ONE,ONE,ONE), 0); 345 346 i915_emit_arith( p, 347 A0_DP4, 348 get_result_vector( p, inst ), 349 get_result_flags( inst ), 0, 350 swizzle(tmp, ONE,Z,Y,X), 351 i915_emit_const4fv( p, cos_constants ), 0); 352 353 break; 354 355 case FP_OPCODE_DP3: 356 EMIT_2ARG_ARITH( A0_DP3 ); 357 break; 358 359 case FP_OPCODE_DP4: 360 EMIT_2ARG_ARITH( A0_DP4 ); 361 break; 362 363 case FP_OPCODE_DPH: 364 src0 = src_vector( p, &inst->SrcReg[0], program); 365 src1 = src_vector( p, &inst->SrcReg[1], program); 366 367 i915_emit_arith( p, 368 A0_DP4, 369 get_result_vector( p, inst ), 370 get_result_flags( inst ), 0, 371 swizzle(src0, X,Y,Z,ONE), src1, 0); 372 break; 373 374 case FP_OPCODE_DST: 375 src0 = src_vector( p, &inst->SrcReg[0], program); 376 src1 = src_vector( p, &inst->SrcReg[1], program); 377 378 /* result[0] = 1 * 1; 379 * result[1] = a[1] * b[1]; 380 * result[2] = a[2] * 1; 381 * result[3] = 1 * b[3]; 382 */ 383 i915_emit_arith( p, 384 A0_MUL, 385 get_result_vector( p, inst ), 386 get_result_flags( inst ), 0, 387 swizzle(src0, ONE, Y, Z, ONE), 388 swizzle(src1, ONE, Y, ONE, W ), 389 0); 390 break; 391 392 case FP_OPCODE_EX2: 393 src0 = src_vector( p, &inst->SrcReg[0], program); 394 395 i915_emit_arith( p, 396 A0_EXP, 397 get_result_vector( p, inst ), 398 get_result_flags( inst ), 0, 399 swizzle(src0,X,X,X,X), 0, 0); 400 break; 401 402 case FP_OPCODE_FLR: 403 EMIT_1ARG_ARITH( A0_FLR ); 404 break; 405 406 case FP_OPCODE_FRC: 407 EMIT_1ARG_ARITH( A0_FRC ); 408 break; 409 410 case FP_OPCODE_KIL: 411 src0 = src_vector( p, &inst->SrcReg[0], program); 412 tmp = i915_get_utemp( p ); 413 414 i915_emit_texld( p, 415 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ 416 0, 417 src0, 418 T0_TEXKILL ); 419 break; 420 421 case FP_OPCODE_LG2: 422 src0 = src_vector( p, &inst->SrcReg[0], program); 423 424 i915_emit_arith( p, 425 A0_LOG, 426 get_result_vector( p, inst ), 427 get_result_flags( inst ), 0, 428 swizzle(src0,X,X,X,X), 0, 0); 429 break; 430 431 case FP_OPCODE_LIT: 432 src0 = src_vector( p, &inst->SrcReg[0], program); 433 tmp = i915_get_utemp( p ); 434 435 /* tmp = max( a.xyzw, a.00zw ) 436 * XXX: Clamp tmp.w to -128..128 437 * tmp.y = log(tmp.y) 438 * tmp.y = tmp.w * tmp.y 439 * tmp.y = exp(tmp.y) 440 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 441 */ 442 i915_emit_arith( p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 443 src0, swizzle(src0, ZERO, ZERO, Z, W), 0 ); 444 445 i915_emit_arith( p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 446 swizzle(tmp, Y, Y, Y, Y), 0, 0 ); 447 448 i915_emit_arith( p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 449 swizzle(tmp, ZERO, Y, ZERO, ZERO), 450 swizzle(tmp, ZERO, W, ZERO, ZERO), 0 ); 451 452 i915_emit_arith( p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 453 swizzle(tmp, Y, Y, Y, Y), 0, 0 ); 454 455 i915_emit_arith( p, A0_CMP, 456 get_result_vector( p, inst ), 457 get_result_flags( inst ), 0, 458 negate(swizzle(tmp, ONE, ONE, X, ONE),0,0,1,0), 459 swizzle(tmp, ONE, X, ZERO, ONE), 460 swizzle(tmp, ONE, X, Y, ONE)); 461 462 break; 463 464 case FP_OPCODE_LRP: 465 src0 = src_vector( p, &inst->SrcReg[0], program); 466 src1 = src_vector( p, &inst->SrcReg[1], program); 467 src2 = src_vector( p, &inst->SrcReg[2], program); 468 flags = get_result_flags( inst ); 469 tmp = i915_get_utemp( p ); 470 471 /* b*a + c*(1-a) 472 * 473 * b*a + c - ca 474 * 475 * tmp = b*a + c, 476 * result = (-c)*a + tmp 477 */ 478 i915_emit_arith( p, A0_MAD, tmp, 479 flags & A0_DEST_CHANNEL_ALL, 0, 480 src1, src0, src2 ); 481 482 i915_emit_arith( p, A0_MAD, 483 get_result_vector( p, inst ), 484 flags, 0, 485 negate(src2, 1,1,1,1), src0, tmp ); 486 break; 487 488 case FP_OPCODE_MAD: 489 EMIT_3ARG_ARITH( A0_MAD ); 490 break; 491 492 case FP_OPCODE_MAX: 493 EMIT_2ARG_ARITH( A0_MAX ); 494 break; 495 496 case FP_OPCODE_MIN: 497 src0 = src_vector( p, &inst->SrcReg[0], program); 498 src1 = src_vector( p, &inst->SrcReg[1], program); 499 tmp = i915_get_utemp( p ); 500 flags = get_result_flags( inst ); 501 502 i915_emit_arith( p, 503 A0_MAX, 504 tmp, flags & A0_DEST_CHANNEL_ALL, 0, 505 negate(src0,1,1,1,1), 506 negate(src1,1,1,1,1), 0); 507 508 i915_emit_arith( p, 509 A0_MOV, 510 get_result_vector( p, inst ), 511 flags, 0, 512 negate(tmp, 1,1,1,1), 0, 0); 513 break; 514 515 case FP_OPCODE_MOV: 516 EMIT_1ARG_ARITH( A0_MOV ); 517 break; 518 519 case FP_OPCODE_MUL: 520 EMIT_2ARG_ARITH( A0_MUL ); 521 break; 522 523 case FP_OPCODE_POW: 524 src0 = src_vector( p, &inst->SrcReg[0], program); 525 src1 = src_vector( p, &inst->SrcReg[1], program); 526 tmp = i915_get_utemp( p ); 527 flags = get_result_flags( inst ); 528 529 /* XXX: masking on intermediate values, here and elsewhere. 530 */ 531 i915_emit_arith( p, 532 A0_LOG, 533 tmp, A0_DEST_CHANNEL_X, 0, 534 swizzle(src0,X,X,X,X), 0, 0); 535 536 i915_emit_arith( p, 537 A0_MUL, 538 tmp, A0_DEST_CHANNEL_X, 0, 539 tmp, src1, 0); 540 541 542 i915_emit_arith( p, 543 A0_EXP, 544 get_result_vector( p, inst ), 545 flags, 0, 546 swizzle(tmp,X,X,X,X), 0, 0); 547 548 break; 549 550 case FP_OPCODE_RCP: 551 src0 = src_vector( p, &inst->SrcReg[0], program); 552 553 i915_emit_arith( p, 554 A0_RCP, 555 get_result_vector( p, inst ), 556 get_result_flags( inst ), 0, 557 swizzle(src0,X,X,X,X), 0, 0); 558 break; 559 560 case FP_OPCODE_RSQ: 561 562 src0 = src_vector( p, &inst->SrcReg[0], program); 563 564 i915_emit_arith( p, 565 A0_RSQ, 566 get_result_vector( p, inst ), 567 get_result_flags( inst ), 0, 568 swizzle(src0,X,X,X,X), 0, 0); 569 break; 570 571 case FP_OPCODE_SCS: 572 src0 = src_vector( p, &inst->SrcReg[0], program); 573 tmp = i915_get_utemp( p ); 574 575 /* 576 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 577 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 578 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 579 * scs.x = DP4 t1, sin_constants 580 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 581 * scs.y = DP4 t1, cos_constants 582 */ 583 i915_emit_arith( p, 584 A0_MUL, 585 tmp, A0_DEST_CHANNEL_XY, 0, 586 swizzle(src0, X,X,ONE,ONE), 587 swizzle(src0, X,ONE,ONE,ONE), 0); 588 589 i915_emit_arith( p, 590 A0_MUL, 591 tmp, A0_DEST_CHANNEL_ALL, 0, 592 swizzle(tmp, X,Y,X,Y), 593 swizzle(tmp, X,X,ONE,ONE), 0); 594 595 if (inst->DstReg.WriteMask[1]) { 596 GLuint tmp1; 597 598 if (inst->DstReg.WriteMask[0]) 599 tmp1 = i915_get_utemp( p ); 600 else 601 tmp1 = tmp; 602 603 i915_emit_arith( p, 604 A0_MUL, 605 tmp1, A0_DEST_CHANNEL_ALL, 0, 606 swizzle(tmp, X,Y,Y,W), 607 swizzle(tmp, X,Z,ONE,ONE), 0); 608 609 i915_emit_arith( p, 610 A0_DP4, 611 get_result_vector( p, inst ), 612 A0_DEST_CHANNEL_Y, 0, 613 swizzle(tmp1, W,Z,Y,X), 614 i915_emit_const4fv( p, sin_constants ), 0); 615 } 616 617 if (inst->DstReg.WriteMask[0]) { 618 i915_emit_arith( p, 619 A0_MUL, 620 tmp, A0_DEST_CHANNEL_XYZ, 0, 621 swizzle(tmp, X,X,Z,ONE), 622 swizzle(tmp, Z,ONE,ONE,ONE), 0); 623 624 i915_emit_arith( p, 625 A0_DP4, 626 get_result_vector( p, inst ), 627 A0_DEST_CHANNEL_X, 0, 628 swizzle(tmp, ONE,Z,Y,X), 629 i915_emit_const4fv( p, cos_constants ), 0); 630 } 631 break; 632 633 case FP_OPCODE_SGE: 634 EMIT_2ARG_ARITH( A0_SGE ); 635 break; 636 637 case FP_OPCODE_SIN: 638 src0 = src_vector( p, &inst->SrcReg[0], program); 639 tmp = i915_get_utemp( p ); 640 641 i915_emit_arith( p, 642 A0_MUL, 643 tmp, A0_DEST_CHANNEL_X, 0, 644 src0, 645 i915_emit_const1f(p, 1.0/(PI * 2)), 646 0); 647 648 i915_emit_arith( p, 649 A0_MOD, 650 tmp, A0_DEST_CHANNEL_X, 0, 651 tmp, 652 0, 0 ); 653 654 /* By choosing different taylor constants, could get rid of this mul: 655 */ 656 i915_emit_arith( p, 657 A0_MUL, 658 tmp, A0_DEST_CHANNEL_X, 0, 659 tmp, 660 i915_emit_const1f(p, (PI * 2)), 661 0); 662 663 /* 664 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 665 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 666 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 667 * result = DP4 t1.wzyx, sin_constants 668 */ 669 i915_emit_arith( p, 670 A0_MUL, 671 tmp, A0_DEST_CHANNEL_XY, 0, 672 swizzle(tmp, X,X,ONE,ONE), 673 swizzle(tmp, X,ONE,ONE,ONE), 0); 674 675 i915_emit_arith( p, 676 A0_MUL, 677 tmp, A0_DEST_CHANNEL_ALL, 0, 678 swizzle(tmp, X,Y,X,Y), 679 swizzle(tmp, X,X,ONE,ONE), 0); 680 681 i915_emit_arith( p, 682 A0_MUL, 683 tmp, A0_DEST_CHANNEL_ALL, 0, 684 swizzle(tmp, X,Y,Y,W), 685 swizzle(tmp, X,Z,ONE,ONE), 0); 686 687 i915_emit_arith( p, 688 A0_DP4, 689 get_result_vector( p, inst ), 690 get_result_flags( inst ), 0, 691 swizzle(tmp, W, Z, Y, X ), 692 i915_emit_const4fv( p, sin_constants ), 0); 693 break; 694 695 case FP_OPCODE_SLT: 696 EMIT_2ARG_ARITH( A0_SLT ); 697 break; 698 699 case FP_OPCODE_SUB: 700 src0 = src_vector( p, &inst->SrcReg[0], program); 701 src1 = src_vector( p, &inst->SrcReg[1], program); 702 703 i915_emit_arith( p, 704 A0_ADD, 705 get_result_vector( p, inst ), 706 get_result_flags( inst ), 0, 707 src0, negate(src1, 1,1,1,1), 0); 708 break; 709 710 case FP_OPCODE_SWZ: 711 EMIT_1ARG_ARITH( A0_MOV ); /* extended swizzle handled natively */ 712 break; 713 714 case FP_OPCODE_TEX: 715 EMIT_TEX( T0_TEXLD ); 716 break; 717 718 case FP_OPCODE_TXB: 719 EMIT_TEX( T0_TEXLDB ); 720 break; 721 722 case FP_OPCODE_TXP: 723 EMIT_TEX( T0_TEXLDP ); 724 break; 725 726 case FP_OPCODE_XPD: 727 /* Cross product: 728 * result.x = src0.y * src1.z - src0.z * src1.y; 729 * result.y = src0.z * src1.x - src0.x * src1.z; 730 * result.z = src0.x * src1.y - src0.y * src1.x; 731 * result.w = undef; 732 */ 733 src0 = src_vector( p, &inst->SrcReg[0], program); 734 src1 = src_vector( p, &inst->SrcReg[1], program); 735 tmp = i915_get_utemp( p ); 736 737 i915_emit_arith( p, 738 A0_MUL, 739 tmp, A0_DEST_CHANNEL_ALL, 0, 740 swizzle(src0,Z,X,Y,ONE), 741 swizzle(src1,Y,Z,X,ONE), 0); 742 743 i915_emit_arith( p, 744 A0_MAD, 745 get_result_vector( p, inst ), 746 get_result_flags( inst ), 0, 747 swizzle(src0,Y,Z,X,ONE), 748 swizzle(src1,Z,X,Y,ONE), 749 negate(tmp,1,1,1,0)); 750 break; 751 752 case FP_OPCODE_END: 753 return; 754 755 default: 756 i915_program_error( p, "bad opcode" ); 757 return; 758 } 759 760 inst++; 761 i915_release_utemps( p ); 762 } 763} 764 765/* Rather than trying to intercept and jiggle depth writes during 766 * emit, just move the value into its correct position at the end of 767 * the program: 768 */ 769static void fixup_depth_write( struct i915_fragment_program *p ) 770{ 771 if (p->depth_written) { 772 GLuint depth = UREG(REG_TYPE_OD, 0); 773 774 i915_emit_arith( p, 775 A0_MOV, 776 depth, A0_DEST_CHANNEL_W, 0, 777 swizzle(depth,X,Y,Z,Z), 778 0, 0); 779 } 780} 781 782 783#define FRAG_BIT_TEX(n) (FRAG_BIT_TEX0 << (n)) 784 785 786static void check_wpos( struct i915_fragment_program *p ) 787{ 788 GLuint inputs = p->FragProg.InputsRead; 789 GLint i; 790 791 p->wpos_tex = 0; 792 793 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 794 if (inputs & FRAG_BIT_TEX(i)) 795 continue; 796 else if (inputs & FRAG_BIT_WPOS) { 797 p->wpos_tex = i; 798 inputs &= ~FRAG_BIT_WPOS; 799 } 800 } 801 802 if (inputs & FRAG_BIT_WPOS) { 803 i915_program_error(p, "No free texcoord for wpos value"); 804 } 805} 806 807 808static void translate_program( struct i915_fragment_program *p ) 809{ 810 i915ContextPtr i915 = I915_CONTEXT(p->ctx); 811 812 i915_init_program( i915, p ); 813 check_wpos( p ); 814 upload_program( p ); 815 fixup_depth_write( p ); 816 i915_fini_program( p ); 817 818 p->translated = 1; 819} 820 821 822static void track_params( struct i915_fragment_program *p ) 823{ 824 GLint i; 825 826 if (p->nr_params) 827 _mesa_load_state_parameters(p->ctx, p->FragProg.Parameters); 828 829 for (i = 0; i < p->nr_params; i++) { 830 GLint reg = p->param[i].reg; 831 COPY_4V( p->constant[reg], p->param[i].values ); 832 } 833 834 p->params_uptodate = 1; 835 p->on_hardware = 0; /* overkill */ 836} 837 838 839static void i915BindProgram( GLcontext *ctx, 840 GLenum target, 841 struct program *prog ) 842{ 843 if (target == GL_FRAGMENT_PROGRAM_ARB) { 844 i915ContextPtr i915 = I915_CONTEXT(ctx); 845 struct i915_fragment_program *p = (struct i915_fragment_program *)prog; 846 847 if (i915->current_program == p) 848 return; 849 850 if (i915->current_program) { 851 i915->current_program->on_hardware = 0; 852 i915->current_program->params_uptodate = 0; 853 } 854 855 i915->current_program = p; 856 857 assert(p->on_hardware == 0); 858 assert(p->params_uptodate == 0); 859 860 /* Hack: make sure fog is correctly enabled according to this 861 * fragment program's fog options. 862 */ 863 ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB, 864 ctx->FragmentProgram.Enabled ); 865 } 866} 867 868static struct program *i915NewProgram( GLcontext *ctx, 869 GLenum target, 870 GLuint id ) 871{ 872 switch (target) { 873 case GL_VERTEX_PROGRAM_ARB: 874 return _mesa_init_vertex_program( ctx, CALLOC_STRUCT(vertex_program), 875 target, id ); 876 877 case GL_FRAGMENT_PROGRAM_ARB: { 878 struct i915_fragment_program *prog = CALLOC_STRUCT(i915_fragment_program); 879 if (prog) { 880 i915_init_program( I915_CONTEXT(ctx), prog ); 881 882 return _mesa_init_fragment_program( ctx, &prog->FragProg, 883 target, id ); 884 } 885 else 886 return NULL; 887 } 888 889 case GL_FRAGMENT_PROGRAM_NV: 890 default: 891 _mesa_problem(ctx, "bad target in _mesa_new_program"); 892 return NULL; 893 } 894} 895 896static void i915DeleteProgram( GLcontext *ctx, 897 struct program *prog ) 898{ 899 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 900 i915ContextPtr i915 = I915_CONTEXT(ctx); 901 struct i915_fragment_program *p = (struct i915_fragment_program *)prog; 902 903 if (i915->current_program == p) 904 i915->current_program = 0; 905 } 906 907 _mesa_delete_program( ctx, prog ); 908} 909 910 911static GLboolean i915IsProgramNative( GLcontext *ctx, 912 GLenum target, 913 struct program *prog ) 914{ 915 if (target == GL_FRAGMENT_PROGRAM_ARB) { 916 struct i915_fragment_program *p = (struct i915_fragment_program *)prog; 917 918 if (!p->translated) 919 translate_program( p ); 920 921 return !p->error; 922 } 923 else 924 return GL_TRUE; 925} 926 927static void i915ProgramStringNotify( GLcontext *ctx, 928 GLenum target, 929 struct program *prog ) 930{ 931 if (target == GL_FRAGMENT_PROGRAM_ARB) { 932 struct i915_fragment_program *p = (struct i915_fragment_program *)prog; 933 p->translated = 0; 934 935 /* Hack: make sure fog is correctly enabled according to this 936 * fragment program's fog options. 937 */ 938 ctx->Driver.Enable( ctx, GL_FRAGMENT_PROGRAM_ARB, 939 ctx->FragmentProgram.Enabled ); 940 } 941} 942 943 944void i915ValidateFragmentProgram( i915ContextPtr i915 ) 945{ 946 GLcontext *ctx = &i915->intel.ctx; 947 intelContextPtr intel = INTEL_CONTEXT(ctx); 948 TNLcontext *tnl = TNL_CONTEXT(ctx); 949 struct vertex_buffer *VB = &tnl->vb; 950 951 struct i915_fragment_program *p = 952 (struct i915_fragment_program *)ctx->FragmentProgram.Current; 953 954 GLuint inputsRead = p->FragProg.InputsRead; 955 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK; 956 GLuint s2 = S2_TEXCOORD_NONE; 957 int i, offset = 0; 958 959 /* Important: 960 */ 961 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; 962 963 if (!p->translated) 964 translate_program( p ); 965 966 intel->vertex_attr_count = 0; 967 intel->wpos_offset = 0; 968 intel->wpos_size = 0; 969 intel->coloroffset = 0; 970 intel->specoffset = 0; 971 972 if (inputsRead & FRAG_BITS_TEX_ANY) { 973 EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16 ); 974 } 975 else { 976 EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12 ); 977 } 978 979 if (inputsRead & FRAG_BIT_COL0) { 980 intel->coloroffset = offset / 4; 981 EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, S4_VFMT_COLOR, 4 ); 982 } 983 984 if (inputsRead & FRAG_BIT_COL1) { 985 intel->specoffset = offset / 4; 986 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, S4_VFMT_SPEC_FOG, 3 ); 987 EMIT_PAD( 1 ); 988 } 989 990 if (inputsRead & FRAG_BIT_FOGC) { 991 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4 ); 992 } 993 994 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 995 if (inputsRead & FRAG_BIT_TEX(i)) { 996 int sz = VB->TexCoordPtr[i]->size; 997 998 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); 999 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); 1000 1001 EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_SZ(sz), 0, sz * 4 ); 1002 } 1003 else if (i == p->wpos_tex) { 1004 1005 /* If WPOS is required, duplicate the XYZ position data in an 1006 * unused texture coordinate: 1007 */ 1008 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); 1009 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3)); 1010 1011 intel->wpos_offset = offset; 1012 intel->wpos_size = 3 * sizeof(GLuint); 1013 1014 EMIT_PAD( intel->wpos_size ); 1015 } 1016 } 1017 1018 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] || 1019 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) { 1020 1021 I915_STATECHANGE( i915, I915_UPLOAD_CTX ); 1022 1023 /* Must do this *after* statechange, so as not to affect 1024 * buffered vertices reliant on the old state: 1025 */ 1026 intel->vertex_size = _tnl_install_attrs( &intel->ctx, 1027 intel->vertex_attrs, 1028 intel->vertex_attr_count, 1029 intel->ViewportMatrix.m, 0 ); 1030 1031 intel->vertex_size >>= 2; 1032 1033 i915->state.Ctx[I915_CTXREG_LIS2] = s2; 1034 i915->state.Ctx[I915_CTXREG_LIS4] = s4; 1035 1036 assert(intel->vtbl.check_vertex_size( intel, intel->vertex_size )); 1037 } 1038 1039 if (!p->params_uptodate) 1040 track_params( p ); 1041 1042 if (!p->on_hardware) 1043 i915_upload_program( i915, p ); 1044} 1045 1046void i915InitFragProgFuncs( struct dd_function_table *functions ) 1047{ 1048 functions->BindProgram = i915BindProgram; 1049 functions->NewProgram = i915NewProgram; 1050 functions->DeleteProgram = i915DeleteProgram; 1051 functions->IsProgramNative = i915IsProgramNative; 1052 functions->ProgramStringNotify = i915ProgramStringNotify; 1053} 1054