1/************************************************************************** 2 * 3 * Copyright 2003 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "main/glheader.h" 29#include "main/macros.h" 30#include "main/enums.h" 31 32#include "program/prog_instruction.h" 33#include "program/prog_parameter.h" 34#include "program/program.h" 35#include "program/programopt.h" 36#include "program/prog_print.h" 37 38#include "tnl/tnl.h" 39#include "tnl/t_context.h" 40 41#include "intel_batchbuffer.h" 42 43#include "i915_reg.h" 44#include "i915_context.h" 45#include "i915_program.h" 46 47static const GLfloat sin_quad_constants[2][4] = { 48 { 49 2.0, 50 -1.0, 51 .5, 52 .75 53 }, 54 { 55 4.0, 56 -4.0, 57 1.0 / (2.0 * M_PI), 58 .2225 59 } 60}; 61 62static const GLfloat sin_constants[4] = { 1.0, 63 -1.0 / (3 * 2 * 1), 64 1.0 / (5 * 4 * 3 * 2 * 1), 65 -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1) 66}; 67 68/* 1, -1/2!, 1/4!, -1/6! */ 69static const GLfloat cos_constants[4] = { 1.0, 70 -1.0 / (2 * 1), 71 1.0 / (4 * 3 * 2 * 1), 72 -1.0 / (6 * 5 * 4 * 3 * 2 * 1) 73}; 74 75/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */ 76#define TEXCOORD_TEX (0<<7) 77#define TEXCOORD_VAR (1<<7) 78 79static unsigned 80get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord) 81{ 82 for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 83 if (p->texcoord_mapping[i] == texcoord) 84 return i; 85 } 86 87 /* blah */ 88 return p->ctx->Const.MaxTextureCoordUnits - 1; 89} 90 91/** 92 * Retrieve a ureg for the given source register. Will emit 93 * constants, apply swizzling and negation as needed. 94 */ 95static GLuint 96src_vector(struct i915_fragment_program *p, 97 const struct prog_src_register *source, 98 const struct gl_program *program) 99{ 100 GLuint src; 101 unsigned unit; 102 103 switch (source->File) { 104 105 /* Registers: 106 */ 107 case PROGRAM_TEMPORARY: 108 if (source->Index >= I915_MAX_TEMPORARY) { 109 i915_program_error(p, "Exceeded max temporary reg: %d/%d", 110 source->Index, I915_MAX_TEMPORARY); 111 return 0; 112 } 113 src = UREG(REG_TYPE_R, source->Index); 114 break; 115 case PROGRAM_INPUT: 116 switch (source->Index) { 117 case VARYING_SLOT_POS: 118 src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); 119 break; 120 case VARYING_SLOT_COL0: 121 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 122 break; 123 case VARYING_SLOT_COL1: 124 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 125 src = swizzle(src, X, Y, Z, ONE); 126 break; 127 case VARYING_SLOT_FOGC: 128 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 129 src = swizzle(src, W, ZERO, ZERO, ONE); 130 break; 131 case VARYING_SLOT_TEX0: 132 case VARYING_SLOT_TEX1: 133 case VARYING_SLOT_TEX2: 134 case VARYING_SLOT_TEX3: 135 case VARYING_SLOT_TEX4: 136 case VARYING_SLOT_TEX5: 137 case VARYING_SLOT_TEX6: 138 case VARYING_SLOT_TEX7: 139 unit = get_texcoord_mapping(p, (source->Index - 140 VARYING_SLOT_TEX0) | TEXCOORD_TEX); 141 src = i915_emit_decl(p, REG_TYPE_T, 142 T_TEX0 + unit, 143 D0_CHANNEL_ALL); 144 break; 145 146 case VARYING_SLOT_VAR0: 147 case VARYING_SLOT_VAR0 + 1: 148 case VARYING_SLOT_VAR0 + 2: 149 case VARYING_SLOT_VAR0 + 3: 150 case VARYING_SLOT_VAR0 + 4: 151 case VARYING_SLOT_VAR0 + 5: 152 case VARYING_SLOT_VAR0 + 6: 153 case VARYING_SLOT_VAR0 + 7: 154 unit = get_texcoord_mapping(p, (source->Index - 155 VARYING_SLOT_VAR0) | TEXCOORD_VAR); 156 src = i915_emit_decl(p, REG_TYPE_T, 157 T_TEX0 + unit, 158 D0_CHANNEL_ALL); 159 break; 160 161 default: 162 i915_program_error(p, "Bad source->Index: %d", source->Index); 163 return 0; 164 } 165 break; 166 167 case PROGRAM_OUTPUT: 168 switch (source->Index) { 169 case FRAG_RESULT_COLOR: 170 case FRAG_RESULT_DATA0: 171 src = UREG(REG_TYPE_OC, 0); 172 break; 173 case FRAG_RESULT_DEPTH: 174 src = UREG(REG_TYPE_OD, 0); 175 break; 176 default: 177 i915_program_error(p, "Bad source->Index: %d", source->Index); 178 return 0; 179 } 180 break; 181 182 /* Various paramters and env values. All emitted to 183 * hardware as program constants. 184 */ 185 case PROGRAM_CONSTANT: 186 case PROGRAM_STATE_VAR: 187 case PROGRAM_UNIFORM: 188 src = i915_emit_param4fv(p, 189 &program->Parameters->ParameterValues[source->Index][0].f); 190 break; 191 192 default: 193 i915_program_error(p, "Bad source->File: %d", source->File); 194 return 0; 195 } 196 197 src = swizzle(src, 198 GET_SWZ(source->Swizzle, 0), 199 GET_SWZ(source->Swizzle, 1), 200 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); 201 202 if (source->Negate) 203 src = negate(src, 204 GET_BIT(source->Negate, 0), 205 GET_BIT(source->Negate, 1), 206 GET_BIT(source->Negate, 2), 207 GET_BIT(source->Negate, 3)); 208 209 return src; 210} 211 212 213static GLuint 214get_result_vector(struct i915_fragment_program *p, 215 const struct prog_instruction *inst) 216{ 217 switch (inst->DstReg.File) { 218 case PROGRAM_OUTPUT: 219 switch (inst->DstReg.Index) { 220 case FRAG_RESULT_COLOR: 221 case FRAG_RESULT_DATA0: 222 return UREG(REG_TYPE_OC, 0); 223 case FRAG_RESULT_DEPTH: 224 p->depth_written = 1; 225 return UREG(REG_TYPE_OD, 0); 226 default: 227 i915_program_error(p, "Bad inst->DstReg.Index: %d", 228 inst->DstReg.Index); 229 return 0; 230 } 231 case PROGRAM_TEMPORARY: 232 return UREG(REG_TYPE_R, inst->DstReg.Index); 233 default: 234 i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File); 235 return 0; 236 } 237} 238 239static GLuint 240get_result_flags(const struct prog_instruction *inst) 241{ 242 GLuint flags = 0; 243 244 if (inst->Saturate) 245 flags |= A0_DEST_SATURATE; 246 if (inst->DstReg.WriteMask & WRITEMASK_X) 247 flags |= A0_DEST_CHANNEL_X; 248 if (inst->DstReg.WriteMask & WRITEMASK_Y) 249 flags |= A0_DEST_CHANNEL_Y; 250 if (inst->DstReg.WriteMask & WRITEMASK_Z) 251 flags |= A0_DEST_CHANNEL_Z; 252 if (inst->DstReg.WriteMask & WRITEMASK_W) 253 flags |= A0_DEST_CHANNEL_W; 254 255 return flags; 256} 257 258static GLuint 259translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit) 260{ 261 switch (bit) { 262 case TEXTURE_1D_INDEX: 263 return D0_SAMPLE_TYPE_2D; 264 case TEXTURE_2D_INDEX: 265 return D0_SAMPLE_TYPE_2D; 266 case TEXTURE_RECT_INDEX: 267 return D0_SAMPLE_TYPE_2D; 268 case TEXTURE_3D_INDEX: 269 return D0_SAMPLE_TYPE_VOLUME; 270 case TEXTURE_CUBE_INDEX: 271 return D0_SAMPLE_TYPE_CUBE; 272 default: 273 i915_program_error(p, "TexSrcBit: %d", bit); 274 return 0; 275 } 276} 277 278#define EMIT_TEX( OP ) \ 279do { \ 280 GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget ); \ 281 const struct gl_program *program = &p->FragProg; \ 282 GLuint unit = program->SamplerUnits[inst->TexSrcUnit]; \ 283 GLuint sampler = i915_emit_decl(p, REG_TYPE_S, \ 284 unit, dim); \ 285 GLuint coord = src_vector( p, &inst->SrcReg[0], program); \ 286 /* Texel lookup */ \ 287 \ 288 i915_emit_texld( p, get_live_regs(p, inst), \ 289 get_result_vector( p, inst ), \ 290 get_result_flags( inst ), \ 291 sampler, \ 292 coord, \ 293 OP); \ 294} while (0) 295 296#define EMIT_ARITH( OP, N ) \ 297do { \ 298 i915_emit_arith( p, \ 299 OP, \ 300 get_result_vector( p, inst ), \ 301 get_result_flags( inst ), 0, \ 302 (N<1)?0:src_vector( p, &inst->SrcReg[0], program), \ 303 (N<2)?0:src_vector( p, &inst->SrcReg[1], program), \ 304 (N<3)?0:src_vector( p, &inst->SrcReg[2], program)); \ 305} while (0) 306 307#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 ) 308#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 ) 309#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 ) 310 311/* 312 * TODO: consider moving this into core 313 */ 314static bool calc_live_regs( struct i915_fragment_program *p ) 315{ 316 const struct gl_program *program = &p->FragProg; 317 GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1); 318 uint8_t live_components[I915_MAX_TEMPORARY] = { 0, }; 319 GLint i; 320 321 for (i = program->arb.NumInstructions - 1; i >= 0; i--) { 322 struct prog_instruction *inst = &program->arb.Instructions[i]; 323 int opArgs = _mesa_num_inst_src_regs(inst->Opcode); 324 int a; 325 326 /* Register is written to: unmark as live for this and preceeding ops */ 327 if (inst->DstReg.File == PROGRAM_TEMPORARY) { 328 if (inst->DstReg.Index >= I915_MAX_TEMPORARY) 329 return false; 330 331 live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; 332 if (live_components[inst->DstReg.Index] == 0) 333 regsUsed &= ~(1 << inst->DstReg.Index); 334 } 335 336 for (a = 0; a < opArgs; a++) { 337 /* Register is read from: mark as live for this and preceeding ops */ 338 if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { 339 unsigned c; 340 341 if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY) 342 return false; 343 344 regsUsed |= 1 << inst->SrcReg[a].Index; 345 346 for (c = 0; c < 4; c++) { 347 const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c); 348 349 if (field <= SWIZZLE_W) 350 live_components[inst->SrcReg[a].Index] |= (1U << field); 351 } 352 } 353 } 354 355 p->usedRegs[i] = regsUsed; 356 } 357 358 return true; 359} 360 361static GLuint get_live_regs( struct i915_fragment_program *p, 362 const struct prog_instruction *inst ) 363{ 364 const struct gl_program *program = &p->FragProg; 365 GLuint nr = inst - program->arb.Instructions; 366 367 return p->usedRegs[nr]; 368} 369 370 371/* Possible concerns: 372 * 373 * SIN, COS -- could use another taylor step? 374 * LIT -- results seem a little different to sw mesa 375 * LOG -- different to mesa on negative numbers, but this is conformant. 376 * 377 * Parse failures -- Mesa doesn't currently give a good indication 378 * internally whether a particular program string parsed or not. This 379 * can lead to confusion -- hopefully we cope with it ok now. 380 * 381 */ 382static void 383upload_program(struct i915_fragment_program *p) 384{ 385 const struct gl_program *program = &p->FragProg; 386 const struct prog_instruction *inst = program->arb.Instructions; 387 388 if (INTEL_DEBUG & DEBUG_WM) 389 _mesa_print_program(program); 390 391 /* Is this a parse-failed program? Ensure a valid program is 392 * loaded, as the flagging of an error isn't sufficient to stop 393 * this being uploaded to hardware. 394 */ 395 if (inst[0].Opcode == OPCODE_END) { 396 GLuint tmp = i915_get_utemp(p); 397 i915_emit_arith(p, 398 A0_MOV, 399 UREG(REG_TYPE_OC, 0), 400 A0_DEST_CHANNEL_ALL, 0, 401 swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0); 402 return; 403 } 404 405 if (program->arb.NumInstructions > I915_MAX_INSN) { 406 i915_program_error(p, "Exceeded max instructions (%d out of %d)", 407 program->arb.NumInstructions, I915_MAX_INSN); 408 return; 409 } 410 411 /* Not always needed: 412 */ 413 if (!calc_live_regs(p)) { 414 i915_program_error(p, "Could not allocate registers"); 415 return; 416 } 417 418 while (1) { 419 GLuint src0, src1, src2, flags; 420 GLuint tmp = 0, dst, consts0 = 0, consts1 = 0; 421 422 switch (inst->Opcode) { 423 case OPCODE_ABS: 424 src0 = src_vector(p, &inst->SrcReg[0], program); 425 i915_emit_arith(p, 426 A0_MAX, 427 get_result_vector(p, inst), 428 get_result_flags(inst), 0, 429 src0, negate(src0, 1, 1, 1, 1), 0); 430 break; 431 432 case OPCODE_ADD: 433 EMIT_2ARG_ARITH(A0_ADD); 434 break; 435 436 case OPCODE_CMP: 437 src0 = src_vector(p, &inst->SrcReg[0], program); 438 src1 = src_vector(p, &inst->SrcReg[1], program); 439 src2 = src_vector(p, &inst->SrcReg[2], program); 440 i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ 441 break; 442 443 case OPCODE_COS: 444 src0 = src_vector(p, &inst->SrcReg[0], program); 445 tmp = i915_get_utemp(p); 446 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); 447 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); 448 449 /* Reduce range from repeating about [-pi,pi] to [-1,1] */ 450 i915_emit_arith(p, 451 A0_MAD, 452 tmp, A0_DEST_CHANNEL_X, 0, 453 src0, 454 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ 455 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */ 456 457 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 458 459 i915_emit_arith(p, 460 A0_MAD, 461 tmp, A0_DEST_CHANNEL_X, 0, 462 tmp, 463 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ 464 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ 465 466 /* Compute COS with the same calculation used for SIN, but a 467 * different source range has been mapped to [-1,1] this time. 468 */ 469 470 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ 471 i915_emit_arith(p, 472 A0_MAX, 473 tmp, A0_DEST_CHANNEL_Y, 0, 474 swizzle(tmp, ZERO, X, ZERO, ZERO), 475 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 476 0); 477 478 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ 479 i915_emit_arith(p, 480 A0_MUL, 481 tmp, A0_DEST_CHANNEL_Y, 0, 482 swizzle(tmp, ZERO, X, ZERO, ZERO), 483 tmp, 484 0); 485 486 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ 487 i915_emit_arith(p, 488 A0_DP3, 489 tmp, A0_DEST_CHANNEL_X, 0, 490 tmp, 491 swizzle(consts1, X, Y, ZERO, ZERO), 492 0); 493 494 /* tmp.x now contains a first approximation (y). Now, weight it 495 * against tmp.y**2 to get closer. 496 */ 497 i915_emit_arith(p, 498 A0_MAX, 499 tmp, A0_DEST_CHANNEL_Y, 0, 500 swizzle(tmp, ZERO, X, ZERO, ZERO), 501 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 502 0); 503 504 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ 505 i915_emit_arith(p, 506 A0_MAD, 507 tmp, A0_DEST_CHANNEL_Y, 0, 508 swizzle(tmp, ZERO, X, ZERO, ZERO), 509 swizzle(tmp, ZERO, Y, ZERO, ZERO), 510 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); 511 512 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ 513 i915_emit_arith(p, 514 A0_MAD, 515 get_result_vector(p, inst), 516 get_result_flags(inst), 0, 517 swizzle(consts1, W, W, W, W), 518 swizzle(tmp, Y, Y, Y, Y), 519 swizzle(tmp, X, X, X, X)); 520 break; 521 522 case OPCODE_DP2: 523 src0 = src_vector(p, &inst->SrcReg[0], program); 524 src1 = src_vector(p, &inst->SrcReg[1], program); 525 i915_emit_arith(p, 526 A0_DP3, 527 get_result_vector(p, inst), 528 get_result_flags(inst), 0, 529 swizzle(src0, X, Y, ZERO, ZERO), 530 swizzle(src1, X, Y, ZERO, ZERO), 531 0); 532 break; 533 534 case OPCODE_DP3: 535 EMIT_2ARG_ARITH(A0_DP3); 536 break; 537 538 case OPCODE_DP4: 539 EMIT_2ARG_ARITH(A0_DP4); 540 break; 541 542 case OPCODE_DPH: 543 src0 = src_vector(p, &inst->SrcReg[0], program); 544 src1 = src_vector(p, &inst->SrcReg[1], program); 545 546 i915_emit_arith(p, 547 A0_DP4, 548 get_result_vector(p, inst), 549 get_result_flags(inst), 0, 550 swizzle(src0, X, Y, Z, ONE), src1, 0); 551 break; 552 553 case OPCODE_DST: 554 src0 = src_vector(p, &inst->SrcReg[0], program); 555 src1 = src_vector(p, &inst->SrcReg[1], program); 556 557 /* result[0] = 1 * 1; 558 * result[1] = a[1] * b[1]; 559 * result[2] = a[2] * 1; 560 * result[3] = 1 * b[3]; 561 */ 562 i915_emit_arith(p, 563 A0_MUL, 564 get_result_vector(p, inst), 565 get_result_flags(inst), 0, 566 swizzle(src0, ONE, Y, Z, ONE), 567 swizzle(src1, ONE, Y, ONE, W), 0); 568 break; 569 570 case OPCODE_EX2: 571 src0 = src_vector(p, &inst->SrcReg[0], program); 572 573 i915_emit_arith(p, 574 A0_EXP, 575 get_result_vector(p, inst), 576 get_result_flags(inst), 0, 577 swizzle(src0, X, X, X, X), 0, 0); 578 break; 579 580 case OPCODE_FLR: 581 EMIT_1ARG_ARITH(A0_FLR); 582 break; 583 584 case OPCODE_TRUNC: 585 EMIT_1ARG_ARITH(A0_TRC); 586 break; 587 588 case OPCODE_FRC: 589 EMIT_1ARG_ARITH(A0_FRC); 590 break; 591 592 case OPCODE_KIL: 593 src0 = src_vector(p, &inst->SrcReg[0], program); 594 tmp = i915_get_utemp(p); 595 596 i915_emit_texld(p, get_live_regs(p, inst), 597 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ 598 0, src0, T0_TEXKILL); 599 break; 600 601 case OPCODE_LG2: 602 src0 = src_vector(p, &inst->SrcReg[0], program); 603 604 i915_emit_arith(p, 605 A0_LOG, 606 get_result_vector(p, inst), 607 get_result_flags(inst), 0, 608 swizzle(src0, X, X, X, X), 0, 0); 609 break; 610 611 case OPCODE_LIT: 612 src0 = src_vector(p, &inst->SrcReg[0], program); 613 tmp = i915_get_utemp(p); 614 615 /* tmp = max( a.xyzw, a.00zw ) 616 * XXX: Clamp tmp.w to -128..128 617 * tmp.y = log(tmp.y) 618 * tmp.y = tmp.w * tmp.y 619 * tmp.y = exp(tmp.y) 620 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 621 */ 622 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 623 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 624 625 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 626 swizzle(tmp, Y, Y, Y, Y), 0, 0); 627 628 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 629 swizzle(tmp, ZERO, Y, ZERO, ZERO), 630 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 631 632 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 633 swizzle(tmp, Y, Y, Y, Y), 0, 0); 634 635 i915_emit_arith(p, A0_CMP, 636 get_result_vector(p, inst), 637 get_result_flags(inst), 0, 638 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 639 swizzle(tmp, ONE, X, ZERO, ONE), 640 swizzle(tmp, ONE, X, Y, ONE)); 641 642 break; 643 644 case OPCODE_LRP: 645 src0 = src_vector(p, &inst->SrcReg[0], program); 646 src1 = src_vector(p, &inst->SrcReg[1], program); 647 src2 = src_vector(p, &inst->SrcReg[2], program); 648 flags = get_result_flags(inst); 649 tmp = i915_get_utemp(p); 650 651 /* b*a + c*(1-a) 652 * 653 * b*a + c - ca 654 * 655 * tmp = b*a + c, 656 * result = (-c)*a + tmp 657 */ 658 i915_emit_arith(p, A0_MAD, tmp, 659 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 660 661 i915_emit_arith(p, A0_MAD, 662 get_result_vector(p, inst), 663 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 664 break; 665 666 case OPCODE_MAD: 667 EMIT_3ARG_ARITH(A0_MAD); 668 break; 669 670 case OPCODE_MAX: 671 EMIT_2ARG_ARITH(A0_MAX); 672 break; 673 674 case OPCODE_MIN: 675 EMIT_2ARG_ARITH(A0_MIN); 676 break; 677 678 case OPCODE_MOV: 679 EMIT_1ARG_ARITH(A0_MOV); 680 break; 681 682 case OPCODE_MUL: 683 EMIT_2ARG_ARITH(A0_MUL); 684 break; 685 686 case OPCODE_POW: 687 src0 = src_vector(p, &inst->SrcReg[0], program); 688 src1 = src_vector(p, &inst->SrcReg[1], program); 689 tmp = i915_get_utemp(p); 690 flags = get_result_flags(inst); 691 692 /* XXX: masking on intermediate values, here and elsewhere. 693 */ 694 i915_emit_arith(p, 695 A0_LOG, 696 tmp, A0_DEST_CHANNEL_X, 0, 697 swizzle(src0, X, X, X, X), 0, 0); 698 699 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 700 701 702 i915_emit_arith(p, 703 A0_EXP, 704 get_result_vector(p, inst), 705 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 706 707 break; 708 709 case OPCODE_RCP: 710 src0 = src_vector(p, &inst->SrcReg[0], program); 711 712 i915_emit_arith(p, 713 A0_RCP, 714 get_result_vector(p, inst), 715 get_result_flags(inst), 0, 716 swizzle(src0, X, X, X, X), 0, 0); 717 break; 718 719 case OPCODE_RSQ: 720 721 src0 = src_vector(p, &inst->SrcReg[0], program); 722 723 i915_emit_arith(p, 724 A0_RSQ, 725 get_result_vector(p, inst), 726 get_result_flags(inst), 0, 727 swizzle(src0, X, X, X, X), 0, 0); 728 break; 729 730 case OPCODE_SCS: 731 src0 = src_vector(p, &inst->SrcReg[0], program); 732 tmp = i915_get_utemp(p); 733 734 /* 735 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 736 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 737 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 738 * scs.x = DP4 t1, sin_constants 739 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 740 * scs.y = DP4 t1, cos_constants 741 */ 742 i915_emit_arith(p, 743 A0_MUL, 744 tmp, A0_DEST_CHANNEL_XY, 0, 745 swizzle(src0, X, X, ONE, ONE), 746 swizzle(src0, X, ONE, ONE, ONE), 0); 747 748 i915_emit_arith(p, 749 A0_MUL, 750 tmp, A0_DEST_CHANNEL_ALL, 0, 751 swizzle(tmp, X, Y, X, Y), 752 swizzle(tmp, X, X, ONE, ONE), 0); 753 754 if (inst->DstReg.WriteMask & WRITEMASK_Y) { 755 GLuint tmp1; 756 757 if (inst->DstReg.WriteMask & WRITEMASK_X) 758 tmp1 = i915_get_utemp(p); 759 else 760 tmp1 = tmp; 761 762 i915_emit_arith(p, 763 A0_MUL, 764 tmp1, A0_DEST_CHANNEL_ALL, 0, 765 swizzle(tmp, X, Y, Y, W), 766 swizzle(tmp, X, Z, ONE, ONE), 0); 767 768 i915_emit_arith(p, 769 A0_DP4, 770 get_result_vector(p, inst), 771 A0_DEST_CHANNEL_Y, 0, 772 swizzle(tmp1, W, Z, Y, X), 773 i915_emit_const4fv(p, sin_constants), 0); 774 } 775 776 if (inst->DstReg.WriteMask & WRITEMASK_X) { 777 i915_emit_arith(p, 778 A0_MUL, 779 tmp, A0_DEST_CHANNEL_XYZ, 0, 780 swizzle(tmp, X, X, Z, ONE), 781 swizzle(tmp, Z, ONE, ONE, ONE), 0); 782 783 i915_emit_arith(p, 784 A0_DP4, 785 get_result_vector(p, inst), 786 A0_DEST_CHANNEL_X, 0, 787 swizzle(tmp, ONE, Z, Y, X), 788 i915_emit_const4fv(p, cos_constants), 0); 789 } 790 break; 791 792 case OPCODE_SIN: 793 src0 = src_vector(p, &inst->SrcReg[0], program); 794 tmp = i915_get_utemp(p); 795 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); 796 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); 797 798 /* Reduce range from repeating about [-pi,pi] to [-1,1] */ 799 i915_emit_arith(p, 800 A0_MAD, 801 tmp, A0_DEST_CHANNEL_X, 0, 802 src0, 803 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ 804 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */ 805 806 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 807 808 i915_emit_arith(p, 809 A0_MAD, 810 tmp, A0_DEST_CHANNEL_X, 0, 811 tmp, 812 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ 813 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ 814 815 /* Compute sin using a quadratic and quartic. It gives continuity 816 * that repeating the Taylor series lacks every 2*pi, and has 817 * reduced error. 818 * 819 * The idea was described at: 820 * http://www.devmaster.net/forums/showthread.php?t=5784 821 */ 822 823 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ 824 i915_emit_arith(p, 825 A0_MAX, 826 tmp, A0_DEST_CHANNEL_Y, 0, 827 swizzle(tmp, ZERO, X, ZERO, ZERO), 828 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 829 0); 830 831 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ 832 i915_emit_arith(p, 833 A0_MUL, 834 tmp, A0_DEST_CHANNEL_Y, 0, 835 swizzle(tmp, ZERO, X, ZERO, ZERO), 836 tmp, 837 0); 838 839 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ 840 i915_emit_arith(p, 841 A0_DP3, 842 tmp, A0_DEST_CHANNEL_X, 0, 843 tmp, 844 swizzle(consts1, X, Y, ZERO, ZERO), 845 0); 846 847 /* tmp.x now contains a first approximation (y). Now, weight it 848 * against tmp.y**2 to get closer. 849 */ 850 i915_emit_arith(p, 851 A0_MAX, 852 tmp, A0_DEST_CHANNEL_Y, 0, 853 swizzle(tmp, ZERO, X, ZERO, ZERO), 854 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 855 0); 856 857 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ 858 i915_emit_arith(p, 859 A0_MAD, 860 tmp, A0_DEST_CHANNEL_Y, 0, 861 swizzle(tmp, ZERO, X, ZERO, ZERO), 862 swizzle(tmp, ZERO, Y, ZERO, ZERO), 863 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); 864 865 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ 866 i915_emit_arith(p, 867 A0_MAD, 868 get_result_vector(p, inst), 869 get_result_flags(inst), 0, 870 swizzle(consts1, W, W, W, W), 871 swizzle(tmp, Y, Y, Y, Y), 872 swizzle(tmp, X, X, X, X)); 873 874 break; 875 876 case OPCODE_SGE: 877 EMIT_2ARG_ARITH(A0_SGE); 878 break; 879 880 case OPCODE_SLT: 881 EMIT_2ARG_ARITH(A0_SLT); 882 break; 883 884 case OPCODE_SSG: 885 dst = get_result_vector(p, inst); 886 flags = get_result_flags(inst); 887 src0 = src_vector(p, &inst->SrcReg[0], program); 888 tmp = i915_get_utemp(p); 889 890 /* tmp = (src < 0.0) */ 891 i915_emit_arith(p, 892 A0_SLT, 893 tmp, 894 flags, 0, 895 src0, 896 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 897 0); 898 899 /* dst = (0.0 < src) */ 900 i915_emit_arith(p, 901 A0_SLT, 902 dst, 903 flags, 0, 904 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 905 src0, 906 0); 907 908 /* dst = (src > 0.0) - (src < 0.0) */ 909 i915_emit_arith(p, 910 A0_ADD, 911 dst, 912 flags, 0, 913 dst, 914 negate(tmp, 1, 1, 1, 1), 915 0); 916 917 break; 918 919 case OPCODE_SUB: 920 src0 = src_vector(p, &inst->SrcReg[0], program); 921 src1 = src_vector(p, &inst->SrcReg[1], program); 922 923 i915_emit_arith(p, 924 A0_ADD, 925 get_result_vector(p, inst), 926 get_result_flags(inst), 0, 927 src0, negate(src1, 1, 1, 1, 1), 0); 928 break; 929 930 case OPCODE_SWZ: 931 EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */ 932 break; 933 934 case OPCODE_TEX: 935 EMIT_TEX(T0_TEXLD); 936 break; 937 938 case OPCODE_TXB: 939 EMIT_TEX(T0_TEXLDB); 940 break; 941 942 case OPCODE_TXP: 943 EMIT_TEX(T0_TEXLDP); 944 break; 945 946 case OPCODE_XPD: 947 /* Cross product: 948 * result.x = src0.y * src1.z - src0.z * src1.y; 949 * result.y = src0.z * src1.x - src0.x * src1.z; 950 * result.z = src0.x * src1.y - src0.y * src1.x; 951 * result.w = undef; 952 */ 953 src0 = src_vector(p, &inst->SrcReg[0], program); 954 src1 = src_vector(p, &inst->SrcReg[1], program); 955 tmp = i915_get_utemp(p); 956 957 i915_emit_arith(p, 958 A0_MUL, 959 tmp, A0_DEST_CHANNEL_ALL, 0, 960 swizzle(src0, Z, X, Y, ONE), 961 swizzle(src1, Y, Z, X, ONE), 0); 962 963 i915_emit_arith(p, 964 A0_MAD, 965 get_result_vector(p, inst), 966 get_result_flags(inst), 0, 967 swizzle(src0, Y, Z, X, ONE), 968 swizzle(src1, Z, X, Y, ONE), 969 negate(tmp, 1, 1, 1, 0)); 970 break; 971 972 case OPCODE_END: 973 return; 974 975 case OPCODE_BGNLOOP: 976 case OPCODE_BGNSUB: 977 case OPCODE_BRK: 978 case OPCODE_CAL: 979 case OPCODE_CONT: 980 case OPCODE_DDX: 981 case OPCODE_DDY: 982 case OPCODE_ELSE: 983 case OPCODE_ENDIF: 984 case OPCODE_ENDLOOP: 985 case OPCODE_ENDSUB: 986 case OPCODE_IF: 987 case OPCODE_RET: 988 p->error = 1; 989 i915_program_error(p, "Unsupported opcode: %s", 990 _mesa_opcode_string(inst->Opcode)); 991 return; 992 993 case OPCODE_EXP: 994 case OPCODE_LOG: 995 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in 996 * prog_instruction.h, but apparently GLSL doesn't ever emit them. 997 * Instead, it translates to EX2 or LG2. 998 */ 999 case OPCODE_TXD: 1000 case OPCODE_TXL: 1001 /* These opcodes are claimed by GLSL in prog_instruction.h, but 1002 * only NV_vp/fp appears to emit them. 1003 */ 1004 default: 1005 i915_program_error(p, "bad opcode: %s", 1006 _mesa_opcode_string(inst->Opcode)); 1007 return; 1008 } 1009 1010 inst++; 1011 i915_release_utemps(p); 1012 } 1013} 1014 1015/* Rather than trying to intercept and jiggle depth writes during 1016 * emit, just move the value into its correct position at the end of 1017 * the program: 1018 */ 1019static void 1020fixup_depth_write(struct i915_fragment_program *p) 1021{ 1022 if (p->depth_written) { 1023 GLuint depth = UREG(REG_TYPE_OD, 0); 1024 1025 i915_emit_arith(p, 1026 A0_MOV, 1027 depth, A0_DEST_CHANNEL_W, 0, 1028 swizzle(depth, X, Y, Z, Z), 0, 0); 1029 } 1030} 1031 1032static void 1033check_texcoord_mapping(struct i915_fragment_program *p) 1034{ 1035 GLbitfield64 inputs = p->FragProg.info.inputs_read; 1036 unsigned unit = 0; 1037 1038 for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 1039 if (inputs & VARYING_BIT_TEX(i)) { 1040 if (unit >= p->ctx->Const.MaxTextureCoordUnits) { 1041 unit++; 1042 break; 1043 } 1044 p->texcoord_mapping[unit++] = i | TEXCOORD_TEX; 1045 } 1046 if (inputs & VARYING_BIT_VAR(i)) { 1047 if (unit >= p->ctx->Const.MaxTextureCoordUnits) { 1048 unit++; 1049 break; 1050 } 1051 p->texcoord_mapping[unit++] = i | TEXCOORD_VAR; 1052 } 1053 } 1054 1055 if (unit > p->ctx->Const.MaxTextureCoordUnits) 1056 i915_program_error(p, "Too many texcoord units"); 1057} 1058 1059static void 1060check_wpos(struct i915_fragment_program *p) 1061{ 1062 GLbitfield64 inputs = p->FragProg.info.inputs_read; 1063 GLint i; 1064 unsigned unit = 0; 1065 1066 p->wpos_tex = -1; 1067 1068 if ((inputs & VARYING_BIT_POS) == 0) 1069 return; 1070 1071 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 1072 unit += !!(inputs & VARYING_BIT_TEX(i)); 1073 unit += !!(inputs & VARYING_BIT_VAR(i)); 1074 } 1075 1076 if (unit < p->ctx->Const.MaxTextureCoordUnits) 1077 p->wpos_tex = unit; 1078 else 1079 i915_program_error(p, "No free texcoord for wpos value"); 1080} 1081 1082 1083static void 1084translate_program(struct i915_fragment_program *p) 1085{ 1086 struct i915_context *i915 = I915_CONTEXT(p->ctx); 1087 1088 if (INTEL_DEBUG & DEBUG_WM) { 1089 printf("fp:\n"); 1090 _mesa_print_program(&p->FragProg); 1091 printf("\n"); 1092 } 1093 1094 i915_init_program(i915, p); 1095 check_texcoord_mapping(p); 1096 check_wpos(p); 1097 upload_program(p); 1098 fixup_depth_write(p); 1099 i915_fini_program(p); 1100 1101 p->translated = 1; 1102} 1103 1104 1105static void 1106track_params(struct i915_fragment_program *p) 1107{ 1108 GLint i; 1109 1110 if (p->nr_params) 1111 _mesa_load_state_parameters(p->ctx, p->FragProg.Parameters); 1112 1113 for (i = 0; i < p->nr_params; i++) { 1114 GLint reg = p->param[i].reg; 1115 COPY_4V(p->constant[reg], p->param[i].values); 1116 } 1117 1118 p->params_uptodate = 1; 1119 p->on_hardware = 0; /* overkill */ 1120} 1121 1122 1123static void 1124i915BindProgram(struct gl_context * ctx, GLenum target, struct gl_program *prog) 1125{ 1126 if (target == GL_FRAGMENT_PROGRAM_ARB) { 1127 struct i915_context *i915 = I915_CONTEXT(ctx); 1128 struct i915_fragment_program *p = (struct i915_fragment_program *) prog; 1129 1130 if (i915->current_program == p) 1131 return; 1132 1133 if (i915->current_program) { 1134 i915->current_program->on_hardware = 0; 1135 i915->current_program->params_uptodate = 0; 1136 } 1137 1138 i915->current_program = p; 1139 1140 assert(p->on_hardware == 0); 1141 assert(p->params_uptodate == 0); 1142 1143 } 1144} 1145 1146static struct gl_program * 1147i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id, 1148 bool is_arb_asm) 1149{ 1150 switch (target) { 1151 case GL_VERTEX_PROGRAM_ARB: { 1152 struct gl_program *prog = rzalloc(NULL, struct gl_program); 1153 return _mesa_init_gl_program(prog, target, id, is_arb_asm); 1154 } 1155 1156 case GL_FRAGMENT_PROGRAM_ARB:{ 1157 struct i915_fragment_program *prog = 1158 rzalloc(NULL, struct i915_fragment_program); 1159 if (prog) { 1160 i915_init_program(I915_CONTEXT(ctx), prog); 1161 1162 return _mesa_init_gl_program(&prog->FragProg, target, id, 1163 is_arb_asm); 1164 } 1165 else 1166 return NULL; 1167 } 1168 1169 default: 1170 /* Just fallback: 1171 */ 1172 return _mesa_new_program(ctx, target, id, is_arb_asm); 1173 } 1174} 1175 1176static void 1177i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog) 1178{ 1179 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1180 struct i915_context *i915 = I915_CONTEXT(ctx); 1181 struct i915_fragment_program *p = (struct i915_fragment_program *) prog; 1182 1183 if (i915->current_program == p) 1184 i915->current_program = 0; 1185 } 1186 1187 _mesa_delete_program(ctx, prog); 1188} 1189 1190 1191static GLboolean 1192i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog) 1193{ 1194 if (target == GL_FRAGMENT_PROGRAM_ARB) { 1195 struct i915_fragment_program *p = (struct i915_fragment_program *) prog; 1196 1197 if (!p->translated) 1198 translate_program(p); 1199 1200 return !p->error; 1201 } 1202 else 1203 return true; 1204} 1205 1206static GLboolean 1207i915ProgramStringNotify(struct gl_context * ctx, 1208 GLenum target, struct gl_program *prog) 1209{ 1210 if (target == GL_FRAGMENT_PROGRAM_ARB) { 1211 struct i915_fragment_program *p = (struct i915_fragment_program *) prog; 1212 p->translated = 0; 1213 } 1214 1215 (void) _tnl_program_string(ctx, target, prog); 1216 1217 /* XXX check if program is legal, within limits */ 1218 return true; 1219} 1220 1221static void 1222i915SamplerUniformChange(struct gl_context *ctx, 1223 GLenum target, struct gl_program *prog) 1224{ 1225 i915ProgramStringNotify(ctx, target, prog); 1226} 1227 1228void 1229i915_update_program(struct gl_context *ctx) 1230{ 1231 struct intel_context *intel = intel_context(ctx); 1232 struct i915_context *i915 = i915_context(&intel->ctx); 1233 struct i915_fragment_program *fp = 1234 (struct i915_fragment_program *) ctx->FragmentProgram._Current; 1235 1236 if (i915->current_program != fp) { 1237 if (i915->current_program) { 1238 i915->current_program->on_hardware = 0; 1239 i915->current_program->params_uptodate = 0; 1240 } 1241 1242 i915->current_program = fp; 1243 } 1244 1245 if (!fp->translated) 1246 translate_program(fp); 1247 1248 FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error); 1249} 1250 1251void 1252i915ValidateFragmentProgram(struct i915_context *i915) 1253{ 1254 struct gl_context *ctx = &i915->intel.ctx; 1255 struct intel_context *intel = intel_context(ctx); 1256 TNLcontext *tnl = TNL_CONTEXT(ctx); 1257 struct vertex_buffer *VB = &tnl->vb; 1258 1259 struct i915_fragment_program *p = 1260 (struct i915_fragment_program *) ctx->FragmentProgram._Current; 1261 1262 const GLbitfield64 inputsRead = p->FragProg.info.inputs_read; 1263 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK; 1264 GLuint s2 = S2_TEXCOORD_NONE; 1265 int i, offset = 0; 1266 1267 /* Important: 1268 */ 1269 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; 1270 1271 if (!p->translated) 1272 translate_program(p); 1273 1274 intel->vertex_attr_count = 0; 1275 intel->wpos_offset = 0; 1276 intel->coloroffset = 0; 1277 intel->specoffset = 0; 1278 1279 if (inputsRead & VARYING_BITS_TEX_ANY || p->wpos_tex != -1) { 1280 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16); 1281 } 1282 else { 1283 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12); 1284 } 1285 1286 /* Handle gl_PointSize builtin var here */ 1287 if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled) 1288 EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4); 1289 1290 if (inputsRead & VARYING_BIT_COL0) { 1291 intel->coloroffset = offset / 4; 1292 EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4); 1293 } 1294 1295 if (inputsRead & VARYING_BIT_COL1) { 1296 intel->specoffset = offset / 4; 1297 EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4); 1298 } 1299 1300 if ((inputsRead & VARYING_BIT_FOGC)) { 1301 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4); 1302 } 1303 1304 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 1305 if (inputsRead & VARYING_BIT_TEX(i)) { 1306 int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX); 1307 int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size; 1308 1309 s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK); 1310 s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz)); 1311 1312 EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4); 1313 } 1314 if (inputsRead & VARYING_BIT_VAR(i)) { 1315 int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR); 1316 int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size; 1317 1318 s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK); 1319 s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz)); 1320 1321 EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4); 1322 } 1323 if (i == p->wpos_tex) { 1324 int wpos_size = 4 * sizeof(float); 1325 /* If WPOS is required, duplicate the XYZ position data in an 1326 * unused texture coordinate: 1327 */ 1328 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); 1329 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size)); 1330 1331 intel->wpos_offset = offset; 1332 EMIT_PAD(wpos_size); 1333 } 1334 } 1335 1336 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] || 1337 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) { 1338 I915_STATECHANGE(i915, I915_UPLOAD_CTX); 1339 1340 /* Must do this *after* statechange, so as not to affect 1341 * buffered vertices reliant on the old state: 1342 */ 1343 intel->vertex_size = _tnl_install_attrs(&intel->ctx, 1344 intel->vertex_attrs, 1345 intel->vertex_attr_count, 1346 intel->ViewportMatrix.m, 0); 1347 1348 assert(intel->prim.current_offset == intel->prim.start_offset); 1349 intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size; 1350 intel->prim.current_offset = intel->prim.start_offset; 1351 1352 intel->vertex_size >>= 2; 1353 1354 i915->state.Ctx[I915_CTXREG_LIS2] = s2; 1355 i915->state.Ctx[I915_CTXREG_LIS4] = s4; 1356 1357 assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size)); 1358 } 1359 1360 if (!p->params_uptodate) 1361 track_params(p); 1362 1363 if (!p->on_hardware) 1364 i915_upload_program(i915, p); 1365 1366 if (INTEL_DEBUG & DEBUG_WM) { 1367 printf("i915:\n"); 1368 i915_disassemble_program(i915->state.Program, i915->state.ProgramSize); 1369 } 1370} 1371 1372void 1373i915InitFragProgFuncs(struct dd_function_table *functions) 1374{ 1375 functions->BindProgram = i915BindProgram; 1376 functions->NewProgram = i915NewProgram; 1377 functions->DeleteProgram = i915DeleteProgram; 1378 functions->IsProgramNative = i915IsProgramNative; 1379 functions->ProgramStringNotify = i915ProgramStringNotify; 1380 functions->SamplerUniformChange = i915SamplerUniformChange; 1381} 1382