1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include <stdarg.h> 30 31#include "i915_reg.h" 32#include "i915_context.h" 33#include "i915_fpc.h" 34#include "i915_debug_private.h" 35 36#include "pipe/p_shader_tokens.h" 37#include "util/u_math.h" 38#include "util/u_memory.h" 39#include "util/u_string.h" 40#include "tgsi/tgsi_parse.h" 41#include "tgsi/tgsi_dump.h" 42 43#include "draw/draw_vertex.h" 44 45#ifndef M_PI 46#define M_PI 3.14159265358979323846 47#endif 48 49/** 50 * Simple pass-through fragment shader to use when we don't have 51 * a real shader (or it fails to compile for some reason). 52 */ 53static unsigned passthrough_decl[] = 54{ 55 _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), 56 57 /* declare input color: 58 */ 59 (D0_DCL | 60 (REG_TYPE_T << D0_TYPE_SHIFT) | 61 (T_DIFFUSE << D0_NR_SHIFT) | 62 D0_CHANNEL_ALL), 63 0, 64 0, 65}; 66 67static unsigned passthrough_program[] = 68{ 69 /* move to output color: 70 */ 71 (A0_MOV | 72 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | 73 A0_DEST_CHANNEL_ALL | 74 (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | 75 (T_DIFFUSE << A0_SRC0_NR_SHIFT)), 76 0x01230000, /* .xyzw */ 77 0 78}; 79 80 81/* 1, -1/3!, 1/5!, -1/7! */ 82static const float scs_sin_constants[4] = { 1.0, 83 -1.0f / (3 * 2 * 1), 84 1.0f / (5 * 4 * 3 * 2 * 1), 85 -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) 86}; 87 88/* 1, -1/2!, 1/4!, -1/6! */ 89static const float scs_cos_constants[4] = { 1.0, 90 -1.0f / (2 * 1), 91 1.0f / (4 * 3 * 2 * 1), 92 -1.0f / (6 * 5 * 4 * 3 * 2 * 1) 93}; 94 95/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */ 96static const float sin_constants[4] = { 2.0 * M_PI, 97 -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1), 98 32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1), 99 -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1) 100}; 101 102/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */ 103static const float cos_constants[4] = { 1.0, 104 -4.0f * M_PI * M_PI / (2 * 1), 105 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1), 106 -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1) 107}; 108 109 110 111/** 112 * component-wise negation of ureg 113 */ 114static inline int 115negate(int reg, int x, int y, int z, int w) 116{ 117 /* Another neat thing about the UREG representation */ 118 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | 119 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | 120 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | 121 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); 122} 123 124 125/** 126 * In the event of a translation failure, we'll generate a simple color 127 * pass-through program. 128 */ 129static void 130i915_use_passthrough_shader(struct i915_fragment_shader *fs) 131{ 132 fs->program = (uint *) MALLOC(sizeof(passthrough_program)); 133 fs->decl = (uint *) MALLOC(sizeof(passthrough_decl)); 134 if (fs->program) { 135 memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); 136 memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl)); 137 fs->program_len = ARRAY_SIZE(passthrough_program); 138 fs->decl_len = ARRAY_SIZE(passthrough_decl); 139 } 140 fs->num_constants = 0; 141} 142 143 144void 145i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 146{ 147 va_list args; 148 char buffer[1024]; 149 150 debug_printf("i915_program_error: "); 151 va_start( args, msg ); 152 util_vsnprintf( buffer, sizeof(buffer), msg, args ); 153 va_end( args ); 154 debug_printf("%s", buffer); 155 debug_printf("\n"); 156 157 p->error = 1; 158} 159 160static uint get_mapping(struct i915_fragment_shader* fs, int unit) 161{ 162 int i; 163 for (i = 0; i < I915_TEX_UNITS; i++) 164 { 165 if (fs->generic_mapping[i] == -1) { 166 fs->generic_mapping[i] = unit; 167 return i; 168 } 169 if (fs->generic_mapping[i] == unit) 170 return i; 171 } 172 debug_printf("Exceeded max generics\n"); 173 return 0; 174} 175 176/** 177 * Construct a ureg for the given source register. Will emit 178 * constants, apply swizzling and negation as needed. 179 */ 180static uint 181src_vector(struct i915_fp_compile *p, 182 const struct i915_full_src_register *source, 183 struct i915_fragment_shader *fs) 184{ 185 uint index = source->Register.Index; 186 uint src = 0, sem_name, sem_ind; 187 188 switch (source->Register.File) { 189 case TGSI_FILE_TEMPORARY: 190 if (source->Register.Index >= I915_MAX_TEMPORARY) { 191 i915_program_error(p, "Exceeded max temporary reg"); 192 return 0; 193 } 194 src = UREG(REG_TYPE_R, index); 195 break; 196 case TGSI_FILE_INPUT: 197 /* XXX: Packing COL1, FOGC into a single attribute works for 198 * texenv programs, but will fail for real fragment programs 199 * that use these attributes and expect them to be a full 4 200 * components wide. Could use a texcoord to pass these 201 * attributes if necessary, but that won't work in the general 202 * case. 203 * 204 * We also use a texture coordinate to pass wpos when possible. 205 */ 206 207 sem_name = p->shader->info.input_semantic_name[index]; 208 sem_ind = p->shader->info.input_semantic_index[index]; 209 210 switch (sem_name) { 211 case TGSI_SEMANTIC_POSITION: 212 { 213 /* for fragcoord */ 214 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); 215 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 216 break; 217 } 218 case TGSI_SEMANTIC_COLOR: 219 if (sem_ind == 0) { 220 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 221 } 222 else { 223 /* secondary color */ 224 assert(sem_ind == 1); 225 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 226 src = swizzle(src, X, Y, Z, ONE); 227 } 228 break; 229 case TGSI_SEMANTIC_FOG: 230 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 231 src = swizzle(src, W, W, W, W); 232 break; 233 case TGSI_SEMANTIC_GENERIC: 234 { 235 int real_tex_unit = get_mapping(fs, sem_ind); 236 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 237 break; 238 } 239 case TGSI_SEMANTIC_FACE: 240 { 241 /* for back/front faces */ 242 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); 243 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); 244 break; 245 } 246 default: 247 i915_program_error(p, "Bad source->Index"); 248 return 0; 249 } 250 break; 251 252 case TGSI_FILE_IMMEDIATE: 253 assert(index < p->num_immediates); 254 index = p->immediates_map[index]; 255 /* fall-through */ 256 case TGSI_FILE_CONSTANT: 257 src = UREG(REG_TYPE_CONST, index); 258 break; 259 260 default: 261 i915_program_error(p, "Bad source->File"); 262 return 0; 263 } 264 265 src = swizzle(src, 266 source->Register.SwizzleX, 267 source->Register.SwizzleY, 268 source->Register.SwizzleZ, 269 source->Register.SwizzleW); 270 271 /* There's both negate-all-components and per-component negation. 272 * Try to handle both here. 273 */ 274 { 275 int n = source->Register.Negate; 276 src = negate(src, n, n, n, n); 277 } 278 279 /* no abs() */ 280#if 0 281 /* XXX assertions disabled to allow arbfplight.c to run */ 282 /* XXX enable these assertions, or fix things */ 283 assert(!source->Register.Absolute); 284#endif 285 if (source->Register.Absolute) 286 debug_printf("Unhandled absolute value\n"); 287 288 return src; 289} 290 291 292/** 293 * Construct a ureg for a destination register. 294 */ 295static uint 296get_result_vector(struct i915_fp_compile *p, 297 const struct i915_full_dst_register *dest) 298{ 299 switch (dest->Register.File) { 300 case TGSI_FILE_OUTPUT: 301 { 302 uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; 303 switch (sem_name) { 304 case TGSI_SEMANTIC_POSITION: 305 return UREG(REG_TYPE_OD, 0); 306 case TGSI_SEMANTIC_COLOR: 307 return UREG(REG_TYPE_OC, 0); 308 default: 309 i915_program_error(p, "Bad inst->DstReg.Index/semantics"); 310 return 0; 311 } 312 } 313 case TGSI_FILE_TEMPORARY: 314 return UREG(REG_TYPE_R, dest->Register.Index); 315 default: 316 i915_program_error(p, "Bad inst->DstReg.File"); 317 return 0; 318 } 319} 320 321 322/** 323 * Compute flags for saturation and writemask. 324 */ 325static uint 326get_result_flags(const struct i915_full_instruction *inst) 327{ 328 const uint writeMask 329 = inst->Dst[0].Register.WriteMask; 330 uint flags = 0x0; 331 332 if (inst->Instruction.Saturate) 333 flags |= A0_DEST_SATURATE; 334 335 if (writeMask & TGSI_WRITEMASK_X) 336 flags |= A0_DEST_CHANNEL_X; 337 if (writeMask & TGSI_WRITEMASK_Y) 338 flags |= A0_DEST_CHANNEL_Y; 339 if (writeMask & TGSI_WRITEMASK_Z) 340 flags |= A0_DEST_CHANNEL_Z; 341 if (writeMask & TGSI_WRITEMASK_W) 342 flags |= A0_DEST_CHANNEL_W; 343 344 return flags; 345} 346 347 348/** 349 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token 350 */ 351static uint 352translate_tex_src_target(struct i915_fp_compile *p, uint tex) 353{ 354 switch (tex) { 355 case TGSI_TEXTURE_SHADOW1D: 356 /* fall-through */ 357 case TGSI_TEXTURE_1D: 358 return D0_SAMPLE_TYPE_2D; 359 360 case TGSI_TEXTURE_SHADOW2D: 361 /* fall-through */ 362 case TGSI_TEXTURE_2D: 363 return D0_SAMPLE_TYPE_2D; 364 365 case TGSI_TEXTURE_SHADOWRECT: 366 /* fall-through */ 367 case TGSI_TEXTURE_RECT: 368 return D0_SAMPLE_TYPE_2D; 369 370 case TGSI_TEXTURE_3D: 371 return D0_SAMPLE_TYPE_VOLUME; 372 373 case TGSI_TEXTURE_CUBE: 374 return D0_SAMPLE_TYPE_CUBE; 375 376 default: 377 i915_program_error(p, "TexSrc type"); 378 return 0; 379 } 380} 381 382/** 383 * Return the number of coords needed to access a given TGSI_TEXTURE_* 384 */ 385uint 386i915_num_coords(uint tex) 387{ 388 switch (tex) { 389 case TGSI_TEXTURE_SHADOW1D: 390 case TGSI_TEXTURE_1D: 391 return 1; 392 393 case TGSI_TEXTURE_SHADOW2D: 394 case TGSI_TEXTURE_2D: 395 case TGSI_TEXTURE_SHADOWRECT: 396 case TGSI_TEXTURE_RECT: 397 return 2; 398 399 case TGSI_TEXTURE_3D: 400 case TGSI_TEXTURE_CUBE: 401 return 3; 402 403 default: 404 debug_printf("Unknown texture target for num coords"); 405 return 2; 406 } 407} 408 409 410/** 411 * Generate texel lookup instruction. 412 */ 413static void 414emit_tex(struct i915_fp_compile *p, 415 const struct i915_full_instruction *inst, 416 uint opcode, 417 struct i915_fragment_shader* fs) 418{ 419 uint texture = inst->Texture.Texture; 420 uint unit = inst->Src[1].Register.Index; 421 uint tex = translate_tex_src_target( p, texture ); 422 uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); 423 uint coord = src_vector( p, &inst->Src[0], fs); 424 425 i915_emit_texld( p, 426 get_result_vector( p, &inst->Dst[0] ), 427 get_result_flags( inst ), 428 sampler, 429 coord, 430 opcode, 431 i915_num_coords(texture) ); 432} 433 434 435/** 436 * Generate a simple arithmetic instruction 437 * \param opcode the i915 opcode 438 * \param numArgs the number of input/src arguments 439 */ 440static void 441emit_simple_arith(struct i915_fp_compile *p, 442 const struct i915_full_instruction *inst, 443 uint opcode, uint numArgs, 444 struct i915_fragment_shader *fs) 445{ 446 uint arg1, arg2, arg3; 447 448 assert(numArgs <= 3); 449 450 arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); 451 arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); 452 arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs ); 453 454 i915_emit_arith( p, 455 opcode, 456 get_result_vector( p, &inst->Dst[0]), 457 get_result_flags( inst ), 0, 458 arg1, 459 arg2, 460 arg3 ); 461} 462 463 464/** As above, but swap the first two src regs */ 465static void 466emit_simple_arith_swap2(struct i915_fp_compile *p, 467 const struct i915_full_instruction *inst, 468 uint opcode, uint numArgs, 469 struct i915_fragment_shader *fs) 470{ 471 struct i915_full_instruction inst2; 472 473 assert(numArgs == 2); 474 475 /* transpose first two registers */ 476 inst2 = *inst; 477 inst2.Src[0] = inst->Src[1]; 478 inst2.Src[1] = inst->Src[0]; 479 480 emit_simple_arith(p, &inst2, opcode, numArgs, fs); 481} 482 483/* 484 * Translate TGSI instruction to i915 instruction. 485 * 486 * Possible concerns: 487 * 488 * DDX, DDY -- return 0 489 * SIN, COS -- could use another taylor step? 490 * LIT -- results seem a little different to sw mesa 491 * LOG -- different to mesa on negative numbers, but this is conformant. 492 */ 493static void 494i915_translate_instruction(struct i915_fp_compile *p, 495 const struct i915_full_instruction *inst, 496 struct i915_fragment_shader *fs) 497{ 498 uint writemask; 499 uint src0, src1, src2, flags; 500 uint tmp = 0; 501 502 switch (inst->Instruction.Opcode) { 503 case TGSI_OPCODE_ADD: 504 emit_simple_arith(p, inst, A0_ADD, 2, fs); 505 break; 506 507 case TGSI_OPCODE_CEIL: 508 src0 = src_vector(p, &inst->Src[0], fs); 509 tmp = i915_get_utemp(p); 510 flags = get_result_flags(inst); 511 i915_emit_arith(p, 512 A0_FLR, 513 tmp, 514 flags & A0_DEST_CHANNEL_ALL, 0, 515 negate(src0, 1, 1, 1, 1), 0, 0); 516 i915_emit_arith(p, 517 A0_MOV, 518 get_result_vector(p, &inst->Dst[0]), 519 flags, 0, 520 negate(tmp, 1, 1, 1, 1), 0, 0); 521 break; 522 523 case TGSI_OPCODE_CMP: 524 src0 = src_vector(p, &inst->Src[0], fs); 525 src1 = src_vector(p, &inst->Src[1], fs); 526 src2 = src_vector(p, &inst->Src[2], fs); 527 i915_emit_arith(p, A0_CMP, 528 get_result_vector(p, &inst->Dst[0]), 529 get_result_flags(inst), 530 0, src0, src2, src1); /* NOTE: order of src2, src1 */ 531 break; 532 533 case TGSI_OPCODE_COS: 534 src0 = src_vector(p, &inst->Src[0], fs); 535 tmp = i915_get_utemp(p); 536 537 i915_emit_arith(p, 538 A0_MUL, 539 tmp, A0_DEST_CHANNEL_X, 0, 540 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 541 542 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 543 544 /* 545 * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 546 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 547 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 548 * result = DP4 t0, cos_constants 549 */ 550 i915_emit_arith(p, 551 A0_MUL, 552 tmp, A0_DEST_CHANNEL_XY, 0, 553 swizzle(tmp, X, X, ONE, ONE), 554 swizzle(tmp, X, ONE, ONE, ONE), 0); 555 556 i915_emit_arith(p, 557 A0_MUL, 558 tmp, A0_DEST_CHANNEL_XYZ, 0, 559 swizzle(tmp, X, Y, X, ONE), 560 swizzle(tmp, X, X, ONE, ONE), 0); 561 562 i915_emit_arith(p, 563 A0_MUL, 564 tmp, A0_DEST_CHANNEL_XYZ, 0, 565 swizzle(tmp, X, X, Z, ONE), 566 swizzle(tmp, Z, ONE, ONE, ONE), 0); 567 568 i915_emit_arith(p, 569 A0_DP4, 570 get_result_vector(p, &inst->Dst[0]), 571 get_result_flags(inst), 0, 572 swizzle(tmp, ONE, Z, Y, X), 573 i915_emit_const4fv(p, cos_constants), 0); 574 break; 575 576 case TGSI_OPCODE_DDX: 577 case TGSI_OPCODE_DDY: 578 /* XXX We just output 0 here */ 579 debug_printf("Punting DDX/DDY\n"); 580 src0 = get_result_vector(p, &inst->Dst[0]); 581 i915_emit_arith(p, 582 A0_MOV, 583 get_result_vector(p, &inst->Dst[0]), 584 get_result_flags(inst), 0, 585 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); 586 break; 587 588 case TGSI_OPCODE_DP2: 589 src0 = src_vector(p, &inst->Src[0], fs); 590 src1 = src_vector(p, &inst->Src[1], fs); 591 592 i915_emit_arith(p, 593 A0_DP3, 594 get_result_vector(p, &inst->Dst[0]), 595 get_result_flags(inst), 0, 596 swizzle(src0, X, Y, ZERO, ZERO), src1, 0); 597 break; 598 599 case TGSI_OPCODE_DP3: 600 emit_simple_arith(p, inst, A0_DP3, 2, fs); 601 break; 602 603 case TGSI_OPCODE_DP4: 604 emit_simple_arith(p, inst, A0_DP4, 2, fs); 605 break; 606 607 case TGSI_OPCODE_DPH: 608 src0 = src_vector(p, &inst->Src[0], fs); 609 src1 = src_vector(p, &inst->Src[1], fs); 610 611 i915_emit_arith(p, 612 A0_DP4, 613 get_result_vector(p, &inst->Dst[0]), 614 get_result_flags(inst), 0, 615 swizzle(src0, X, Y, Z, ONE), src1, 0); 616 break; 617 618 case TGSI_OPCODE_DST: 619 src0 = src_vector(p, &inst->Src[0], fs); 620 src1 = src_vector(p, &inst->Src[1], fs); 621 622 /* result[0] = 1 * 1; 623 * result[1] = a[1] * b[1]; 624 * result[2] = a[2] * 1; 625 * result[3] = 1 * b[3]; 626 */ 627 i915_emit_arith(p, 628 A0_MUL, 629 get_result_vector(p, &inst->Dst[0]), 630 get_result_flags(inst), 0, 631 swizzle(src0, ONE, Y, Z, ONE), 632 swizzle(src1, ONE, Y, ONE, W), 0); 633 break; 634 635 case TGSI_OPCODE_END: 636 /* no-op */ 637 break; 638 639 case TGSI_OPCODE_EX2: 640 src0 = src_vector(p, &inst->Src[0], fs); 641 642 i915_emit_arith(p, 643 A0_EXP, 644 get_result_vector(p, &inst->Dst[0]), 645 get_result_flags(inst), 0, 646 swizzle(src0, X, X, X, X), 0, 0); 647 break; 648 649 case TGSI_OPCODE_FLR: 650 emit_simple_arith(p, inst, A0_FLR, 1, fs); 651 break; 652 653 case TGSI_OPCODE_FRC: 654 emit_simple_arith(p, inst, A0_FRC, 1, fs); 655 break; 656 657 case TGSI_OPCODE_KILL_IF: 658 /* kill if src[0].x < 0 || src[0].y < 0 ... */ 659 src0 = src_vector(p, &inst->Src[0], fs); 660 tmp = i915_get_utemp(p); 661 662 i915_emit_texld(p, 663 tmp, /* dest reg: a dummy reg */ 664 A0_DEST_CHANNEL_ALL, /* dest writemask */ 665 0, /* sampler */ 666 src0, /* coord*/ 667 T0_TEXKILL, /* opcode */ 668 1); /* num_coord */ 669 break; 670 671 case TGSI_OPCODE_KILL: 672 /* unconditional kill */ 673 tmp = i915_get_utemp(p); 674 675 i915_emit_texld(p, 676 tmp, /* dest reg: a dummy reg */ 677 A0_DEST_CHANNEL_ALL, /* dest writemask */ 678 0, /* sampler */ 679 negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */ 680 T0_TEXKILL, /* opcode */ 681 1); /* num_coord */ 682 break; 683 684 case TGSI_OPCODE_LG2: 685 src0 = src_vector(p, &inst->Src[0], fs); 686 687 i915_emit_arith(p, 688 A0_LOG, 689 get_result_vector(p, &inst->Dst[0]), 690 get_result_flags(inst), 0, 691 swizzle(src0, X, X, X, X), 0, 0); 692 break; 693 694 case TGSI_OPCODE_LIT: 695 src0 = src_vector(p, &inst->Src[0], fs); 696 tmp = i915_get_utemp(p); 697 698 /* tmp = max( a.xyzw, a.00zw ) 699 * XXX: Clamp tmp.w to -128..128 700 * tmp.y = log(tmp.y) 701 * tmp.y = tmp.w * tmp.y 702 * tmp.y = exp(tmp.y) 703 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 704 */ 705 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 706 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 707 708 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 709 swizzle(tmp, Y, Y, Y, Y), 0, 0); 710 711 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 712 swizzle(tmp, ZERO, Y, ZERO, ZERO), 713 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 714 715 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 716 swizzle(tmp, Y, Y, Y, Y), 0, 0); 717 718 i915_emit_arith(p, A0_CMP, 719 get_result_vector(p, &inst->Dst[0]), 720 get_result_flags(inst), 0, 721 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 722 swizzle(tmp, ONE, X, ZERO, ONE), 723 swizzle(tmp, ONE, X, Y, ONE)); 724 725 break; 726 727 case TGSI_OPCODE_LRP: 728 src0 = src_vector(p, &inst->Src[0], fs); 729 src1 = src_vector(p, &inst->Src[1], fs); 730 src2 = src_vector(p, &inst->Src[2], fs); 731 flags = get_result_flags(inst); 732 tmp = i915_get_utemp(p); 733 734 /* b*a + c*(1-a) 735 * 736 * b*a + c - ca 737 * 738 * tmp = b*a + c, 739 * result = (-c)*a + tmp 740 */ 741 i915_emit_arith(p, A0_MAD, tmp, 742 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 743 744 i915_emit_arith(p, A0_MAD, 745 get_result_vector(p, &inst->Dst[0]), 746 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 747 break; 748 749 case TGSI_OPCODE_MAD: 750 emit_simple_arith(p, inst, A0_MAD, 3, fs); 751 break; 752 753 case TGSI_OPCODE_MAX: 754 emit_simple_arith(p, inst, A0_MAX, 2, fs); 755 break; 756 757 case TGSI_OPCODE_MIN: 758 emit_simple_arith(p, inst, A0_MIN, 2, fs); 759 break; 760 761 case TGSI_OPCODE_MOV: 762 emit_simple_arith(p, inst, A0_MOV, 1, fs); 763 break; 764 765 case TGSI_OPCODE_MUL: 766 emit_simple_arith(p, inst, A0_MUL, 2, fs); 767 break; 768 769 case TGSI_OPCODE_NOP: 770 break; 771 772 case TGSI_OPCODE_POW: 773 src0 = src_vector(p, &inst->Src[0], fs); 774 src1 = src_vector(p, &inst->Src[1], fs); 775 tmp = i915_get_utemp(p); 776 flags = get_result_flags(inst); 777 778 /* XXX: masking on intermediate values, here and elsewhere. 779 */ 780 i915_emit_arith(p, 781 A0_LOG, 782 tmp, A0_DEST_CHANNEL_X, 0, 783 swizzle(src0, X, X, X, X), 0, 0); 784 785 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 786 787 i915_emit_arith(p, 788 A0_EXP, 789 get_result_vector(p, &inst->Dst[0]), 790 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 791 break; 792 793 case TGSI_OPCODE_RET: 794 /* XXX: no-op? */ 795 break; 796 797 case TGSI_OPCODE_RCP: 798 src0 = src_vector(p, &inst->Src[0], fs); 799 800 i915_emit_arith(p, 801 A0_RCP, 802 get_result_vector(p, &inst->Dst[0]), 803 get_result_flags(inst), 0, 804 swizzle(src0, X, X, X, X), 0, 0); 805 break; 806 807 case TGSI_OPCODE_RSQ: 808 src0 = src_vector(p, &inst->Src[0], fs); 809 810 i915_emit_arith(p, 811 A0_RSQ, 812 get_result_vector(p, &inst->Dst[0]), 813 get_result_flags(inst), 0, 814 swizzle(src0, X, X, X, X), 0, 0); 815 break; 816 817 case TGSI_OPCODE_SCS: 818 src0 = src_vector(p, &inst->Src[0], fs); 819 tmp = i915_get_utemp(p); 820 821 /* 822 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 823 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 824 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 825 * scs.x = DP4 t1, scs_sin_constants 826 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 827 * scs.y = DP4 t1, scs_cos_constants 828 */ 829 i915_emit_arith(p, 830 A0_MUL, 831 tmp, A0_DEST_CHANNEL_XY, 0, 832 swizzle(src0, X, X, ONE, ONE), 833 swizzle(src0, X, ONE, ONE, ONE), 0); 834 835 i915_emit_arith(p, 836 A0_MUL, 837 tmp, A0_DEST_CHANNEL_ALL, 0, 838 swizzle(tmp, X, Y, X, Y), 839 swizzle(tmp, X, X, ONE, ONE), 0); 840 841 writemask = inst->Dst[0].Register.WriteMask; 842 843 if (writemask & TGSI_WRITEMASK_Y) { 844 uint tmp1; 845 846 if (writemask & TGSI_WRITEMASK_X) 847 tmp1 = i915_get_utemp(p); 848 else 849 tmp1 = tmp; 850 851 i915_emit_arith(p, 852 A0_MUL, 853 tmp1, A0_DEST_CHANNEL_ALL, 0, 854 swizzle(tmp, X, Y, Y, W), 855 swizzle(tmp, X, Z, ONE, ONE), 0); 856 857 i915_emit_arith(p, 858 A0_DP4, 859 get_result_vector(p, &inst->Dst[0]), 860 A0_DEST_CHANNEL_Y, 0, 861 swizzle(tmp1, W, Z, Y, X), 862 i915_emit_const4fv(p, scs_sin_constants), 0); 863 } 864 865 if (writemask & TGSI_WRITEMASK_X) { 866 i915_emit_arith(p, 867 A0_MUL, 868 tmp, A0_DEST_CHANNEL_XYZ, 0, 869 swizzle(tmp, X, X, Z, ONE), 870 swizzle(tmp, Z, ONE, ONE, ONE), 0); 871 872 i915_emit_arith(p, 873 A0_DP4, 874 get_result_vector(p, &inst->Dst[0]), 875 A0_DEST_CHANNEL_X, 0, 876 swizzle(tmp, ONE, Z, Y, X), 877 i915_emit_const4fv(p, scs_cos_constants), 0); 878 } 879 break; 880 881 case TGSI_OPCODE_SEQ: 882 /* if we're both >= and <= then we're == */ 883 src0 = src_vector(p, &inst->Src[0], fs); 884 src1 = src_vector(p, &inst->Src[1], fs); 885 tmp = i915_get_utemp(p); 886 887 i915_emit_arith(p, 888 A0_SGE, 889 tmp, A0_DEST_CHANNEL_ALL, 0, 890 src0, 891 src1, 0); 892 893 i915_emit_arith(p, 894 A0_SGE, 895 get_result_vector(p, &inst->Dst[0]), 896 A0_DEST_CHANNEL_ALL, 0, 897 src1, 898 src0, 0); 899 900 i915_emit_arith(p, 901 A0_MUL, 902 get_result_vector(p, &inst->Dst[0]), 903 A0_DEST_CHANNEL_ALL, 0, 904 get_result_vector(p, &inst->Dst[0]), 905 tmp, 0); 906 907 break; 908 909 case TGSI_OPCODE_SGE: 910 emit_simple_arith(p, inst, A0_SGE, 2, fs); 911 break; 912 913 case TGSI_OPCODE_SIN: 914 src0 = src_vector(p, &inst->Src[0], fs); 915 tmp = i915_get_utemp(p); 916 917 i915_emit_arith(p, 918 A0_MUL, 919 tmp, A0_DEST_CHANNEL_X, 0, 920 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 921 922 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 923 924 /* 925 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 926 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 927 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 928 * result = DP4 t1.wzyx, sin_constants 929 */ 930 i915_emit_arith(p, 931 A0_MUL, 932 tmp, A0_DEST_CHANNEL_XY, 0, 933 swizzle(tmp, X, X, ONE, ONE), 934 swizzle(tmp, X, ONE, ONE, ONE), 0); 935 936 i915_emit_arith(p, 937 A0_MUL, 938 tmp, A0_DEST_CHANNEL_ALL, 0, 939 swizzle(tmp, X, Y, X, Y), 940 swizzle(tmp, X, X, ONE, ONE), 0); 941 942 i915_emit_arith(p, 943 A0_MUL, 944 tmp, A0_DEST_CHANNEL_ALL, 0, 945 swizzle(tmp, X, Y, Y, W), 946 swizzle(tmp, X, Z, ONE, ONE), 0); 947 948 i915_emit_arith(p, 949 A0_DP4, 950 get_result_vector(p, &inst->Dst[0]), 951 get_result_flags(inst), 0, 952 swizzle(tmp, W, Z, Y, X), 953 i915_emit_const4fv(p, sin_constants), 0); 954 break; 955 956 case TGSI_OPCODE_SLE: 957 /* like SGE, but swap reg0, reg1 */ 958 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); 959 break; 960 961 case TGSI_OPCODE_SLT: 962 emit_simple_arith(p, inst, A0_SLT, 2, fs); 963 break; 964 965 case TGSI_OPCODE_SGT: 966 /* like SLT, but swap reg0, reg1 */ 967 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); 968 break; 969 970 case TGSI_OPCODE_SNE: 971 /* if we're < or > then we're != */ 972 src0 = src_vector(p, &inst->Src[0], fs); 973 src1 = src_vector(p, &inst->Src[1], fs); 974 tmp = i915_get_utemp(p); 975 976 i915_emit_arith(p, 977 A0_SLT, 978 tmp, 979 A0_DEST_CHANNEL_ALL, 0, 980 src0, 981 src1, 0); 982 983 i915_emit_arith(p, 984 A0_SLT, 985 get_result_vector(p, &inst->Dst[0]), 986 A0_DEST_CHANNEL_ALL, 0, 987 src1, 988 src0, 0); 989 990 i915_emit_arith(p, 991 A0_ADD, 992 get_result_vector(p, &inst->Dst[0]), 993 A0_DEST_CHANNEL_ALL, 0, 994 get_result_vector(p, &inst->Dst[0]), 995 tmp, 0); 996 break; 997 998 case TGSI_OPCODE_SSG: 999 /* compute (src>0) - (src<0) */ 1000 src0 = src_vector(p, &inst->Src[0], fs); 1001 tmp = i915_get_utemp(p); 1002 1003 i915_emit_arith(p, 1004 A0_SLT, 1005 tmp, 1006 A0_DEST_CHANNEL_ALL, 0, 1007 src0, 1008 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); 1009 1010 i915_emit_arith(p, 1011 A0_SLT, 1012 get_result_vector(p, &inst->Dst[0]), 1013 A0_DEST_CHANNEL_ALL, 0, 1014 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 1015 src0, 0); 1016 1017 i915_emit_arith(p, 1018 A0_ADD, 1019 get_result_vector(p, &inst->Dst[0]), 1020 A0_DEST_CHANNEL_ALL, 0, 1021 get_result_vector(p, &inst->Dst[0]), 1022 negate(tmp, 1, 1, 1, 1), 0); 1023 break; 1024 1025 case TGSI_OPCODE_TEX: 1026 emit_tex(p, inst, T0_TEXLD, fs); 1027 break; 1028 1029 case TGSI_OPCODE_TRUNC: 1030 emit_simple_arith(p, inst, A0_TRC, 1, fs); 1031 break; 1032 1033 case TGSI_OPCODE_TXB: 1034 emit_tex(p, inst, T0_TEXLDB, fs); 1035 break; 1036 1037 case TGSI_OPCODE_TXP: 1038 emit_tex(p, inst, T0_TEXLDP, fs); 1039 break; 1040 1041 case TGSI_OPCODE_XPD: 1042 /* Cross product: 1043 * result.x = src0.y * src1.z - src0.z * src1.y; 1044 * result.y = src0.z * src1.x - src0.x * src1.z; 1045 * result.z = src0.x * src1.y - src0.y * src1.x; 1046 * result.w = undef; 1047 */ 1048 src0 = src_vector(p, &inst->Src[0], fs); 1049 src1 = src_vector(p, &inst->Src[1], fs); 1050 tmp = i915_get_utemp(p); 1051 1052 i915_emit_arith(p, 1053 A0_MUL, 1054 tmp, A0_DEST_CHANNEL_ALL, 0, 1055 swizzle(src0, Z, X, Y, ONE), 1056 swizzle(src1, Y, Z, X, ONE), 0); 1057 1058 i915_emit_arith(p, 1059 A0_MAD, 1060 get_result_vector(p, &inst->Dst[0]), 1061 get_result_flags(inst), 0, 1062 swizzle(src0, Y, Z, X, ONE), 1063 swizzle(src1, Z, X, Y, ONE), 1064 negate(tmp, 1, 1, 1, 0)); 1065 break; 1066 1067 default: 1068 i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); 1069 p->error = 1; 1070 return; 1071 } 1072 1073 i915_release_utemps(p); 1074} 1075 1076 1077static void i915_translate_token(struct i915_fp_compile *p, 1078 const union i915_full_token *token, 1079 struct i915_fragment_shader *fs) 1080{ 1081 struct i915_fragment_shader *ifs = p->shader; 1082 switch( token->Token.Type ) { 1083 case TGSI_TOKEN_TYPE_PROPERTY: 1084 /* 1085 * We only support one cbuf, but we still need to ignore the property 1086 * correctly so we don't hit the assert at the end of the switch case. 1087 */ 1088 assert(token->FullProperty.Property.PropertyName == 1089 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); 1090 break; 1091 1092 case TGSI_TOKEN_TYPE_DECLARATION: 1093 if (token->FullDeclaration.Declaration.File 1094 == TGSI_FILE_CONSTANT) { 1095 uint i; 1096 for (i = token->FullDeclaration.Range.First; 1097 i <= MIN2(token->FullDeclaration.Range.Last, I915_MAX_CONSTANT - 1); 1098 i++) { 1099 assert(ifs->constant_flags[i] == 0x0); 1100 ifs->constant_flags[i] = I915_CONSTFLAG_USER; 1101 ifs->num_constants = MAX2(ifs->num_constants, i + 1); 1102 } 1103 } 1104 else if (token->FullDeclaration.Declaration.File 1105 == TGSI_FILE_TEMPORARY) { 1106 uint i; 1107 for (i = token->FullDeclaration.Range.First; 1108 i <= token->FullDeclaration.Range.Last; 1109 i++) { 1110 if (i >= I915_MAX_TEMPORARY) 1111 debug_printf("Too many temps (%d)\n",i); 1112 else 1113 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ 1114 p->temp_flag |= (1 << i); /* mark temp as used */ 1115 } 1116 } 1117 break; 1118 1119 case TGSI_TOKEN_TYPE_IMMEDIATE: 1120 { 1121 const struct tgsi_full_immediate *imm 1122 = &token->FullImmediate; 1123 const uint pos = p->num_immediates++; 1124 uint j; 1125 assert( imm->Immediate.NrTokens <= 4 + 1 ); 1126 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { 1127 p->immediates[pos][j] = imm->u[j].Float; 1128 } 1129 } 1130 break; 1131 1132 case TGSI_TOKEN_TYPE_INSTRUCTION: 1133 if (p->first_instruction) { 1134 /* resolve location of immediates */ 1135 uint i, j; 1136 for (i = 0; i < p->num_immediates; i++) { 1137 /* find constant slot for this immediate */ 1138 for (j = 0; j < I915_MAX_CONSTANT; j++) { 1139 if (ifs->constant_flags[j] == 0x0) { 1140 memcpy(ifs->constants[j], 1141 p->immediates[i], 1142 4 * sizeof(float)); 1143 /*printf("immediate %d maps to const %d\n", i, j);*/ 1144 ifs->constant_flags[j] = 0xf; /* all four comps used */ 1145 p->immediates_map[i] = j; 1146 ifs->num_constants = MAX2(ifs->num_constants, j + 1); 1147 break; 1148 } 1149 } 1150 } 1151 1152 p->first_instruction = FALSE; 1153 } 1154 1155 i915_translate_instruction(p, &token->FullInstruction, fs); 1156 break; 1157 1158 default: 1159 assert( 0 ); 1160 } 1161 1162} 1163 1164/** 1165 * Translate TGSI fragment shader into i915 hardware instructions. 1166 * \param p the translation state 1167 * \param tokens the TGSI token array 1168 */ 1169static void 1170i915_translate_instructions(struct i915_fp_compile *p, 1171 const struct i915_token_list *tokens, 1172 struct i915_fragment_shader *fs) 1173{ 1174 int i; 1175 for(i = 0; i<tokens->NumTokens; i++) { 1176 i915_translate_token(p, &tokens->Tokens[i], fs); 1177 } 1178} 1179 1180 1181static struct i915_fp_compile * 1182i915_init_compile(struct i915_context *i915, 1183 struct i915_fragment_shader *ifs) 1184{ 1185 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); 1186 int i; 1187 1188 p->shader = ifs; 1189 1190 /* Put new constants at end of const buffer, growing downward. 1191 * The problem is we don't know how many user-defined constants might 1192 * be specified with pipe->set_constant_buffer(). 1193 * Should pre-scan the user's program to determine the highest-numbered 1194 * constant referenced. 1195 */ 1196 ifs->num_constants = 0; 1197 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); 1198 1199 memset(&p->register_phases, 0, sizeof(p->register_phases)); 1200 1201 for (i = 0; i < I915_TEX_UNITS; i++) 1202 ifs->generic_mapping[i] = -1; 1203 1204 p->first_instruction = TRUE; 1205 1206 p->nr_tex_indirect = 1; /* correct? */ 1207 p->nr_tex_insn = 0; 1208 p->nr_alu_insn = 0; 1209 p->nr_decl_insn = 0; 1210 1211 p->csr = p->program; 1212 p->decl = p->declarations; 1213 p->decl_s = 0; 1214 p->decl_t = 0; 1215 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; 1216 p->utemp_flag = ~0x7; 1217 1218 /* initialize the first program word */ 1219 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; 1220 1221 return p; 1222} 1223 1224 1225/* Copy compile results to the fragment program struct and destroy the 1226 * compilation context. 1227 */ 1228static void 1229i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) 1230{ 1231 struct i915_fragment_shader *ifs = p->shader; 1232 unsigned long program_size = (unsigned long) (p->csr - p->program); 1233 unsigned long decl_size = (unsigned long) (p->decl - p->declarations); 1234 1235 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 1236 debug_printf("Exceeded max nr indirect texture lookups\n"); 1237 1238 if (p->nr_tex_insn > I915_MAX_TEX_INSN) 1239 i915_program_error(p, "Exceeded max TEX instructions"); 1240 1241 if (p->nr_alu_insn > I915_MAX_ALU_INSN) 1242 i915_program_error(p, "Exceeded max ALU instructions"); 1243 1244 if (p->nr_decl_insn > I915_MAX_DECL_INSN) 1245 i915_program_error(p, "Exceeded max DECL instructions"); 1246 1247 if (p->error) { 1248 p->NumNativeInstructions = 0; 1249 p->NumNativeAluInstructions = 0; 1250 p->NumNativeTexInstructions = 0; 1251 p->NumNativeTexIndirections = 0; 1252 1253 i915_use_passthrough_shader(ifs); 1254 } 1255 else { 1256 p->NumNativeInstructions 1257 = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; 1258 p->NumNativeAluInstructions = p->nr_alu_insn; 1259 p->NumNativeTexInstructions = p->nr_tex_insn; 1260 p->NumNativeTexIndirections = p->nr_tex_indirect; 1261 1262 /* patch in the program length */ 1263 p->declarations[0] |= program_size + decl_size - 2; 1264 1265 /* Copy compilation results to fragment program struct: 1266 */ 1267 assert(!ifs->decl); 1268 assert(!ifs->program); 1269 1270 ifs->decl 1271 = (uint *) MALLOC(decl_size * sizeof(uint)); 1272 ifs->program 1273 = (uint *) MALLOC(program_size * sizeof(uint)); 1274 1275 if (ifs->decl) { 1276 ifs->decl_len = decl_size; 1277 1278 memcpy(ifs->decl, 1279 p->declarations, 1280 decl_size * sizeof(uint)); 1281 } 1282 1283 if (ifs->program) { 1284 ifs->program_len = program_size; 1285 1286 memcpy(ifs->program, 1287 p->program, 1288 program_size * sizeof(uint)); 1289 } 1290 } 1291 1292 /* Release the compilation struct: 1293 */ 1294 FREE(p); 1295} 1296 1297 1298 1299 1300 1301/** 1302 * Rather than trying to intercept and jiggle depth writes during 1303 * emit, just move the value into its correct position at the end of 1304 * the program: 1305 */ 1306static void 1307i915_fixup_depth_write(struct i915_fp_compile *p) 1308{ 1309 /* XXX assuming pos/depth is always in output[0] */ 1310 if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 1311 const uint depth = UREG(REG_TYPE_OD, 0); 1312 1313 i915_emit_arith(p, 1314 A0_MOV, /* opcode */ 1315 depth, /* dest reg */ 1316 A0_DEST_CHANNEL_W, /* write mask */ 1317 0, /* saturate? */ 1318 swizzle(depth, X, Y, Z, Z), /* src0 */ 1319 0, 0 /* src1, src2 */); 1320 } 1321} 1322 1323 1324void 1325i915_translate_fragment_program( struct i915_context *i915, 1326 struct i915_fragment_shader *fs) 1327{ 1328 struct i915_fp_compile *p; 1329 const struct tgsi_token *tokens = fs->state.tokens; 1330 struct i915_token_list* i_tokens; 1331 1332#if 0 1333 tgsi_dump(tokens, 0); 1334#endif 1335 1336 /* hw doesn't seem to like empty frag programs, even when the depth write 1337 * fixup gets emitted below - may that one is fishy, too? */ 1338 if (fs->info.num_instructions == 1) { 1339 i915_use_passthrough_shader(fs); 1340 1341 return; 1342 } 1343 1344 p = i915_init_compile(i915, fs); 1345 1346 i_tokens = i915_optimize(tokens); 1347 i915_translate_instructions(p, i_tokens, fs); 1348 i915_fixup_depth_write(p); 1349 1350 i915_fini_compile(i915, p); 1351 i915_optimize_free(i_tokens); 1352 1353#if 0 1354 i915_disassemble_program(NULL, fs->program, fs->program_len); 1355#endif 1356} 1357