i915_fpc_translate.c revision 951bf8b4a64c9793d10e963889e74fc1659ddb4b
1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include <stdarg.h> 30 31#include "i915_reg.h" 32#include "i915_context.h" 33#include "i915_fpc.h" 34#include "i915_debug_private.h" 35 36#include "pipe/p_shader_tokens.h" 37#include "util/u_math.h" 38#include "util/u_memory.h" 39#include "util/u_string.h" 40#include "tgsi/tgsi_parse.h" 41#include "tgsi/tgsi_dump.h" 42 43#include "draw/draw_vertex.h" 44 45#ifndef M_PI 46#define M_PI 3.14159265358979323846 47#endif 48 49/** 50 * Simple pass-through fragment shader to use when we don't have 51 * a real shader (or it fails to compile for some reason). 
 */
static unsigned passthrough_decl[] =
{
   /* Packet header; the low bits carry a length of (2*3)-1.
    * NOTE(review): this matches "program dwords + decl dwords - 2"
    * (3 + 4 - 2), the same formula i915_fini_compile() patches in for
    * real programs - confirm against the hardware docs.
    */
   _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1),

   /* declare input color: one three-dword DCL of the diffuse
    * interpolant (T_DIFFUSE), all four channels.
    */
   (D0_DCL |
    (REG_TYPE_T << D0_TYPE_SHIFT) |
    (T_DIFFUSE << D0_NR_SHIFT) |
    D0_CHANNEL_ALL),
   0,
   0,
};

/**
 * Body of the pass-through shader: a single three-dword MOV that copies
 * the diffuse input declared in passthrough_decl to the output color.
 */
static unsigned passthrough_program[] =
{
   /* move to output color:
    */
   (A0_MOV |
    (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
    A0_DEST_CHANNEL_ALL |
    (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) |
    (T_DIFFUSE << A0_SRC0_NR_SHIFT)),
   0x01230000,			/* .xyzw */
   0
};


/* Taylor-series coefficients for sin(x), paired by the SCS translation
 * with the vector (x, x^3, x^5, x^7) in a DP4:
 * 1, -1/3!, 1/5!, -1/7! */
static const float scs_sin_constants[4] = { 1.0,
   -1.0f / (3 * 2 * 1),
   1.0f / (5 * 4 * 3 * 2 * 1),
   -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1)
};

/* Taylor-series coefficients for cos(x), paired by the SCS translation
 * with the vector (1, x^2, x^4, x^6) in a DP4:
 * 1, -1/2!, 1/4!, -1/6! */
static const float scs_cos_constants[4] = { 1.0,
   -1.0f / (2 * 1),
   1.0f / (4 * 3 * 2 * 1),
   -1.0f / (6 * 5 * 4 * 3 * 2 * 1)
};

/* Same sine series, with the powers of 2*pi folded into the
 * coefficients: the SIN translation first multiplies its argument by
 * 1/(2*pi) and applies A0_MOD, so the polynomial is evaluated on the
 * scaled value.
 * 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */
static const float sin_constants[4] = { 2.0 * M_PI,
   -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1),
   32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1),
   -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1)
};

/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6!
*/ 103static const float cos_constants[4] = { 1.0, 104 -4.0f * M_PI * M_PI / (2 * 1), 105 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1), 106 -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1) 107}; 108 109 110 111/** 112 * component-wise negation of ureg 113 */ 114static inline int 115negate(int reg, int x, int y, int z, int w) 116{ 117 /* Another neat thing about the UREG representation */ 118 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | 119 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | 120 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | 121 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); 122} 123 124 125/** 126 * In the event of a translation failure, we'll generate a simple color 127 * pass-through program. 128 */ 129static void 130i915_use_passthrough_shader(struct i915_fragment_shader *fs) 131{ 132 fs->program = (uint *) MALLOC(sizeof(passthrough_program)); 133 fs->decl = (uint *) MALLOC(sizeof(passthrough_decl)); 134 if (fs->program) { 135 memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); 136 memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl)); 137 fs->program_len = ARRAY_SIZE(passthrough_program); 138 fs->decl_len = ARRAY_SIZE(passthrough_decl); 139 } 140 fs->num_constants = 0; 141} 142 143 144void 145i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 
146{ 147 va_list args; 148 char buffer[1024]; 149 150 debug_printf("i915_program_error: "); 151 va_start( args, msg ); 152 util_vsnprintf( buffer, sizeof(buffer), msg, args ); 153 va_end( args ); 154 debug_printf("%s", buffer); 155 debug_printf("\n"); 156 157 p->error = 1; 158} 159 160static uint get_mapping(struct i915_fragment_shader* fs, int unit) 161{ 162 int i; 163 for (i = 0; i < I915_TEX_UNITS; i++) 164 { 165 if (fs->generic_mapping[i] == -1) { 166 fs->generic_mapping[i] = unit; 167 return i; 168 } 169 if (fs->generic_mapping[i] == unit) 170 return i; 171 } 172 debug_printf("Exceeded max generics\n"); 173 return 0; 174} 175 176/** 177 * Construct a ureg for the given source register. Will emit 178 * constants, apply swizzling and negation as needed. 179 */ 180static uint 181src_vector(struct i915_fp_compile *p, 182 const struct i915_full_src_register *source, 183 struct i915_fragment_shader *fs) 184{ 185 uint index = source->Register.Index; 186 uint src = 0, sem_name, sem_ind; 187 188 switch (source->Register.File) { 189 case TGSI_FILE_TEMPORARY: 190 if (source->Register.Index >= I915_MAX_TEMPORARY) { 191 i915_program_error(p, "Exceeded max temporary reg"); 192 return 0; 193 } 194 src = UREG(REG_TYPE_R, index); 195 break; 196 case TGSI_FILE_INPUT: 197 /* XXX: Packing COL1, FOGC into a single attribute works for 198 * texenv programs, but will fail for real fragment programs 199 * that use these attributes and expect them to be a full 4 200 * components wide. Could use a texcoord to pass these 201 * attributes if necessary, but that won't work in the general 202 * case. 203 * 204 * We also use a texture coordinate to pass wpos when possible. 
205 */ 206 207 sem_name = p->shader->info.input_semantic_name[index]; 208 sem_ind = p->shader->info.input_semantic_index[index]; 209 210 switch (sem_name) { 211 case TGSI_SEMANTIC_POSITION: 212 { 213 /* for fragcoord */ 214 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); 215 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 216 break; 217 } 218 case TGSI_SEMANTIC_COLOR: 219 if (sem_ind == 0) { 220 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 221 } 222 else { 223 /* secondary color */ 224 assert(sem_ind == 1); 225 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 226 src = swizzle(src, X, Y, Z, ONE); 227 } 228 break; 229 case TGSI_SEMANTIC_FOG: 230 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 231 src = swizzle(src, W, W, W, W); 232 break; 233 case TGSI_SEMANTIC_GENERIC: 234 { 235 int real_tex_unit = get_mapping(fs, sem_ind); 236 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 237 break; 238 } 239 case TGSI_SEMANTIC_FACE: 240 { 241 /* for back/front faces */ 242 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); 243 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); 244 break; 245 } 246 default: 247 i915_program_error(p, "Bad source->Index"); 248 return 0; 249 } 250 break; 251 252 case TGSI_FILE_IMMEDIATE: 253 assert(index < p->num_immediates); 254 index = p->immediates_map[index]; 255 /* fall-through */ 256 case TGSI_FILE_CONSTANT: 257 src = UREG(REG_TYPE_CONST, index); 258 break; 259 260 default: 261 i915_program_error(p, "Bad source->File"); 262 return 0; 263 } 264 265 src = swizzle(src, 266 source->Register.SwizzleX, 267 source->Register.SwizzleY, 268 source->Register.SwizzleZ, 269 source->Register.SwizzleW); 270 271 /* There's both negate-all-components and per-component negation. 272 * Try to handle both here. 
273 */ 274 { 275 int n = source->Register.Negate; 276 src = negate(src, n, n, n, n); 277 } 278 279 /* no abs() */ 280#if 0 281 /* XXX assertions disabled to allow arbfplight.c to run */ 282 /* XXX enable these assertions, or fix things */ 283 assert(!source->Register.Absolute); 284#endif 285 if (source->Register.Absolute) 286 debug_printf("Unhandled absolute value\n"); 287 288 return src; 289} 290 291 292/** 293 * Construct a ureg for a destination register. 294 */ 295static uint 296get_result_vector(struct i915_fp_compile *p, 297 const struct i915_full_dst_register *dest) 298{ 299 switch (dest->Register.File) { 300 case TGSI_FILE_OUTPUT: 301 { 302 uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; 303 switch (sem_name) { 304 case TGSI_SEMANTIC_POSITION: 305 return UREG(REG_TYPE_OD, 0); 306 case TGSI_SEMANTIC_COLOR: 307 return UREG(REG_TYPE_OC, 0); 308 default: 309 i915_program_error(p, "Bad inst->DstReg.Index/semantics"); 310 return 0; 311 } 312 } 313 case TGSI_FILE_TEMPORARY: 314 return UREG(REG_TYPE_R, dest->Register.Index); 315 default: 316 i915_program_error(p, "Bad inst->DstReg.File"); 317 return 0; 318 } 319} 320 321 322/** 323 * Compute flags for saturation and writemask. 
324 */ 325static uint 326get_result_flags(const struct i915_full_instruction *inst) 327{ 328 const uint writeMask 329 = inst->Dst[0].Register.WriteMask; 330 uint flags = 0x0; 331 332 if (inst->Instruction.Saturate) 333 flags |= A0_DEST_SATURATE; 334 335 if (writeMask & TGSI_WRITEMASK_X) 336 flags |= A0_DEST_CHANNEL_X; 337 if (writeMask & TGSI_WRITEMASK_Y) 338 flags |= A0_DEST_CHANNEL_Y; 339 if (writeMask & TGSI_WRITEMASK_Z) 340 flags |= A0_DEST_CHANNEL_Z; 341 if (writeMask & TGSI_WRITEMASK_W) 342 flags |= A0_DEST_CHANNEL_W; 343 344 return flags; 345} 346 347 348/** 349 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token 350 */ 351static uint 352translate_tex_src_target(struct i915_fp_compile *p, uint tex) 353{ 354 switch (tex) { 355 case TGSI_TEXTURE_SHADOW1D: 356 /* fall-through */ 357 case TGSI_TEXTURE_1D: 358 return D0_SAMPLE_TYPE_2D; 359 360 case TGSI_TEXTURE_SHADOW2D: 361 /* fall-through */ 362 case TGSI_TEXTURE_2D: 363 return D0_SAMPLE_TYPE_2D; 364 365 case TGSI_TEXTURE_SHADOWRECT: 366 /* fall-through */ 367 case TGSI_TEXTURE_RECT: 368 return D0_SAMPLE_TYPE_2D; 369 370 case TGSI_TEXTURE_3D: 371 return D0_SAMPLE_TYPE_VOLUME; 372 373 case TGSI_TEXTURE_CUBE: 374 return D0_SAMPLE_TYPE_CUBE; 375 376 default: 377 i915_program_error(p, "TexSrc type"); 378 return 0; 379 } 380} 381 382/** 383 * Return the number of coords needed to access a given TGSI_TEXTURE_* 384 */ 385uint 386i915_num_coords(uint tex) 387{ 388 switch (tex) { 389 case TGSI_TEXTURE_SHADOW1D: 390 case TGSI_TEXTURE_1D: 391 return 1; 392 393 case TGSI_TEXTURE_SHADOW2D: 394 case TGSI_TEXTURE_2D: 395 case TGSI_TEXTURE_SHADOWRECT: 396 case TGSI_TEXTURE_RECT: 397 return 2; 398 399 case TGSI_TEXTURE_3D: 400 case TGSI_TEXTURE_CUBE: 401 return 3; 402 403 default: 404 debug_printf("Unknown texture target for num coords"); 405 return 2; 406 } 407} 408 409 410/** 411 * Generate texel lookup instruction. 
412 */ 413static void 414emit_tex(struct i915_fp_compile *p, 415 const struct i915_full_instruction *inst, 416 uint opcode, 417 struct i915_fragment_shader* fs) 418{ 419 uint texture = inst->Texture.Texture; 420 uint unit = inst->Src[1].Register.Index; 421 uint tex = translate_tex_src_target( p, texture ); 422 uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); 423 uint coord = src_vector( p, &inst->Src[0], fs); 424 425 i915_emit_texld( p, 426 get_result_vector( p, &inst->Dst[0] ), 427 get_result_flags( inst ), 428 sampler, 429 coord, 430 opcode, 431 i915_num_coords(texture) ); 432} 433 434 435/** 436 * Generate a simple arithmetic instruction 437 * \param opcode the i915 opcode 438 * \param numArgs the number of input/src arguments 439 */ 440static void 441emit_simple_arith(struct i915_fp_compile *p, 442 const struct i915_full_instruction *inst, 443 uint opcode, uint numArgs, 444 struct i915_fragment_shader *fs) 445{ 446 uint arg1, arg2, arg3; 447 448 assert(numArgs <= 3); 449 450 arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); 451 arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); 452 arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs ); 453 454 i915_emit_arith( p, 455 opcode, 456 get_result_vector( p, &inst->Dst[0]), 457 get_result_flags( inst ), 0, 458 arg1, 459 arg2, 460 arg3 ); 461} 462 463 464/** As above, but swap the first two src regs */ 465static void 466emit_simple_arith_swap2(struct i915_fp_compile *p, 467 const struct i915_full_instruction *inst, 468 uint opcode, uint numArgs, 469 struct i915_fragment_shader *fs) 470{ 471 struct i915_full_instruction inst2; 472 473 assert(numArgs == 2); 474 475 /* transpose first two registers */ 476 inst2 = *inst; 477 inst2.Src[0] = inst->Src[1]; 478 inst2.Src[1] = inst->Src[0]; 479 480 emit_simple_arith(p, &inst2, opcode, numArgs, fs); 481} 482 483/* 484 * Translate TGSI instruction to i915 instruction. 
485 * 486 * Possible concerns: 487 * 488 * DDX, DDY -- return 0 489 * SIN, COS -- could use another taylor step? 490 * LIT -- results seem a little different to sw mesa 491 * LOG -- different to mesa on negative numbers, but this is conformant. 492 */ 493static void 494i915_translate_instruction(struct i915_fp_compile *p, 495 const struct i915_full_instruction *inst, 496 struct i915_fragment_shader *fs) 497{ 498 uint writemask; 499 uint src0, src1, src2, flags; 500 uint tmp = 0; 501 502 switch (inst->Instruction.Opcode) { 503 case TGSI_OPCODE_ABS: 504 src0 = src_vector(p, &inst->Src[0], fs); 505 i915_emit_arith(p, 506 A0_MAX, 507 get_result_vector(p, &inst->Dst[0]), 508 get_result_flags(inst), 0, 509 src0, negate(src0, 1, 1, 1, 1), 0); 510 break; 511 512 case TGSI_OPCODE_ADD: 513 emit_simple_arith(p, inst, A0_ADD, 2, fs); 514 break; 515 516 case TGSI_OPCODE_CEIL: 517 src0 = src_vector(p, &inst->Src[0], fs); 518 tmp = i915_get_utemp(p); 519 flags = get_result_flags(inst); 520 i915_emit_arith(p, 521 A0_FLR, 522 tmp, 523 flags & A0_DEST_CHANNEL_ALL, 0, 524 negate(src0, 1, 1, 1, 1), 0, 0); 525 i915_emit_arith(p, 526 A0_MOV, 527 get_result_vector(p, &inst->Dst[0]), 528 flags, 0, 529 negate(tmp, 1, 1, 1, 1), 0, 0); 530 break; 531 532 case TGSI_OPCODE_CMP: 533 src0 = src_vector(p, &inst->Src[0], fs); 534 src1 = src_vector(p, &inst->Src[1], fs); 535 src2 = src_vector(p, &inst->Src[2], fs); 536 i915_emit_arith(p, A0_CMP, 537 get_result_vector(p, &inst->Dst[0]), 538 get_result_flags(inst), 539 0, src0, src2, src1); /* NOTE: order of src2, src1 */ 540 break; 541 542 case TGSI_OPCODE_COS: 543 src0 = src_vector(p, &inst->Src[0], fs); 544 tmp = i915_get_utemp(p); 545 546 i915_emit_arith(p, 547 A0_MUL, 548 tmp, A0_DEST_CHANNEL_X, 0, 549 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 550 551 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 552 553 /* 554 * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 555 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 556 
* t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 557 * result = DP4 t0, cos_constants 558 */ 559 i915_emit_arith(p, 560 A0_MUL, 561 tmp, A0_DEST_CHANNEL_XY, 0, 562 swizzle(tmp, X, X, ONE, ONE), 563 swizzle(tmp, X, ONE, ONE, ONE), 0); 564 565 i915_emit_arith(p, 566 A0_MUL, 567 tmp, A0_DEST_CHANNEL_XYZ, 0, 568 swizzle(tmp, X, Y, X, ONE), 569 swizzle(tmp, X, X, ONE, ONE), 0); 570 571 i915_emit_arith(p, 572 A0_MUL, 573 tmp, A0_DEST_CHANNEL_XYZ, 0, 574 swizzle(tmp, X, X, Z, ONE), 575 swizzle(tmp, Z, ONE, ONE, ONE), 0); 576 577 i915_emit_arith(p, 578 A0_DP4, 579 get_result_vector(p, &inst->Dst[0]), 580 get_result_flags(inst), 0, 581 swizzle(tmp, ONE, Z, Y, X), 582 i915_emit_const4fv(p, cos_constants), 0); 583 break; 584 585 case TGSI_OPCODE_DDX: 586 case TGSI_OPCODE_DDY: 587 /* XXX We just output 0 here */ 588 debug_printf("Punting DDX/DDX\n"); 589 src0 = get_result_vector(p, &inst->Dst[0]); 590 i915_emit_arith(p, 591 A0_MOV, 592 get_result_vector(p, &inst->Dst[0]), 593 get_result_flags(inst), 0, 594 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); 595 break; 596 597 case TGSI_OPCODE_DP2: 598 src0 = src_vector(p, &inst->Src[0], fs); 599 src1 = src_vector(p, &inst->Src[1], fs); 600 601 i915_emit_arith(p, 602 A0_DP3, 603 get_result_vector(p, &inst->Dst[0]), 604 get_result_flags(inst), 0, 605 swizzle(src0, X, Y, ZERO, ZERO), src1, 0); 606 break; 607 608 case TGSI_OPCODE_DP3: 609 emit_simple_arith(p, inst, A0_DP3, 2, fs); 610 break; 611 612 case TGSI_OPCODE_DP4: 613 emit_simple_arith(p, inst, A0_DP4, 2, fs); 614 break; 615 616 case TGSI_OPCODE_DPH: 617 src0 = src_vector(p, &inst->Src[0], fs); 618 src1 = src_vector(p, &inst->Src[1], fs); 619 620 i915_emit_arith(p, 621 A0_DP4, 622 get_result_vector(p, &inst->Dst[0]), 623 get_result_flags(inst), 0, 624 swizzle(src0, X, Y, Z, ONE), src1, 0); 625 break; 626 627 case TGSI_OPCODE_DST: 628 src0 = src_vector(p, &inst->Src[0], fs); 629 src1 = src_vector(p, &inst->Src[1], fs); 630 631 /* result[0] = 1 * 1; 632 * result[1] = a[1] * b[1]; 633 * 
result[2] = a[2] * 1; 634 * result[3] = 1 * b[3]; 635 */ 636 i915_emit_arith(p, 637 A0_MUL, 638 get_result_vector(p, &inst->Dst[0]), 639 get_result_flags(inst), 0, 640 swizzle(src0, ONE, Y, Z, ONE), 641 swizzle(src1, ONE, Y, ONE, W), 0); 642 break; 643 644 case TGSI_OPCODE_END: 645 /* no-op */ 646 break; 647 648 case TGSI_OPCODE_EX2: 649 src0 = src_vector(p, &inst->Src[0], fs); 650 651 i915_emit_arith(p, 652 A0_EXP, 653 get_result_vector(p, &inst->Dst[0]), 654 get_result_flags(inst), 0, 655 swizzle(src0, X, X, X, X), 0, 0); 656 break; 657 658 case TGSI_OPCODE_FLR: 659 emit_simple_arith(p, inst, A0_FLR, 1, fs); 660 break; 661 662 case TGSI_OPCODE_FRC: 663 emit_simple_arith(p, inst, A0_FRC, 1, fs); 664 break; 665 666 case TGSI_OPCODE_KILL_IF: 667 /* kill if src[0].x < 0 || src[0].y < 0 ... */ 668 src0 = src_vector(p, &inst->Src[0], fs); 669 tmp = i915_get_utemp(p); 670 671 i915_emit_texld(p, 672 tmp, /* dest reg: a dummy reg */ 673 A0_DEST_CHANNEL_ALL, /* dest writemask */ 674 0, /* sampler */ 675 src0, /* coord*/ 676 T0_TEXKILL, /* opcode */ 677 1); /* num_coord */ 678 break; 679 680 case TGSI_OPCODE_KILL: 681 /* unconditional kill */ 682 tmp = i915_get_utemp(p); 683 684 i915_emit_texld(p, 685 tmp, /* dest reg: a dummy reg */ 686 A0_DEST_CHANNEL_ALL, /* dest writemask */ 687 0, /* sampler */ 688 negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */ 689 T0_TEXKILL, /* opcode */ 690 1); /* num_coord */ 691 break; 692 693 case TGSI_OPCODE_LG2: 694 src0 = src_vector(p, &inst->Src[0], fs); 695 696 i915_emit_arith(p, 697 A0_LOG, 698 get_result_vector(p, &inst->Dst[0]), 699 get_result_flags(inst), 0, 700 swizzle(src0, X, X, X, X), 0, 0); 701 break; 702 703 case TGSI_OPCODE_LIT: 704 src0 = src_vector(p, &inst->Src[0], fs); 705 tmp = i915_get_utemp(p); 706 707 /* tmp = max( a.xyzw, a.00zw ) 708 * XXX: Clamp tmp.w to -128..128 709 * tmp.y = log(tmp.y) 710 * tmp.y = tmp.w * tmp.y 711 * tmp.y = exp(tmp.y) 712 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 713 */ 714 
i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 715 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 716 717 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 718 swizzle(tmp, Y, Y, Y, Y), 0, 0); 719 720 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 721 swizzle(tmp, ZERO, Y, ZERO, ZERO), 722 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 723 724 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 725 swizzle(tmp, Y, Y, Y, Y), 0, 0); 726 727 i915_emit_arith(p, A0_CMP, 728 get_result_vector(p, &inst->Dst[0]), 729 get_result_flags(inst), 0, 730 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 731 swizzle(tmp, ONE, X, ZERO, ONE), 732 swizzle(tmp, ONE, X, Y, ONE)); 733 734 break; 735 736 case TGSI_OPCODE_LRP: 737 src0 = src_vector(p, &inst->Src[0], fs); 738 src1 = src_vector(p, &inst->Src[1], fs); 739 src2 = src_vector(p, &inst->Src[2], fs); 740 flags = get_result_flags(inst); 741 tmp = i915_get_utemp(p); 742 743 /* b*a + c*(1-a) 744 * 745 * b*a + c - ca 746 * 747 * tmp = b*a + c, 748 * result = (-c)*a + tmp 749 */ 750 i915_emit_arith(p, A0_MAD, tmp, 751 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 752 753 i915_emit_arith(p, A0_MAD, 754 get_result_vector(p, &inst->Dst[0]), 755 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 756 break; 757 758 case TGSI_OPCODE_MAD: 759 emit_simple_arith(p, inst, A0_MAD, 3, fs); 760 break; 761 762 case TGSI_OPCODE_MAX: 763 emit_simple_arith(p, inst, A0_MAX, 2, fs); 764 break; 765 766 case TGSI_OPCODE_MIN: 767 emit_simple_arith(p, inst, A0_MIN, 2, fs); 768 break; 769 770 case TGSI_OPCODE_MOV: 771 emit_simple_arith(p, inst, A0_MOV, 1, fs); 772 break; 773 774 case TGSI_OPCODE_MUL: 775 emit_simple_arith(p, inst, A0_MUL, 2, fs); 776 break; 777 778 case TGSI_OPCODE_NOP: 779 break; 780 781 case TGSI_OPCODE_POW: 782 src0 = src_vector(p, &inst->Src[0], fs); 783 src1 = src_vector(p, &inst->Src[1], fs); 784 tmp = i915_get_utemp(p); 785 flags = get_result_flags(inst); 786 787 /* XXX: masking on intermediate values, here and 
elsewhere. 788 */ 789 i915_emit_arith(p, 790 A0_LOG, 791 tmp, A0_DEST_CHANNEL_X, 0, 792 swizzle(src0, X, X, X, X), 0, 0); 793 794 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 795 796 i915_emit_arith(p, 797 A0_EXP, 798 get_result_vector(p, &inst->Dst[0]), 799 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 800 break; 801 802 case TGSI_OPCODE_RET: 803 /* XXX: no-op? */ 804 break; 805 806 case TGSI_OPCODE_RCP: 807 src0 = src_vector(p, &inst->Src[0], fs); 808 809 i915_emit_arith(p, 810 A0_RCP, 811 get_result_vector(p, &inst->Dst[0]), 812 get_result_flags(inst), 0, 813 swizzle(src0, X, X, X, X), 0, 0); 814 break; 815 816 case TGSI_OPCODE_RSQ: 817 src0 = src_vector(p, &inst->Src[0], fs); 818 819 i915_emit_arith(p, 820 A0_RSQ, 821 get_result_vector(p, &inst->Dst[0]), 822 get_result_flags(inst), 0, 823 swizzle(src0, X, X, X, X), 0, 0); 824 break; 825 826 case TGSI_OPCODE_SCS: 827 src0 = src_vector(p, &inst->Src[0], fs); 828 tmp = i915_get_utemp(p); 829 830 /* 831 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 832 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 833 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 834 * scs.x = DP4 t1, scs_sin_constants 835 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 836 * scs.y = DP4 t1, scs_cos_constants 837 */ 838 i915_emit_arith(p, 839 A0_MUL, 840 tmp, A0_DEST_CHANNEL_XY, 0, 841 swizzle(src0, X, X, ONE, ONE), 842 swizzle(src0, X, ONE, ONE, ONE), 0); 843 844 i915_emit_arith(p, 845 A0_MUL, 846 tmp, A0_DEST_CHANNEL_ALL, 0, 847 swizzle(tmp, X, Y, X, Y), 848 swizzle(tmp, X, X, ONE, ONE), 0); 849 850 writemask = inst->Dst[0].Register.WriteMask; 851 852 if (writemask & TGSI_WRITEMASK_Y) { 853 uint tmp1; 854 855 if (writemask & TGSI_WRITEMASK_X) 856 tmp1 = i915_get_utemp(p); 857 else 858 tmp1 = tmp; 859 860 i915_emit_arith(p, 861 A0_MUL, 862 tmp1, A0_DEST_CHANNEL_ALL, 0, 863 swizzle(tmp, X, Y, Y, W), 864 swizzle(tmp, X, Z, ONE, ONE), 0); 865 866 i915_emit_arith(p, 867 A0_DP4, 868 get_result_vector(p, &inst->Dst[0]), 869 
A0_DEST_CHANNEL_Y, 0, 870 swizzle(tmp1, W, Z, Y, X), 871 i915_emit_const4fv(p, scs_sin_constants), 0); 872 } 873 874 if (writemask & TGSI_WRITEMASK_X) { 875 i915_emit_arith(p, 876 A0_MUL, 877 tmp, A0_DEST_CHANNEL_XYZ, 0, 878 swizzle(tmp, X, X, Z, ONE), 879 swizzle(tmp, Z, ONE, ONE, ONE), 0); 880 881 i915_emit_arith(p, 882 A0_DP4, 883 get_result_vector(p, &inst->Dst[0]), 884 A0_DEST_CHANNEL_X, 0, 885 swizzle(tmp, ONE, Z, Y, X), 886 i915_emit_const4fv(p, scs_cos_constants), 0); 887 } 888 break; 889 890 case TGSI_OPCODE_SEQ: 891 /* if we're both >= and <= then we're == */ 892 src0 = src_vector(p, &inst->Src[0], fs); 893 src1 = src_vector(p, &inst->Src[1], fs); 894 tmp = i915_get_utemp(p); 895 896 i915_emit_arith(p, 897 A0_SGE, 898 tmp, A0_DEST_CHANNEL_ALL, 0, 899 src0, 900 src1, 0); 901 902 i915_emit_arith(p, 903 A0_SGE, 904 get_result_vector(p, &inst->Dst[0]), 905 A0_DEST_CHANNEL_ALL, 0, 906 src1, 907 src0, 0); 908 909 i915_emit_arith(p, 910 A0_MUL, 911 get_result_vector(p, &inst->Dst[0]), 912 A0_DEST_CHANNEL_ALL, 0, 913 get_result_vector(p, &inst->Dst[0]), 914 tmp, 0); 915 916 break; 917 918 case TGSI_OPCODE_SGE: 919 emit_simple_arith(p, inst, A0_SGE, 2, fs); 920 break; 921 922 case TGSI_OPCODE_SIN: 923 src0 = src_vector(p, &inst->Src[0], fs); 924 tmp = i915_get_utemp(p); 925 926 i915_emit_arith(p, 927 A0_MUL, 928 tmp, A0_DEST_CHANNEL_X, 0, 929 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 930 931 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 932 933 /* 934 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 935 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 936 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 937 * result = DP4 t1.wzyx, sin_constants 938 */ 939 i915_emit_arith(p, 940 A0_MUL, 941 tmp, A0_DEST_CHANNEL_XY, 0, 942 swizzle(tmp, X, X, ONE, ONE), 943 swizzle(tmp, X, ONE, ONE, ONE), 0); 944 945 i915_emit_arith(p, 946 A0_MUL, 947 tmp, A0_DEST_CHANNEL_ALL, 0, 948 swizzle(tmp, X, Y, X, Y), 949 swizzle(tmp, X, X, ONE, ONE), 0); 950 951 
i915_emit_arith(p, 952 A0_MUL, 953 tmp, A0_DEST_CHANNEL_ALL, 0, 954 swizzle(tmp, X, Y, Y, W), 955 swizzle(tmp, X, Z, ONE, ONE), 0); 956 957 i915_emit_arith(p, 958 A0_DP4, 959 get_result_vector(p, &inst->Dst[0]), 960 get_result_flags(inst), 0, 961 swizzle(tmp, W, Z, Y, X), 962 i915_emit_const4fv(p, sin_constants), 0); 963 break; 964 965 case TGSI_OPCODE_SLE: 966 /* like SGE, but swap reg0, reg1 */ 967 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); 968 break; 969 970 case TGSI_OPCODE_SLT: 971 emit_simple_arith(p, inst, A0_SLT, 2, fs); 972 break; 973 974 case TGSI_OPCODE_SGT: 975 /* like SLT, but swap reg0, reg1 */ 976 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); 977 break; 978 979 case TGSI_OPCODE_SNE: 980 /* if we're < or > then we're != */ 981 src0 = src_vector(p, &inst->Src[0], fs); 982 src1 = src_vector(p, &inst->Src[1], fs); 983 tmp = i915_get_utemp(p); 984 985 i915_emit_arith(p, 986 A0_SLT, 987 tmp, 988 A0_DEST_CHANNEL_ALL, 0, 989 src0, 990 src1, 0); 991 992 i915_emit_arith(p, 993 A0_SLT, 994 get_result_vector(p, &inst->Dst[0]), 995 A0_DEST_CHANNEL_ALL, 0, 996 src1, 997 src0, 0); 998 999 i915_emit_arith(p, 1000 A0_ADD, 1001 get_result_vector(p, &inst->Dst[0]), 1002 A0_DEST_CHANNEL_ALL, 0, 1003 get_result_vector(p, &inst->Dst[0]), 1004 tmp, 0); 1005 break; 1006 1007 case TGSI_OPCODE_SSG: 1008 /* compute (src>0) - (src<0) */ 1009 src0 = src_vector(p, &inst->Src[0], fs); 1010 tmp = i915_get_utemp(p); 1011 1012 i915_emit_arith(p, 1013 A0_SLT, 1014 tmp, 1015 A0_DEST_CHANNEL_ALL, 0, 1016 src0, 1017 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); 1018 1019 i915_emit_arith(p, 1020 A0_SLT, 1021 get_result_vector(p, &inst->Dst[0]), 1022 A0_DEST_CHANNEL_ALL, 0, 1023 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 1024 src0, 0); 1025 1026 i915_emit_arith(p, 1027 A0_ADD, 1028 get_result_vector(p, &inst->Dst[0]), 1029 A0_DEST_CHANNEL_ALL, 0, 1030 get_result_vector(p, &inst->Dst[0]), 1031 negate(tmp, 1, 1, 1, 1), 0); 1032 break; 1033 1034 case TGSI_OPCODE_SUB: 1035 src0 = 
src_vector(p, &inst->Src[0], fs); 1036 src1 = src_vector(p, &inst->Src[1], fs); 1037 1038 i915_emit_arith(p, 1039 A0_ADD, 1040 get_result_vector(p, &inst->Dst[0]), 1041 get_result_flags(inst), 0, 1042 src0, negate(src1, 1, 1, 1, 1), 0); 1043 break; 1044 1045 case TGSI_OPCODE_TEX: 1046 emit_tex(p, inst, T0_TEXLD, fs); 1047 break; 1048 1049 case TGSI_OPCODE_TRUNC: 1050 emit_simple_arith(p, inst, A0_TRC, 1, fs); 1051 break; 1052 1053 case TGSI_OPCODE_TXB: 1054 emit_tex(p, inst, T0_TEXLDB, fs); 1055 break; 1056 1057 case TGSI_OPCODE_TXP: 1058 emit_tex(p, inst, T0_TEXLDP, fs); 1059 break; 1060 1061 case TGSI_OPCODE_XPD: 1062 /* Cross product: 1063 * result.x = src0.y * src1.z - src0.z * src1.y; 1064 * result.y = src0.z * src1.x - src0.x * src1.z; 1065 * result.z = src0.x * src1.y - src0.y * src1.x; 1066 * result.w = undef; 1067 */ 1068 src0 = src_vector(p, &inst->Src[0], fs); 1069 src1 = src_vector(p, &inst->Src[1], fs); 1070 tmp = i915_get_utemp(p); 1071 1072 i915_emit_arith(p, 1073 A0_MUL, 1074 tmp, A0_DEST_CHANNEL_ALL, 0, 1075 swizzle(src0, Z, X, Y, ONE), 1076 swizzle(src1, Y, Z, X, ONE), 0); 1077 1078 i915_emit_arith(p, 1079 A0_MAD, 1080 get_result_vector(p, &inst->Dst[0]), 1081 get_result_flags(inst), 0, 1082 swizzle(src0, Y, Z, X, ONE), 1083 swizzle(src1, Z, X, Y, ONE), 1084 negate(tmp, 1, 1, 1, 0)); 1085 break; 1086 1087 default: 1088 i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); 1089 p->error = 1; 1090 return; 1091 } 1092 1093 i915_release_utemps(p); 1094} 1095 1096 1097static void i915_translate_token(struct i915_fp_compile *p, 1098 const union i915_full_token *token, 1099 struct i915_fragment_shader *fs) 1100{ 1101 struct i915_fragment_shader *ifs = p->shader; 1102 switch( token->Token.Type ) { 1103 case TGSI_TOKEN_TYPE_PROPERTY: 1104 /* 1105 * We only support one cbuf, but we still need to ignore the property 1106 * correctly so we don't hit the assert at the end of the switch case. 
1107 */ 1108 assert(token->FullProperty.Property.PropertyName == 1109 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); 1110 break; 1111 1112 case TGSI_TOKEN_TYPE_DECLARATION: 1113 if (token->FullDeclaration.Declaration.File 1114 == TGSI_FILE_CONSTANT) { 1115 uint i; 1116 for (i = token->FullDeclaration.Range.First; 1117 i <= MIN2(token->FullDeclaration.Range.Last, I915_MAX_CONSTANT - 1); 1118 i++) { 1119 assert(ifs->constant_flags[i] == 0x0); 1120 ifs->constant_flags[i] = I915_CONSTFLAG_USER; 1121 ifs->num_constants = MAX2(ifs->num_constants, i + 1); 1122 } 1123 } 1124 else if (token->FullDeclaration.Declaration.File 1125 == TGSI_FILE_TEMPORARY) { 1126 uint i; 1127 for (i = token->FullDeclaration.Range.First; 1128 i <= token->FullDeclaration.Range.Last; 1129 i++) { 1130 if (i >= I915_MAX_TEMPORARY) 1131 debug_printf("Too many temps (%d)\n",i); 1132 else 1133 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ 1134 p->temp_flag |= (1 << i); /* mark temp as used */ 1135 } 1136 } 1137 break; 1138 1139 case TGSI_TOKEN_TYPE_IMMEDIATE: 1140 { 1141 const struct tgsi_full_immediate *imm 1142 = &token->FullImmediate; 1143 const uint pos = p->num_immediates++; 1144 uint j; 1145 assert( imm->Immediate.NrTokens <= 4 + 1 ); 1146 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { 1147 p->immediates[pos][j] = imm->u[j].Float; 1148 } 1149 } 1150 break; 1151 1152 case TGSI_TOKEN_TYPE_INSTRUCTION: 1153 if (p->first_instruction) { 1154 /* resolve location of immediates */ 1155 uint i, j; 1156 for (i = 0; i < p->num_immediates; i++) { 1157 /* find constant slot for this immediate */ 1158 for (j = 0; j < I915_MAX_CONSTANT; j++) { 1159 if (ifs->constant_flags[j] == 0x0) { 1160 memcpy(ifs->constants[j], 1161 p->immediates[i], 1162 4 * sizeof(float)); 1163 /*printf("immediate %d maps to const %d\n", i, j);*/ 1164 ifs->constant_flags[j] = 0xf; /* all four comps used */ 1165 p->immediates_map[i] = j; 1166 ifs->num_constants = MAX2(ifs->num_constants, j + 1); 1167 break; 1168 } 1169 } 
1170 } 1171 1172 p->first_instruction = FALSE; 1173 } 1174 1175 i915_translate_instruction(p, &token->FullInstruction, fs); 1176 break; 1177 1178 default: 1179 assert( 0 ); 1180 } 1181 1182} 1183 1184/** 1185 * Translate TGSI fragment shader into i915 hardware instructions. 1186 * \param p the translation state 1187 * \param tokens the TGSI token array 1188 */ 1189static void 1190i915_translate_instructions(struct i915_fp_compile *p, 1191 const struct i915_token_list *tokens, 1192 struct i915_fragment_shader *fs) 1193{ 1194 int i; 1195 for(i = 0; i<tokens->NumTokens; i++) { 1196 i915_translate_token(p, &tokens->Tokens[i], fs); 1197 } 1198} 1199 1200 1201static struct i915_fp_compile * 1202i915_init_compile(struct i915_context *i915, 1203 struct i915_fragment_shader *ifs) 1204{ 1205 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); 1206 int i; 1207 1208 p->shader = ifs; 1209 1210 /* Put new constants at end of const buffer, growing downward. 1211 * The problem is we don't know how many user-defined constants might 1212 * be specified with pipe->set_constant_buffer(). 1213 * Should pre-scan the user's program to determine the highest-numbered 1214 * constant referenced. 1215 */ 1216 ifs->num_constants = 0; 1217 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); 1218 1219 memset(&p->register_phases, 0, sizeof(p->register_phases)); 1220 1221 for (i = 0; i < I915_TEX_UNITS; i++) 1222 ifs->generic_mapping[i] = -1; 1223 1224 p->first_instruction = TRUE; 1225 1226 p->nr_tex_indirect = 1; /* correct? 
*/ 1227 p->nr_tex_insn = 0; 1228 p->nr_alu_insn = 0; 1229 p->nr_decl_insn = 0; 1230 1231 p->csr = p->program; 1232 p->decl = p->declarations; 1233 p->decl_s = 0; 1234 p->decl_t = 0; 1235 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; 1236 p->utemp_flag = ~0x7; 1237 1238 /* initialize the first program word */ 1239 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; 1240 1241 return p; 1242} 1243 1244 1245/* Copy compile results to the fragment program struct and destroy the 1246 * compilation context. 1247 */ 1248static void 1249i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) 1250{ 1251 struct i915_fragment_shader *ifs = p->shader; 1252 unsigned long program_size = (unsigned long) (p->csr - p->program); 1253 unsigned long decl_size = (unsigned long) (p->decl - p->declarations); 1254 1255 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 1256 debug_printf("Exceeded max nr indirect texture lookups\n"); 1257 1258 if (p->nr_tex_insn > I915_MAX_TEX_INSN) 1259 i915_program_error(p, "Exceeded max TEX instructions"); 1260 1261 if (p->nr_alu_insn > I915_MAX_ALU_INSN) 1262 i915_program_error(p, "Exceeded max ALU instructions"); 1263 1264 if (p->nr_decl_insn > I915_MAX_DECL_INSN) 1265 i915_program_error(p, "Exceeded max DECL instructions"); 1266 1267 if (p->error) { 1268 p->NumNativeInstructions = 0; 1269 p->NumNativeAluInstructions = 0; 1270 p->NumNativeTexInstructions = 0; 1271 p->NumNativeTexIndirections = 0; 1272 1273 i915_use_passthrough_shader(ifs); 1274 } 1275 else { 1276 p->NumNativeInstructions 1277 = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; 1278 p->NumNativeAluInstructions = p->nr_alu_insn; 1279 p->NumNativeTexInstructions = p->nr_tex_insn; 1280 p->NumNativeTexIndirections = p->nr_tex_indirect; 1281 1282 /* patch in the program length */ 1283 p->declarations[0] |= program_size + decl_size - 2; 1284 1285 /* Copy compilation results to fragment program struct: 1286 */ 1287 assert(!ifs->decl); 1288 assert(!ifs->program); 1289 1290 ifs->decl 1291 = 
(uint *) MALLOC(decl_size * sizeof(uint)); 1292 ifs->program 1293 = (uint *) MALLOC(program_size * sizeof(uint)); 1294 1295 if (ifs->decl) { 1296 ifs->decl_len = decl_size; 1297 1298 memcpy(ifs->decl, 1299 p->declarations, 1300 decl_size * sizeof(uint)); 1301 } 1302 1303 if (ifs->program) { 1304 ifs->program_len = program_size; 1305 1306 memcpy(ifs->program, 1307 p->program, 1308 program_size * sizeof(uint)); 1309 } 1310 } 1311 1312 /* Release the compilation struct: 1313 */ 1314 FREE(p); 1315} 1316 1317 1318 1319 1320 1321/** 1322 * Rather than trying to intercept and jiggle depth writes during 1323 * emit, just move the value into its correct position at the end of 1324 * the program: 1325 */ 1326static void 1327i915_fixup_depth_write(struct i915_fp_compile *p) 1328{ 1329 /* XXX assuming pos/depth is always in output[0] */ 1330 if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 1331 const uint depth = UREG(REG_TYPE_OD, 0); 1332 1333 i915_emit_arith(p, 1334 A0_MOV, /* opcode */ 1335 depth, /* dest reg */ 1336 A0_DEST_CHANNEL_W, /* write mask */ 1337 0, /* saturate? */ 1338 swizzle(depth, X, Y, Z, Z), /* src0 */ 1339 0, 0 /* src1, src2 */); 1340 } 1341} 1342 1343 1344void 1345i915_translate_fragment_program( struct i915_context *i915, 1346 struct i915_fragment_shader *fs) 1347{ 1348 struct i915_fp_compile *p; 1349 const struct tgsi_token *tokens = fs->state.tokens; 1350 struct i915_token_list* i_tokens; 1351 1352#if 0 1353 tgsi_dump(tokens, 0); 1354#endif 1355 1356 /* hw doesn't seem to like empty frag programs, even when the depth write 1357 * fixup gets emitted below - may that one is fishy, too? 
*/ 1358 if (fs->info.num_instructions == 1) { 1359 i915_use_passthrough_shader(fs); 1360 1361 return; 1362 } 1363 1364 p = i915_init_compile(i915, fs); 1365 1366 i_tokens = i915_optimize(tokens); 1367 i915_translate_instructions(p, i_tokens, fs); 1368 i915_fixup_depth_write(p); 1369 1370 i915_fini_compile(i915, p); 1371 i915_optimize_free(i_tokens); 1372 1373#if 0 1374 i915_disassemble_program(NULL, fs->program, fs->program_len); 1375#endif 1376} 1377