1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include <stdarg.h> 30 31#include "i915_reg.h" 32#include "i915_context.h" 33#include "i915_fpc.h" 34 35#include "pipe/p_shader_tokens.h" 36#include "util/u_math.h" 37#include "util/u_memory.h" 38#include "util/u_string.h" 39#include "tgsi/tgsi_parse.h" 40#include "tgsi/tgsi_dump.h" 41 42#include "draw/draw_vertex.h" 43 44#ifndef M_PI 45#define M_PI 3.14159265358979323846 46#endif 47 48/** 49 * Simple pass-through fragment shader to use when we don't have 50 * a real shader (or it fails to compile for some reason). 51 */ 52static unsigned passthrough_decl[] = 53{ 54 _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), 55 56 /* declare input color: 57 */ 58 (D0_DCL | 59 (REG_TYPE_T << D0_TYPE_SHIFT) | 60 (T_DIFFUSE << D0_NR_SHIFT) | 61 D0_CHANNEL_ALL), 62 0, 63 0, 64}; 65 66static unsigned passthrough_program[] = 67{ 68 /* move to output color: 69 */ 70 (A0_MOV | 71 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | 72 A0_DEST_CHANNEL_ALL | 73 (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | 74 (T_DIFFUSE << A0_SRC0_NR_SHIFT)), 75 0x01230000, /* .xyzw */ 76 0 77}; 78 79 80/* 1, -1/3!, 1/5!, -1/7! */ 81static const float scs_sin_constants[4] = { 1.0, 82 -1.0f / (3 * 2 * 1), 83 1.0f / (5 * 4 * 3 * 2 * 1), 84 -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) 85}; 86 87/* 1, -1/2!, 1/4!, -1/6! */ 88static const float scs_cos_constants[4] = { 1.0, 89 -1.0f / (2 * 1), 90 1.0f / (4 * 3 * 2 * 1), 91 -1.0f / (6 * 5 * 4 * 3 * 2 * 1) 92}; 93 94/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */ 95static const float sin_constants[4] = { 2.0 * M_PI, 96 -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1), 97 32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1), 98 -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1) 99}; 100 101/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */ 102static const float cos_constants[4] = { 1.0, 103 -4.0f * M_PI * M_PI / (2 * 1), 104 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1), 105 -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1) 106}; 107 108 109 110/** 111 * component-wise negation of ureg 112 */ 113static INLINE int 114negate(int reg, int x, int y, int z, int w) 115{ 116 /* Another neat thing about the UREG representation */ 117 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | 118 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | 119 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | 120 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); 121} 122 123 124/** 125 * In the event of a translation failure, we'll generate a simple color 126 * pass-through program. 127 */ 128static void 129i915_use_passthrough_shader(struct i915_fragment_shader *fs) 130{ 131 fs->program = (uint *) MALLOC(sizeof(passthrough_program)); 132 fs->decl = (uint *) MALLOC(sizeof(passthrough_decl)); 133 if (fs->program) { 134 memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); 135 memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl)); 136 fs->program_len = Elements(passthrough_program); 137 fs->decl_len = Elements(passthrough_decl); 138 } 139 fs->num_constants = 0; 140} 141 142 143void 144i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 145{ 146 va_list args; 147 char buffer[1024]; 148 149 debug_printf("i915_program_error: "); 150 va_start( args, msg ); 151 util_vsnprintf( buffer, sizeof(buffer), msg, args ); 152 va_end( args ); 153 debug_printf("%s", buffer); 154 debug_printf("\n"); 155 156 p->error = 1; 157} 158 159static uint get_mapping(struct i915_fragment_shader* fs, int unit) 160{ 161 int i; 162 for (i = 0; i < I915_TEX_UNITS; i++) 163 { 164 if (fs->generic_mapping[i] == -1) { 165 fs->generic_mapping[i] = unit; 166 return i; 167 } 168 if (fs->generic_mapping[i] == unit) 169 return i; 170 } 171 debug_printf("Exceeded max generics\n"); 172 return 0; 173} 174 175/** 176 * Construct a ureg for the given source register. Will emit 177 * constants, apply swizzling and negation as needed. 178 */ 179static uint 180src_vector(struct i915_fp_compile *p, 181 const struct i915_full_src_register *source, 182 struct i915_fragment_shader* fs) 183{ 184 uint index = source->Register.Index; 185 uint src = 0, sem_name, sem_ind; 186 187 switch (source->Register.File) { 188 case TGSI_FILE_TEMPORARY: 189 if (source->Register.Index >= I915_MAX_TEMPORARY) { 190 i915_program_error(p, "Exceeded max temporary reg"); 191 return 0; 192 } 193 src = UREG(REG_TYPE_R, index); 194 break; 195 case TGSI_FILE_INPUT: 196 /* XXX: Packing COL1, FOGC into a single attribute works for 197 * texenv programs, but will fail for real fragment programs 198 * that use these attributes and expect them to be a full 4 199 * components wide. Could use a texcoord to pass these 200 * attributes if necessary, but that won't work in the general 201 * case. 202 * 203 * We also use a texture coordinate to pass wpos when possible. 204 */ 205 206 sem_name = p->shader->info.input_semantic_name[index]; 207 sem_ind = p->shader->info.input_semantic_index[index]; 208 209 switch (sem_name) { 210 case TGSI_SEMANTIC_POSITION: 211 { 212 /* for fragcoord */ 213 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); 214 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 215 break; 216 } 217 case TGSI_SEMANTIC_COLOR: 218 if (sem_ind == 0) { 219 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 220 } 221 else { 222 /* secondary color */ 223 assert(sem_ind == 1); 224 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 225 src = swizzle(src, X, Y, Z, ONE); 226 } 227 break; 228 case TGSI_SEMANTIC_FOG: 229 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 230 src = swizzle(src, W, W, W, W); 231 break; 232 case TGSI_SEMANTIC_GENERIC: 233 { 234 int real_tex_unit = get_mapping(fs, sem_ind); 235 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 236 break; 237 } 238 case TGSI_SEMANTIC_FACE: 239 { 240 /* for back/front faces */ 241 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); 242 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); 243 break; 244 } 245 default: 246 i915_program_error(p, "Bad source->Index"); 247 return 0; 248 } 249 break; 250 251 case TGSI_FILE_IMMEDIATE: 252 assert(index < p->num_immediates); 253 index = p->immediates_map[index]; 254 /* fall-through */ 255 case TGSI_FILE_CONSTANT: 256 src = UREG(REG_TYPE_CONST, index); 257 break; 258 259 default: 260 i915_program_error(p, "Bad source->File"); 261 return 0; 262 } 263 264 src = swizzle(src, 265 source->Register.SwizzleX, 266 source->Register.SwizzleY, 267 source->Register.SwizzleZ, 268 source->Register.SwizzleW); 269 270 /* There's both negate-all-components and per-component negation. 271 * Try to handle both here. 272 */ 273 { 274 int n = source->Register.Negate; 275 src = negate(src, n, n, n, n); 276 } 277 278 /* no abs() */ 279#if 0 280 /* XXX assertions disabled to allow arbfplight.c to run */ 281 /* XXX enable these assertions, or fix things */ 282 assert(!source->Register.Absolute); 283#endif 284 if (source->Register.Absolute) 285 debug_printf("Unhandled absolute value\n"); 286 287 return src; 288} 289 290 291/** 292 * Construct a ureg for a destination register. 293 */ 294static uint 295get_result_vector(struct i915_fp_compile *p, 296 const struct i915_full_dst_register *dest) 297{ 298 switch (dest->Register.File) { 299 case TGSI_FILE_OUTPUT: 300 { 301 uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; 302 switch (sem_name) { 303 case TGSI_SEMANTIC_POSITION: 304 return UREG(REG_TYPE_OD, 0); 305 case TGSI_SEMANTIC_COLOR: 306 return UREG(REG_TYPE_OC, 0); 307 default: 308 i915_program_error(p, "Bad inst->DstReg.Index/semantics"); 309 return 0; 310 } 311 } 312 case TGSI_FILE_TEMPORARY: 313 return UREG(REG_TYPE_R, dest->Register.Index); 314 default: 315 i915_program_error(p, "Bad inst->DstReg.File"); 316 return 0; 317 } 318} 319 320 321/** 322 * Compute flags for saturation and writemask. 323 */ 324static uint 325get_result_flags(const struct i915_full_instruction *inst) 326{ 327 const uint writeMask 328 = inst->Dst[0].Register.WriteMask; 329 uint flags = 0x0; 330 331 if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) 332 flags |= A0_DEST_SATURATE; 333 334 if (writeMask & TGSI_WRITEMASK_X) 335 flags |= A0_DEST_CHANNEL_X; 336 if (writeMask & TGSI_WRITEMASK_Y) 337 flags |= A0_DEST_CHANNEL_Y; 338 if (writeMask & TGSI_WRITEMASK_Z) 339 flags |= A0_DEST_CHANNEL_Z; 340 if (writeMask & TGSI_WRITEMASK_W) 341 flags |= A0_DEST_CHANNEL_W; 342 343 return flags; 344} 345 346 347/** 348 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token 349 */ 350static uint 351translate_tex_src_target(struct i915_fp_compile *p, uint tex) 352{ 353 switch (tex) { 354 case TGSI_TEXTURE_SHADOW1D: 355 /* fall-through */ 356 case TGSI_TEXTURE_1D: 357 return D0_SAMPLE_TYPE_2D; 358 359 case TGSI_TEXTURE_SHADOW2D: 360 /* fall-through */ 361 case TGSI_TEXTURE_2D: 362 return D0_SAMPLE_TYPE_2D; 363 364 case TGSI_TEXTURE_SHADOWRECT: 365 /* fall-through */ 366 case TGSI_TEXTURE_RECT: 367 return D0_SAMPLE_TYPE_2D; 368 369 case TGSI_TEXTURE_3D: 370 return D0_SAMPLE_TYPE_VOLUME; 371 372 case TGSI_TEXTURE_CUBE: 373 return D0_SAMPLE_TYPE_CUBE; 374 375 default: 376 i915_program_error(p, "TexSrc type"); 377 return 0; 378 } 379} 380 381/** 382 * Return the number of coords needed to access a given TGSI_TEXTURE_* 383 */ 384static uint 385texture_num_coords(struct i915_fp_compile *p, uint tex) 386{ 387 switch (tex) { 388 case TGSI_TEXTURE_SHADOW1D: 389 case TGSI_TEXTURE_1D: 390 return 1; 391 392 case TGSI_TEXTURE_SHADOW2D: 393 case TGSI_TEXTURE_2D: 394 case TGSI_TEXTURE_SHADOWRECT: 395 case TGSI_TEXTURE_RECT: 396 return 2; 397 398 case TGSI_TEXTURE_3D: 399 case TGSI_TEXTURE_CUBE: 400 return 3; 401 402 default: 403 i915_program_error(p, "Num coords"); 404 return 2; 405 } 406} 407 408 409/** 410 * Generate texel lookup instruction. 411 */ 412static void 413emit_tex(struct i915_fp_compile *p, 414 const struct i915_full_instruction *inst, 415 uint opcode, 416 struct i915_fragment_shader* fs) 417{ 418 uint texture = inst->Texture.Texture; 419 uint unit = inst->Src[1].Register.Index; 420 uint tex = translate_tex_src_target( p, texture ); 421 uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); 422 uint coord = src_vector( p, &inst->Src[0], fs); 423 424 i915_emit_texld( p, 425 get_result_vector( p, &inst->Dst[0] ), 426 get_result_flags( inst ), 427 sampler, 428 coord, 429 opcode, 430 texture_num_coords(p, texture) ); 431} 432 433 434/** 435 * Generate a simple arithmetic instruction 436 * \param opcode the i915 opcode 437 * \param numArgs the number of input/src arguments 438 */ 439static void 440emit_simple_arith(struct i915_fp_compile *p, 441 const struct i915_full_instruction *inst, 442 uint opcode, uint numArgs, 443 struct i915_fragment_shader* fs) 444{ 445 uint arg1, arg2, arg3; 446 447 assert(numArgs <= 3); 448 449 arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); 450 arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); 451 arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs ); 452 453 i915_emit_arith( p, 454 opcode, 455 get_result_vector( p, &inst->Dst[0]), 456 get_result_flags( inst ), 0, 457 arg1, 458 arg2, 459 arg3 ); 460} 461 462 463/** As above, but swap the first two src regs */ 464static void 465emit_simple_arith_swap2(struct i915_fp_compile *p, 466 const struct i915_full_instruction *inst, 467 uint opcode, uint numArgs, 468 struct i915_fragment_shader* fs) 469{ 470 struct i915_full_instruction inst2; 471 472 assert(numArgs == 2); 473 474 /* transpose first two registers */ 475 inst2 = *inst; 476 inst2.Src[0] = inst->Src[1]; 477 inst2.Src[1] = inst->Src[0]; 478 479 emit_simple_arith(p, &inst2, opcode, numArgs, fs); 480} 481 482/* 483 * Translate TGSI instruction to i915 instruction. 484 * 485 * Possible concerns: 486 * 487 * DDX, DDY -- return 0 488 * SIN, COS -- could use another taylor step? 489 * LIT -- results seem a little different to sw mesa 490 * LOG -- different to mesa on negative numbers, but this is conformant. 491 */ 492static void 493i915_translate_instruction(struct i915_fp_compile *p, 494 const struct i915_full_instruction *inst, 495 struct i915_fragment_shader *fs) 496{ 497 uint writemask; 498 uint src0, src1, src2, flags; 499 uint tmp = 0; 500 501 switch (inst->Instruction.Opcode) { 502 case TGSI_OPCODE_ABS: 503 src0 = src_vector(p, &inst->Src[0], fs); 504 i915_emit_arith(p, 505 A0_MAX, 506 get_result_vector(p, &inst->Dst[0]), 507 get_result_flags(inst), 0, 508 src0, negate(src0, 1, 1, 1, 1), 0); 509 break; 510 511 case TGSI_OPCODE_ADD: 512 emit_simple_arith(p, inst, A0_ADD, 2, fs); 513 break; 514 515 case TGSI_OPCODE_CEIL: 516 src0 = src_vector(p, &inst->Src[0], fs); 517 tmp = i915_get_utemp(p); 518 flags = get_result_flags(inst); 519 i915_emit_arith(p, 520 A0_FLR, 521 tmp, 522 flags & A0_DEST_CHANNEL_ALL, 0, 523 negate(src0, 1, 1, 1, 1), 0, 0); 524 i915_emit_arith(p, 525 A0_MOV, 526 get_result_vector(p, &inst->Dst[0]), 527 flags, 0, 528 negate(tmp, 1, 1, 1, 1), 0, 0); 529 break; 530 531 case TGSI_OPCODE_CMP: 532 src0 = src_vector(p, &inst->Src[0], fs); 533 src1 = src_vector(p, &inst->Src[1], fs); 534 src2 = src_vector(p, &inst->Src[2], fs); 535 i915_emit_arith(p, A0_CMP, 536 get_result_vector(p, &inst->Dst[0]), 537 get_result_flags(inst), 538 0, src0, src2, src1); /* NOTE: order of src2, src1 */ 539 break; 540 541 case TGSI_OPCODE_COS: 542 src0 = src_vector(p, &inst->Src[0], fs); 543 tmp = i915_get_utemp(p); 544 545 i915_emit_arith(p, 546 A0_MUL, 547 tmp, A0_DEST_CHANNEL_X, 0, 548 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 549 550 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 551 552 /* 553 * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 554 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 555 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 556 * result = DP4 t0, cos_constants 557 */ 558 i915_emit_arith(p, 559 A0_MUL, 560 tmp, A0_DEST_CHANNEL_XY, 0, 561 swizzle(tmp, X, X, ONE, ONE), 562 swizzle(tmp, X, ONE, ONE, ONE), 0); 563 564 i915_emit_arith(p, 565 A0_MUL, 566 tmp, A0_DEST_CHANNEL_XYZ, 0, 567 swizzle(tmp, X, Y, X, ONE), 568 swizzle(tmp, X, X, ONE, ONE), 0); 569 570 i915_emit_arith(p, 571 A0_MUL, 572 tmp, A0_DEST_CHANNEL_XYZ, 0, 573 swizzle(tmp, X, X, Z, ONE), 574 swizzle(tmp, Z, ONE, ONE, ONE), 0); 575 576 i915_emit_arith(p, 577 A0_DP4, 578 get_result_vector(p, &inst->Dst[0]), 579 get_result_flags(inst), 0, 580 swizzle(tmp, ONE, Z, Y, X), 581 i915_emit_const4fv(p, cos_constants), 0); 582 break; 583 584 case TGSI_OPCODE_DDX: 585 case TGSI_OPCODE_DDY: 586 /* XXX We just output 0 here */ 587 debug_printf("Punting DDX/DDX\n"); 588 src0 = get_result_vector(p, &inst->Dst[0]); 589 i915_emit_arith(p, 590 A0_MOV, 591 get_result_vector(p, &inst->Dst[0]), 592 get_result_flags(inst), 0, 593 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); 594 break; 595 596 case TGSI_OPCODE_DP2: 597 src0 = src_vector(p, &inst->Src[0], fs); 598 src1 = src_vector(p, &inst->Src[1], fs); 599 600 i915_emit_arith(p, 601 A0_DP3, 602 get_result_vector(p, &inst->Dst[0]), 603 get_result_flags(inst), 0, 604 swizzle(src0, X, Y, ZERO, ZERO), src1, 0); 605 break; 606 607 case TGSI_OPCODE_DP3: 608 emit_simple_arith(p, inst, A0_DP3, 2, fs); 609 break; 610 611 case TGSI_OPCODE_DP4: 612 emit_simple_arith(p, inst, A0_DP4, 2, fs); 613 break; 614 615 case TGSI_OPCODE_DPH: 616 src0 = src_vector(p, &inst->Src[0], fs); 617 src1 = src_vector(p, &inst->Src[1], fs); 618 619 i915_emit_arith(p, 620 A0_DP4, 621 get_result_vector(p, &inst->Dst[0]), 622 get_result_flags(inst), 0, 623 swizzle(src0, X, Y, Z, ONE), src1, 0); 624 break; 625 626 case TGSI_OPCODE_DST: 627 src0 = src_vector(p, &inst->Src[0], fs); 628 src1 = src_vector(p, &inst->Src[1], fs); 629 630 /* result[0] = 1 * 1; 631 * result[1] = a[1] * b[1]; 632 * result[2] = a[2] * 1; 633 * result[3] = 1 * b[3]; 634 */ 635 i915_emit_arith(p, 636 A0_MUL, 637 get_result_vector(p, &inst->Dst[0]), 638 get_result_flags(inst), 0, 639 swizzle(src0, ONE, Y, Z, ONE), 640 swizzle(src1, ONE, Y, ONE, W), 0); 641 break; 642 643 case TGSI_OPCODE_END: 644 /* no-op */ 645 break; 646 647 case TGSI_OPCODE_EX2: 648 src0 = src_vector(p, &inst->Src[0], fs); 649 650 i915_emit_arith(p, 651 A0_EXP, 652 get_result_vector(p, &inst->Dst[0]), 653 get_result_flags(inst), 0, 654 swizzle(src0, X, X, X, X), 0, 0); 655 break; 656 657 case TGSI_OPCODE_FLR: 658 emit_simple_arith(p, inst, A0_FLR, 1, fs); 659 break; 660 661 case TGSI_OPCODE_FRC: 662 emit_simple_arith(p, inst, A0_FRC, 1, fs); 663 break; 664 665 case TGSI_OPCODE_KIL: 666 /* kill if src[0].x < 0 || src[0].y < 0 ... */ 667 src0 = src_vector(p, &inst->Src[0], fs); 668 tmp = i915_get_utemp(p); 669 670 i915_emit_texld(p, 671 tmp, /* dest reg: a dummy reg */ 672 A0_DEST_CHANNEL_ALL, /* dest writemask */ 673 0, /* sampler */ 674 src0, /* coord*/ 675 T0_TEXKILL, /* opcode */ 676 1); /* num_coord */ 677 break; 678 679 case TGSI_OPCODE_KILP: 680 /* We emit an unconditional kill; we may want to revisit 681 * if we ever implement conditionals. 682 */ 683 tmp = i915_get_utemp(p); 684 685 i915_emit_texld(p, 686 tmp, /* dest reg: a dummy reg */ 687 A0_DEST_CHANNEL_ALL, /* dest writemask */ 688 0, /* sampler */ 689 negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */ 690 T0_TEXKILL, /* opcode */ 691 1); /* num_coord */ 692 break; 693 694 case TGSI_OPCODE_LG2: 695 src0 = src_vector(p, &inst->Src[0], fs); 696 697 i915_emit_arith(p, 698 A0_LOG, 699 get_result_vector(p, &inst->Dst[0]), 700 get_result_flags(inst), 0, 701 swizzle(src0, X, X, X, X), 0, 0); 702 break; 703 704 case TGSI_OPCODE_LIT: 705 src0 = src_vector(p, &inst->Src[0], fs); 706 tmp = i915_get_utemp(p); 707 708 /* tmp = max( a.xyzw, a.00zw ) 709 * XXX: Clamp tmp.w to -128..128 710 * tmp.y = log(tmp.y) 711 * tmp.y = tmp.w * tmp.y 712 * tmp.y = exp(tmp.y) 713 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 714 */ 715 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 716 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 717 718 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 719 swizzle(tmp, Y, Y, Y, Y), 0, 0); 720 721 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 722 swizzle(tmp, ZERO, Y, ZERO, ZERO), 723 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 724 725 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 726 swizzle(tmp, Y, Y, Y, Y), 0, 0); 727 728 i915_emit_arith(p, A0_CMP, 729 get_result_vector(p, &inst->Dst[0]), 730 get_result_flags(inst), 0, 731 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 732 swizzle(tmp, ONE, X, ZERO, ONE), 733 swizzle(tmp, ONE, X, Y, ONE)); 734 735 break; 736 737 case TGSI_OPCODE_LRP: 738 src0 = src_vector(p, &inst->Src[0], fs); 739 src1 = src_vector(p, &inst->Src[1], fs); 740 src2 = src_vector(p, &inst->Src[2], fs); 741 flags = get_result_flags(inst); 742 tmp = i915_get_utemp(p); 743 744 /* b*a + c*(1-a) 745 * 746 * b*a + c - ca 747 * 748 * tmp = b*a + c, 749 * result = (-c)*a + tmp 750 */ 751 i915_emit_arith(p, A0_MAD, tmp, 752 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 753 754 i915_emit_arith(p, A0_MAD, 755 get_result_vector(p, &inst->Dst[0]), 756 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 757 break; 758 759 case TGSI_OPCODE_MAD: 760 emit_simple_arith(p, inst, A0_MAD, 3, fs); 761 break; 762 763 case TGSI_OPCODE_MAX: 764 emit_simple_arith(p, inst, A0_MAX, 2, fs); 765 break; 766 767 case TGSI_OPCODE_MIN: 768 src0 = src_vector(p, &inst->Src[0], fs); 769 src1 = src_vector(p, &inst->Src[1], fs); 770 tmp = i915_get_utemp(p); 771 flags = get_result_flags(inst); 772 773 i915_emit_arith(p, 774 A0_MAX, 775 tmp, flags & A0_DEST_CHANNEL_ALL, 0, 776 negate(src0, 1, 1, 1, 1), 777 negate(src1, 1, 1, 1, 1), 0); 778 779 i915_emit_arith(p, 780 A0_MOV, 781 get_result_vector(p, &inst->Dst[0]), 782 flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); 783 break; 784 785 case TGSI_OPCODE_MOV: 786 emit_simple_arith(p, inst, A0_MOV, 1, fs); 787 break; 788 789 case TGSI_OPCODE_MUL: 790 emit_simple_arith(p, inst, A0_MUL, 2, fs); 791 break; 792 793 case TGSI_OPCODE_NOP: 794 break; 795 796 case TGSI_OPCODE_POW: 797 src0 = src_vector(p, &inst->Src[0], fs); 798 src1 = src_vector(p, &inst->Src[1], fs); 799 tmp = i915_get_utemp(p); 800 flags = get_result_flags(inst); 801 802 /* XXX: masking on intermediate values, here and elsewhere. 803 */ 804 i915_emit_arith(p, 805 A0_LOG, 806 tmp, A0_DEST_CHANNEL_X, 0, 807 swizzle(src0, X, X, X, X), 0, 0); 808 809 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 810 811 i915_emit_arith(p, 812 A0_EXP, 813 get_result_vector(p, &inst->Dst[0]), 814 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 815 break; 816 817 case TGSI_OPCODE_RET: 818 /* XXX: no-op? */ 819 break; 820 821 case TGSI_OPCODE_RCP: 822 src0 = src_vector(p, &inst->Src[0], fs); 823 824 i915_emit_arith(p, 825 A0_RCP, 826 get_result_vector(p, &inst->Dst[0]), 827 get_result_flags(inst), 0, 828 swizzle(src0, X, X, X, X), 0, 0); 829 break; 830 831 case TGSI_OPCODE_RSQ: 832 src0 = src_vector(p, &inst->Src[0], fs); 833 834 i915_emit_arith(p, 835 A0_RSQ, 836 get_result_vector(p, &inst->Dst[0]), 837 get_result_flags(inst), 0, 838 swizzle(src0, X, X, X, X), 0, 0); 839 break; 840 841 case TGSI_OPCODE_SCS: 842 src0 = src_vector(p, &inst->Src[0], fs); 843 tmp = i915_get_utemp(p); 844 845 /* 846 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 847 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 848 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 849 * scs.x = DP4 t1, scs_sin_constants 850 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 851 * scs.y = DP4 t1, scs_cos_constants 852 */ 853 i915_emit_arith(p, 854 A0_MUL, 855 tmp, A0_DEST_CHANNEL_XY, 0, 856 swizzle(src0, X, X, ONE, ONE), 857 swizzle(src0, X, ONE, ONE, ONE), 0); 858 859 i915_emit_arith(p, 860 A0_MUL, 861 tmp, A0_DEST_CHANNEL_ALL, 0, 862 swizzle(tmp, X, Y, X, Y), 863 swizzle(tmp, X, X, ONE, ONE), 0); 864 865 writemask = inst->Dst[0].Register.WriteMask; 866 867 if (writemask & TGSI_WRITEMASK_Y) { 868 uint tmp1; 869 870 if (writemask & TGSI_WRITEMASK_X) 871 tmp1 = i915_get_utemp(p); 872 else 873 tmp1 = tmp; 874 875 i915_emit_arith(p, 876 A0_MUL, 877 tmp1, A0_DEST_CHANNEL_ALL, 0, 878 swizzle(tmp, X, Y, Y, W), 879 swizzle(tmp, X, Z, ONE, ONE), 0); 880 881 i915_emit_arith(p, 882 A0_DP4, 883 get_result_vector(p, &inst->Dst[0]), 884 A0_DEST_CHANNEL_Y, 0, 885 swizzle(tmp1, W, Z, Y, X), 886 i915_emit_const4fv(p, scs_sin_constants), 0); 887 } 888 889 if (writemask & TGSI_WRITEMASK_X) { 890 i915_emit_arith(p, 891 A0_MUL, 892 tmp, A0_DEST_CHANNEL_XYZ, 0, 893 swizzle(tmp, X, X, Z, ONE), 894 swizzle(tmp, Z, ONE, ONE, ONE), 0); 895 896 i915_emit_arith(p, 897 A0_DP4, 898 get_result_vector(p, &inst->Dst[0]), 899 A0_DEST_CHANNEL_X, 0, 900 swizzle(tmp, ONE, Z, Y, X), 901 i915_emit_const4fv(p, scs_cos_constants), 0); 902 } 903 break; 904 905 case TGSI_OPCODE_SEQ: 906 /* if we're both >= and <= then we're == */ 907 src0 = src_vector(p, &inst->Src[0], fs); 908 src1 = src_vector(p, &inst->Src[1], fs); 909 tmp = i915_get_utemp(p); 910 911 i915_emit_arith(p, 912 A0_SGE, 913 tmp, A0_DEST_CHANNEL_ALL, 0, 914 src0, 915 src1, 0); 916 917 i915_emit_arith(p, 918 A0_SGE, 919 get_result_vector(p, &inst->Dst[0]), 920 A0_DEST_CHANNEL_ALL, 0, 921 src1, 922 src0, 0); 923 924 i915_emit_arith(p, 925 A0_MUL, 926 get_result_vector(p, &inst->Dst[0]), 927 A0_DEST_CHANNEL_ALL, 0, 928 get_result_vector(p, &inst->Dst[0]), 929 tmp, 0); 930 931 break; 932 933 case TGSI_OPCODE_SGE: 934 emit_simple_arith(p, inst, A0_SGE, 2, fs); 935 break; 936 937 case TGSI_OPCODE_SIN: 938 src0 = src_vector(p, &inst->Src[0], fs); 939 tmp = i915_get_utemp(p); 940 941 i915_emit_arith(p, 942 A0_MUL, 943 tmp, A0_DEST_CHANNEL_X, 0, 944 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 945 946 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 947 948 /* 949 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 950 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 951 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 952 * result = DP4 t1.wzyx, sin_constants 953 */ 954 i915_emit_arith(p, 955 A0_MUL, 956 tmp, A0_DEST_CHANNEL_XY, 0, 957 swizzle(tmp, X, X, ONE, ONE), 958 swizzle(tmp, X, ONE, ONE, ONE), 0); 959 960 i915_emit_arith(p, 961 A0_MUL, 962 tmp, A0_DEST_CHANNEL_ALL, 0, 963 swizzle(tmp, X, Y, X, Y), 964 swizzle(tmp, X, X, ONE, ONE), 0); 965 966 i915_emit_arith(p, 967 A0_MUL, 968 tmp, A0_DEST_CHANNEL_ALL, 0, 969 swizzle(tmp, X, Y, Y, W), 970 swizzle(tmp, X, Z, ONE, ONE), 0); 971 972 i915_emit_arith(p, 973 A0_DP4, 974 get_result_vector(p, &inst->Dst[0]), 975 get_result_flags(inst), 0, 976 swizzle(tmp, W, Z, Y, X), 977 i915_emit_const4fv(p, sin_constants), 0); 978 break; 979 980 case TGSI_OPCODE_SLE: 981 /* like SGE, but swap reg0, reg1 */ 982 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); 983 break; 984 985 case TGSI_OPCODE_SLT: 986 emit_simple_arith(p, inst, A0_SLT, 2, fs); 987 break; 988 989 case TGSI_OPCODE_SGT: 990 /* like SLT, but swap reg0, reg1 */ 991 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); 992 break; 993 994 case TGSI_OPCODE_SNE: 995 /* if we're < or > then we're != */ 996 src0 = src_vector(p, &inst->Src[0], fs); 997 src1 = src_vector(p, &inst->Src[1], fs); 998 tmp = i915_get_utemp(p); 999 1000 i915_emit_arith(p, 1001 A0_SLT, 1002 tmp, 1003 A0_DEST_CHANNEL_ALL, 0, 1004 src0, 1005 src1, 0); 1006 1007 i915_emit_arith(p, 1008 A0_SLT, 1009 get_result_vector(p, &inst->Dst[0]), 1010 A0_DEST_CHANNEL_ALL, 0, 1011 src1, 1012 src0, 0); 1013 1014 i915_emit_arith(p, 1015 A0_ADD, 1016 get_result_vector(p, &inst->Dst[0]), 1017 A0_DEST_CHANNEL_ALL, 0, 1018 get_result_vector(p, &inst->Dst[0]), 1019 tmp, 0); 1020 break; 1021 1022 case TGSI_OPCODE_SSG: 1023 /* compute (src>0) - (src<0) */ 1024 src0 = src_vector(p, &inst->Src[0], fs); 1025 tmp = i915_get_utemp(p); 1026 1027 i915_emit_arith(p, 1028 A0_SLT, 1029 tmp, 1030 A0_DEST_CHANNEL_ALL, 0, 1031 src0, 1032 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); 1033 1034 i915_emit_arith(p, 1035 A0_SLT, 1036 get_result_vector(p, &inst->Dst[0]), 1037 A0_DEST_CHANNEL_ALL, 0, 1038 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 1039 src0, 0); 1040 1041 i915_emit_arith(p, 1042 A0_ADD, 1043 get_result_vector(p, &inst->Dst[0]), 1044 A0_DEST_CHANNEL_ALL, 0, 1045 get_result_vector(p, &inst->Dst[0]), 1046 negate(tmp, 1, 1, 1, 1), 0); 1047 break; 1048 1049 case TGSI_OPCODE_SUB: 1050 src0 = src_vector(p, &inst->Src[0], fs); 1051 src1 = src_vector(p, &inst->Src[1], fs); 1052 1053 i915_emit_arith(p, 1054 A0_ADD, 1055 get_result_vector(p, &inst->Dst[0]), 1056 get_result_flags(inst), 0, 1057 src0, negate(src1, 1, 1, 1, 1), 0); 1058 break; 1059 1060 case TGSI_OPCODE_TEX: 1061 emit_tex(p, inst, T0_TEXLD, fs); 1062 break; 1063 1064 case TGSI_OPCODE_TRUNC: 1065 emit_simple_arith(p, inst, A0_TRC, 1, fs); 1066 break; 1067 1068 case TGSI_OPCODE_TXB: 1069 emit_tex(p, inst, T0_TEXLDB, fs); 1070 break; 1071 1072 case TGSI_OPCODE_TXP: 1073 emit_tex(p, inst, T0_TEXLDP, fs); 1074 break; 1075 1076 case TGSI_OPCODE_XPD: 1077 /* Cross product: 1078 * result.x = src0.y * src1.z - src0.z * src1.y; 1079 * result.y = src0.z * src1.x - src0.x * src1.z; 1080 * result.z = src0.x * src1.y - src0.y * src1.x; 1081 * result.w = undef; 1082 */ 1083 src0 = src_vector(p, &inst->Src[0], fs); 1084 src1 = src_vector(p, &inst->Src[1], fs); 1085 tmp = i915_get_utemp(p); 1086 1087 i915_emit_arith(p, 1088 A0_MUL, 1089 tmp, A0_DEST_CHANNEL_ALL, 0, 1090 swizzle(src0, Z, X, Y, ONE), 1091 swizzle(src1, Y, Z, X, ONE), 0); 1092 1093 i915_emit_arith(p, 1094 A0_MAD, 1095 get_result_vector(p, &inst->Dst[0]), 1096 get_result_flags(inst), 0, 1097 swizzle(src0, Y, Z, X, ONE), 1098 swizzle(src1, Z, X, Y, ONE), 1099 negate(tmp, 1, 1, 1, 0)); 1100 break; 1101 1102 default: 1103 i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); 1104 p->error = 1; 1105 return; 1106 } 1107 1108 i915_release_utemps(p); 1109} 1110 1111 1112static void i915_translate_token(struct i915_fp_compile *p, 1113 const union i915_full_token* token, 1114 struct i915_fragment_shader *fs) 1115{ 1116 struct i915_fragment_shader *ifs = p->shader; 1117 switch( token->Token.Type ) { 1118 case TGSI_TOKEN_TYPE_PROPERTY: 1119 /* 1120 * We only support one cbuf, but we still need to ignore the property 1121 * correctly so we don't hit the assert at the end of the switch case. 1122 */ 1123 assert(token->FullProperty.Property.PropertyName == 1124 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); 1125 break; 1126 1127 case TGSI_TOKEN_TYPE_DECLARATION: 1128 if (token->FullDeclaration.Declaration.File 1129 == TGSI_FILE_CONSTANT) { 1130 uint i; 1131 for (i = token->FullDeclaration.Range.First; 1132 i <= token->FullDeclaration.Range.Last; 1133 i++) { 1134 assert(ifs->constant_flags[i] == 0x0); 1135 ifs->constant_flags[i] = I915_CONSTFLAG_USER; 1136 ifs->num_constants = MAX2(ifs->num_constants, i + 1); 1137 } 1138 } 1139 else if (token->FullDeclaration.Declaration.File 1140 == TGSI_FILE_TEMPORARY) { 1141 uint i; 1142 for (i = token->FullDeclaration.Range.First; 1143 i <= token->FullDeclaration.Range.Last; 1144 i++) { 1145 if (i >= I915_MAX_TEMPORARY) 1146 debug_printf("Too many temps (%d)\n",i); 1147 else 1148 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ 1149 p->temp_flag |= (1 << i); /* mark temp as used */ 1150 } 1151 } 1152 break; 1153 1154 case TGSI_TOKEN_TYPE_IMMEDIATE: 1155 { 1156 const struct tgsi_full_immediate *imm 1157 = &token->FullImmediate; 1158 const uint pos = p->num_immediates++; 1159 uint j; 1160 assert( imm->Immediate.NrTokens <= 4 + 1 ); 1161 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { 1162 p->immediates[pos][j] = imm->u[j].Float; 1163 } 1164 } 1165 break; 1166 1167 case TGSI_TOKEN_TYPE_INSTRUCTION: 1168 if (p->first_instruction) { 1169 /* resolve location of immediates */ 1170 uint i, j; 1171 for (i = 0; i < p->num_immediates; i++) { 1172 /* find constant slot for this immediate */ 1173 for (j = 0; j < I915_MAX_CONSTANT; j++) { 1174 if (ifs->constant_flags[j] == 0x0) { 1175 memcpy(ifs->constants[j], 1176 p->immediates[i], 1177 4 * sizeof(float)); 1178 /*printf("immediate %d maps to const %d\n", i, j);*/ 1179 ifs->constant_flags[j] = 0xf; /* all four comps used */ 1180 p->immediates_map[i] = j; 1181 ifs->num_constants = MAX2(ifs->num_constants, j + 1); 1182 break; 1183 } 1184 } 1185 } 1186 1187 p->first_instruction = FALSE; 1188 } 1189 1190 i915_translate_instruction(p, &token->FullInstruction, fs); 1191 break; 1192 1193 default: 1194 assert( 0 ); 1195 } 1196 1197} 1198 1199/** 1200 * Translate TGSI fragment shader into i915 hardware instructions. 1201 * \param p the translation state 1202 * \param tokens the TGSI token array 1203 */ 1204static void 1205i915_translate_instructions(struct i915_fp_compile *p, 1206 const struct i915_token_list *tokens, 1207 struct i915_fragment_shader *fs) 1208{ 1209 int i; 1210 for(i = 0; i<tokens->NumTokens; i++) { 1211 i915_translate_token(p, &tokens->Tokens[i], fs); 1212 } 1213} 1214 1215 1216static struct i915_fp_compile * 1217i915_init_compile(struct i915_context *i915, 1218 struct i915_fragment_shader *ifs) 1219{ 1220 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); 1221 int i; 1222 1223 p->shader = ifs; 1224 1225 /* Put new constants at end of const buffer, growing downward. 1226 * The problem is we don't know how many user-defined constants might 1227 * be specified with pipe->set_constant_buffer(). 1228 * Should pre-scan the user's program to determine the highest-numbered 1229 * constant referenced. 1230 */ 1231 ifs->num_constants = 0; 1232 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); 1233 1234 memset(&p->register_phases, 0, sizeof(p->register_phases)); 1235 1236 for (i = 0; i < I915_TEX_UNITS; i++) 1237 ifs->generic_mapping[i] = -1; 1238 1239 p->first_instruction = TRUE; 1240 1241 p->nr_tex_indirect = 1; /* correct? */ 1242 p->nr_tex_insn = 0; 1243 p->nr_alu_insn = 0; 1244 p->nr_decl_insn = 0; 1245 1246 p->csr = p->program; 1247 p->decl = p->declarations; 1248 p->decl_s = 0; 1249 p->decl_t = 0; 1250 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; 1251 p->utemp_flag = ~0x7; 1252 1253 /* initialize the first program word */ 1254 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; 1255 1256 return p; 1257} 1258 1259 1260/* Copy compile results to the fragment program struct and destroy the 1261 * compilation context. 1262 */ 1263static void 1264i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) 1265{ 1266 struct i915_fragment_shader *ifs = p->shader; 1267 unsigned long program_size = (unsigned long) (p->csr - p->program); 1268 unsigned long decl_size = (unsigned long) (p->decl - p->declarations); 1269 1270 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 1271 debug_printf("Exceeded max nr indirect texture lookups\n"); 1272 1273 if (p->nr_tex_insn > I915_MAX_TEX_INSN) 1274 i915_program_error(p, "Exceeded max TEX instructions"); 1275 1276 if (p->nr_alu_insn > I915_MAX_ALU_INSN) 1277 i915_program_error(p, "Exceeded max ALU instructions"); 1278 1279 if (p->nr_decl_insn > I915_MAX_DECL_INSN) 1280 i915_program_error(p, "Exceeded max DECL instructions"); 1281 1282 if (p->error) { 1283 p->NumNativeInstructions = 0; 1284 p->NumNativeAluInstructions = 0; 1285 p->NumNativeTexInstructions = 0; 1286 p->NumNativeTexIndirections = 0; 1287 1288 i915_use_passthrough_shader(ifs); 1289 } 1290 else { 1291 p->NumNativeInstructions 1292 = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; 1293 p->NumNativeAluInstructions = p->nr_alu_insn; 1294 p->NumNativeTexInstructions = p->nr_tex_insn; 1295 p->NumNativeTexIndirections = p->nr_tex_indirect; 1296 1297 /* patch in the program length */ 1298 p->declarations[0] |= program_size + decl_size - 2; 1299 1300 /* Copy compilation results to fragment program struct: 1301 */ 1302 assert(!ifs->decl); 1303 assert(!ifs->program); 1304 1305 ifs->decl 1306 = (uint *) MALLOC(decl_size * sizeof(uint)); 1307 ifs->program 1308 = (uint *) MALLOC(program_size * sizeof(uint)); 1309 1310 if (ifs->decl) { 1311 ifs->decl_len = decl_size; 1312 1313 memcpy(ifs->decl, 1314 p->declarations, 1315 decl_size * sizeof(uint)); 1316 } 1317 1318 if (ifs->program) { 1319 ifs->program_len = program_size; 1320 1321 memcpy(ifs->program, 1322 p->program, 1323 program_size * sizeof(uint)); 1324 } 1325 } 1326 1327 /* Release the compilation struct: 1328 */ 1329 FREE(p); 1330} 1331 1332 1333 1334 1335 1336/** 1337 * Rather than trying to intercept and jiggle depth writes during 1338 * emit, just move the value into its correct position at the end of 1339 * the program: 1340 */ 1341static void 1342i915_fixup_depth_write(struct i915_fp_compile *p) 1343{ 1344 /* XXX assuming pos/depth is always in output[0] */ 1345 if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 1346 const uint depth = UREG(REG_TYPE_OD, 0); 1347 1348 i915_emit_arith(p, 1349 A0_MOV, /* opcode */ 1350 depth, /* dest reg */ 1351 A0_DEST_CHANNEL_W, /* write mask */ 1352 0, /* saturate? */ 1353 swizzle(depth, X, Y, Z, Z), /* src0 */ 1354 0, 0 /* src1, src2 */); 1355 } 1356} 1357 1358 1359void 1360i915_translate_fragment_program( struct i915_context *i915, 1361 struct i915_fragment_shader *fs) 1362{ 1363 struct i915_fp_compile *p; 1364 const struct tgsi_token *tokens = fs->state.tokens; 1365 struct i915_token_list* i_tokens; 1366 1367#if 0 1368 tgsi_dump(tokens, 0); 1369#endif 1370 1371 /* hw doesn't seem to like empty frag programs, even when the depth write 1372 * fixup gets emitted below - may that one is fishy, too? */ 1373 if (fs->info.num_instructions == 1) { 1374 i915_use_passthrough_shader(fs); 1375 1376 return; 1377 } 1378 1379 p = i915_init_compile(i915, fs); 1380 1381 i_tokens = i915_optimize(tokens); 1382 i915_translate_instructions(p, i_tokens, fs); 1383 i915_fixup_depth_write(p); 1384 1385 i915_fini_compile(i915, p); 1386 i915_optimize_free(i_tokens); 1387 1388#if 0 1389 i915_disassemble_program(NULL, fs->program, fs->program_len); 1390#endif 1391} 1392