i915_fpc_translate.c revision 8f0a331040fc6fa700ab2c5f96061844a2289599
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include <stdarg.h> 30 31#include "i915_reg.h" 32#include "i915_context.h" 33#include "i915_fpc.h" 34 35#include "pipe/p_shader_tokens.h" 36#include "util/u_math.h" 37#include "util/u_memory.h" 38#include "util/u_string.h" 39#include "tgsi/tgsi_parse.h" 40#include "tgsi/tgsi_dump.h" 41 42#include "draw/draw_vertex.h" 43 44#ifndef M_PI 45#define M_PI 3.14159265358979323846 46#endif 47 48/** 49 * Simple pass-through fragment shader to use when we don't have 50 * a real shader (or it fails to compile for some reason). 51 */ 52static unsigned passthrough[] = 53{ 54 _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), 55 56 /* declare input color: 57 */ 58 (D0_DCL | 59 (REG_TYPE_T << D0_TYPE_SHIFT) | 60 (T_DIFFUSE << D0_NR_SHIFT) | 61 D0_CHANNEL_ALL), 62 0, 63 0, 64 65 /* move to output color: 66 */ 67 (A0_MOV | 68 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | 69 A0_DEST_CHANNEL_ALL | 70 (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | 71 (T_DIFFUSE << A0_SRC0_NR_SHIFT)), 72 0x01230000, /* .xyzw */ 73 0 74}; 75 76 77/* 1, -1/3!, 1/5!, -1/7! */ 78static const float scs_sin_constants[4] = { 1.0, 79 -1.0f / (3 * 2 * 1), 80 1.0f / (5 * 4 * 3 * 2 * 1), 81 -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) 82}; 83 84/* 1, -1/2!, 1/4!, -1/6! */ 85static const float scs_cos_constants[4] = { 1.0, 86 -1.0f / (2 * 1), 87 1.0f / (4 * 3 * 2 * 1), 88 -1.0f / (6 * 5 * 4 * 3 * 2 * 1) 89}; 90 91/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */ 92static const float sin_constants[4] = { 2.0 * M_PI, 93 -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1), 94 32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1), 95 -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1) 96}; 97 98/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */ 99static const float cos_constants[4] = { 1.0, 100 -4.0f * M_PI * M_PI / (2 * 1), 101 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1), 102 -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1) 103}; 104 105 106 107/** 108 * component-wise negation of ureg 109 */ 110static INLINE int 111negate(int reg, int x, int y, int z, int w) 112{ 113 /* Another neat thing about the UREG representation */ 114 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | 115 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | 116 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | 117 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); 118} 119 120 121/** 122 * In the event of a translation failure, we'll generate a simple color 123 * pass-through program. 124 */ 125static void 126i915_use_passthrough_shader(struct i915_fragment_shader *fs) 127{ 128 fs->program = (uint *) MALLOC(sizeof(passthrough)); 129 if (fs->program) { 130 memcpy(fs->program, passthrough, sizeof(passthrough)); 131 fs->program_len = Elements(passthrough); 132 } 133 fs->num_constants = 0; 134} 135 136 137void 138i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 139{ 140 va_list args; 141 char buffer[1024]; 142 143 debug_printf("i915_program_error: "); 144 va_start( args, msg ); 145 util_vsnprintf( buffer, sizeof(buffer), msg, args ); 146 va_end( args ); 147 debug_printf("%s", buffer); 148 debug_printf("\n"); 149 150 p->error = 1; 151} 152 153static uint get_mapping(struct i915_fragment_shader* fs, int unit) 154{ 155 int i; 156 for (i = 0; i < I915_TEX_UNITS; i++) 157 { 158 if (fs->generic_mapping[i] == -1) { 159 fs->generic_mapping[i] = unit; 160 return i; 161 } 162 if (fs->generic_mapping[i] == unit) 163 return i; 164 } 165 debug_printf("Exceeded max generics\n"); 166 return 0; 167} 168 169/** 170 * Construct a ureg for the given source register. Will emit 171 * constants, apply swizzling and negation as needed. 172 */ 173static uint 174src_vector(struct i915_fp_compile *p, 175 const struct tgsi_full_src_register *source, 176 struct i915_fragment_shader* fs) 177{ 178 uint index = source->Register.Index; 179 uint src = 0, sem_name, sem_ind; 180 181 switch (source->Register.File) { 182 case TGSI_FILE_TEMPORARY: 183 if (source->Register.Index >= I915_MAX_TEMPORARY) { 184 i915_program_error(p, "Exceeded max temporary reg"); 185 return 0; 186 } 187 src = UREG(REG_TYPE_R, index); 188 break; 189 case TGSI_FILE_INPUT: 190 /* XXX: Packing COL1, FOGC into a single attribute works for 191 * texenv programs, but will fail for real fragment programs 192 * that use these attributes and expect them to be a full 4 193 * components wide. Could use a texcoord to pass these 194 * attributes if necessary, but that won't work in the general 195 * case. 196 * 197 * We also use a texture coordinate to pass wpos when possible. 198 */ 199 200 sem_name = p->shader->info.input_semantic_name[index]; 201 sem_ind = p->shader->info.input_semantic_index[index]; 202 203 switch (sem_name) { 204 case TGSI_SEMANTIC_POSITION: 205 { 206 /* for fragcoord */ 207 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); 208 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 209 break; 210 } 211 case TGSI_SEMANTIC_COLOR: 212 if (sem_ind == 0) { 213 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 214 } 215 else { 216 /* secondary color */ 217 assert(sem_ind == 1); 218 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 219 src = swizzle(src, X, Y, Z, ONE); 220 } 221 break; 222 case TGSI_SEMANTIC_FOG: 223 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 224 src = swizzle(src, W, W, W, W); 225 break; 226 case TGSI_SEMANTIC_GENERIC: 227 { 228 int real_tex_unit = get_mapping(fs, sem_ind); 229 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 230 break; 231 } 232 case TGSI_SEMANTIC_FACE: 233 { 234 /* for back/front faces */ 235 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); 236 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); 237 break; 238 } 239 default: 240 i915_program_error(p, "Bad source->Index"); 241 return 0; 242 } 243 break; 244 245 case TGSI_FILE_IMMEDIATE: 246 assert(index < p->num_immediates); 247 index = p->immediates_map[index]; 248 /* fall-through */ 249 case TGSI_FILE_CONSTANT: 250 src = UREG(REG_TYPE_CONST, index); 251 break; 252 253 default: 254 i915_program_error(p, "Bad source->File"); 255 return 0; 256 } 257 258 src = swizzle(src, 259 source->Register.SwizzleX, 260 source->Register.SwizzleY, 261 source->Register.SwizzleZ, 262 source->Register.SwizzleW); 263 264 /* There's both negate-all-components and per-component negation. 265 * Try to handle both here. 266 */ 267 { 268 int n = source->Register.Negate; 269 src = negate(src, n, n, n, n); 270 } 271 272 /* no abs() */ 273#if 0 274 /* XXX assertions disabled to allow arbfplight.c to run */ 275 /* XXX enable these assertions, or fix things */ 276 assert(!source->Register.Absolute); 277#endif 278 if (source->Register.Absolute) 279 debug_printf("Unhandler absolute value\n"); 280 281 return src; 282} 283 284 285/** 286 * Construct a ureg for a destination register. 287 */ 288static uint 289get_result_vector(struct i915_fp_compile *p, 290 const struct tgsi_full_dst_register *dest) 291{ 292 switch (dest->Register.File) { 293 case TGSI_FILE_OUTPUT: 294 { 295 uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; 296 switch (sem_name) { 297 case TGSI_SEMANTIC_POSITION: 298 return UREG(REG_TYPE_OD, 0); 299 case TGSI_SEMANTIC_COLOR: 300 return UREG(REG_TYPE_OC, 0); 301 default: 302 i915_program_error(p, "Bad inst->DstReg.Index/semantics"); 303 return 0; 304 } 305 } 306 case TGSI_FILE_TEMPORARY: 307 return UREG(REG_TYPE_R, dest->Register.Index); 308 default: 309 i915_program_error(p, "Bad inst->DstReg.File"); 310 return 0; 311 } 312} 313 314 315/** 316 * Compute flags for saturation and writemask. 317 */ 318static uint 319get_result_flags(const struct tgsi_full_instruction *inst) 320{ 321 const uint writeMask 322 = inst->Dst[0].Register.WriteMask; 323 uint flags = 0x0; 324 325 if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) 326 flags |= A0_DEST_SATURATE; 327 328 if (writeMask & TGSI_WRITEMASK_X) 329 flags |= A0_DEST_CHANNEL_X; 330 if (writeMask & TGSI_WRITEMASK_Y) 331 flags |= A0_DEST_CHANNEL_Y; 332 if (writeMask & TGSI_WRITEMASK_Z) 333 flags |= A0_DEST_CHANNEL_Z; 334 if (writeMask & TGSI_WRITEMASK_W) 335 flags |= A0_DEST_CHANNEL_W; 336 337 return flags; 338} 339 340 341/** 342 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token 343 */ 344static uint 345translate_tex_src_target(struct i915_fp_compile *p, uint tex) 346{ 347 switch (tex) { 348 case TGSI_TEXTURE_SHADOW1D: 349 /* fall-through */ 350 case TGSI_TEXTURE_1D: 351 return D0_SAMPLE_TYPE_2D; 352 353 case TGSI_TEXTURE_SHADOW2D: 354 /* fall-through */ 355 case TGSI_TEXTURE_2D: 356 return D0_SAMPLE_TYPE_2D; 357 358 case TGSI_TEXTURE_SHADOWRECT: 359 /* fall-through */ 360 case TGSI_TEXTURE_RECT: 361 return D0_SAMPLE_TYPE_2D; 362 363 case TGSI_TEXTURE_3D: 364 return D0_SAMPLE_TYPE_VOLUME; 365 366 case TGSI_TEXTURE_CUBE: 367 return D0_SAMPLE_TYPE_CUBE; 368 369 default: 370 i915_program_error(p, "TexSrc type"); 371 return 0; 372 } 373} 374 375 376/** 377 * Generate texel lookup instruction. 378 */ 379static void 380emit_tex(struct i915_fp_compile *p, 381 const struct tgsi_full_instruction *inst, 382 uint opcode, 383 struct i915_fragment_shader* fs) 384{ 385 uint texture = inst->Texture.Texture; 386 uint unit = inst->Src[1].Register.Index; 387 uint tex = translate_tex_src_target( p, texture ); 388 uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); 389 uint coord = src_vector( p, &inst->Src[0], fs); 390 391 i915_emit_texld( p, 392 get_result_vector( p, &inst->Dst[0] ), 393 get_result_flags( inst ), 394 sampler, 395 coord, 396 opcode); 397} 398 399 400/** 401 * Generate a simple arithmetic instruction 402 * \param opcode the i915 opcode 403 * \param numArgs the number of input/src arguments 404 */ 405static void 406emit_simple_arith(struct i915_fp_compile *p, 407 const struct tgsi_full_instruction *inst, 408 uint opcode, uint numArgs, 409 struct i915_fragment_shader* fs) 410{ 411 uint arg1, arg2, arg3; 412 413 assert(numArgs <= 3); 414 415 arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); 416 arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); 417 arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs ); 418 419 i915_emit_arith( p, 420 opcode, 421 get_result_vector( p, &inst->Dst[0]), 422 get_result_flags( inst ), 0, 423 arg1, 424 arg2, 425 arg3 ); 426} 427 428 429/** As above, but swap the first two src regs */ 430static void 431emit_simple_arith_swap2(struct i915_fp_compile *p, 432 const struct tgsi_full_instruction *inst, 433 uint opcode, uint numArgs, 434 struct i915_fragment_shader* fs) 435{ 436 struct tgsi_full_instruction inst2; 437 438 assert(numArgs == 2); 439 440 /* transpose first two registers */ 441 inst2 = *inst; 442 inst2.Src[0] = inst->Src[1]; 443 inst2.Src[1] = inst->Src[0]; 444 445 emit_simple_arith(p, &inst2, opcode, numArgs, fs); 446} 447 448/* 449 * Translate TGSI instruction to i915 instruction. 450 * 451 * Possible concerns: 452 * 453 * SIN, COS -- could use another taylor step? 454 * LIT -- results seem a little different to sw mesa 455 * LOG -- different to mesa on negative numbers, but this is conformant. 456 */ 457static void 458i915_translate_instruction(struct i915_fp_compile *p, 459 const struct tgsi_full_instruction *inst, 460 struct i915_fragment_shader *fs) 461{ 462 uint writemask; 463 uint src0, src1, src2, flags; 464 uint tmp = 0; 465 466 switch (inst->Instruction.Opcode) { 467 case TGSI_OPCODE_ABS: 468 src0 = src_vector(p, &inst->Src[0], fs); 469 i915_emit_arith(p, 470 A0_MAX, 471 get_result_vector(p, &inst->Dst[0]), 472 get_result_flags(inst), 0, 473 src0, negate(src0, 1, 1, 1, 1), 0); 474 break; 475 476 case TGSI_OPCODE_ADD: 477 emit_simple_arith(p, inst, A0_ADD, 2, fs); 478 break; 479 480 case TGSI_OPCODE_CMP: 481 src0 = src_vector(p, &inst->Src[0], fs); 482 src1 = src_vector(p, &inst->Src[1], fs); 483 src2 = src_vector(p, &inst->Src[2], fs); 484 i915_emit_arith(p, A0_CMP, 485 get_result_vector(p, &inst->Dst[0]), 486 get_result_flags(inst), 487 0, src0, src2, src1); /* NOTE: order of src2, src1 */ 488 break; 489 490 case TGSI_OPCODE_COS: 491 src0 = src_vector(p, &inst->Src[0], fs); 492 tmp = i915_get_utemp(p); 493 494 i915_emit_arith(p, 495 A0_MUL, 496 tmp, A0_DEST_CHANNEL_X, 0, 497 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 498 499 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 500 501 /* 502 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 503 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 504 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 505 * result = DP4 t0, cos_constants 506 */ 507 i915_emit_arith(p, 508 A0_MUL, 509 tmp, A0_DEST_CHANNEL_XY, 0, 510 swizzle(tmp, X, X, ONE, ONE), 511 swizzle(tmp, X, ONE, ONE, ONE), 0); 512 513 i915_emit_arith(p, 514 A0_MUL, 515 tmp, A0_DEST_CHANNEL_XYZ, 0, 516 swizzle(tmp, X, Y, X, ONE), 517 swizzle(tmp, X, X, ONE, ONE), 0); 518 519 i915_emit_arith(p, 520 A0_MUL, 521 tmp, A0_DEST_CHANNEL_XYZ, 0, 522 swizzle(tmp, X, X, Z, ONE), 523 swizzle(tmp, Z, ONE, ONE, ONE), 0); 524 525 i915_emit_arith(p, 526 A0_DP4, 527 get_result_vector(p, &inst->Dst[0]), 528 get_result_flags(inst), 0, 529 swizzle(tmp, ONE, Z, Y, X), 530 i915_emit_const4fv(p, cos_constants), 0); 531 break; 532 533 case TGSI_OPCODE_DDX: 534 case TGSI_OPCODE_DDY: 535 /* XXX We just output 0 here */ 536 debug_printf("Punting DDX/DDX\n"); 537 src0 = get_result_vector(p, &inst->Dst[0]); 538 i915_emit_arith(p, 539 A0_MOV, 540 get_result_vector(p, &inst->Dst[0]), 541 get_result_flags(inst), 0, 542 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); 543 break; 544 545 case TGSI_OPCODE_DP2: 546 src0 = src_vector(p, &inst->Src[0], fs); 547 src1 = src_vector(p, &inst->Src[1], fs); 548 549 i915_emit_arith(p, 550 A0_DP3, 551 get_result_vector(p, &inst->Dst[0]), 552 get_result_flags(inst), 0, 553 swizzle(src0, X, Y, ZERO, ZERO), src1, 0); 554 break; 555 556 case TGSI_OPCODE_DP3: 557 emit_simple_arith(p, inst, A0_DP3, 2, fs); 558 break; 559 560 case TGSI_OPCODE_DP4: 561 emit_simple_arith(p, inst, A0_DP4, 2, fs); 562 break; 563 564 case TGSI_OPCODE_DPH: 565 src0 = src_vector(p, &inst->Src[0], fs); 566 src1 = src_vector(p, &inst->Src[1], fs); 567 568 i915_emit_arith(p, 569 A0_DP4, 570 get_result_vector(p, &inst->Dst[0]), 571 get_result_flags(inst), 0, 572 swizzle(src0, X, Y, Z, ONE), src1, 0); 573 break; 574 575 case TGSI_OPCODE_DST: 576 src0 = src_vector(p, &inst->Src[0], fs); 577 src1 = src_vector(p, &inst->Src[1], fs); 578 579 /* result[0] = 1 * 1; 580 * result[1] = a[1] * b[1]; 581 * result[2] = a[2] * 1; 582 * result[3] = 1 * b[3]; 583 */ 584 i915_emit_arith(p, 585 A0_MUL, 586 get_result_vector(p, &inst->Dst[0]), 587 get_result_flags(inst), 0, 588 swizzle(src0, ONE, Y, Z, ONE), 589 swizzle(src1, ONE, Y, ONE, W), 0); 590 break; 591 592 case TGSI_OPCODE_END: 593 /* no-op */ 594 break; 595 596 case TGSI_OPCODE_EX2: 597 src0 = src_vector(p, &inst->Src[0], fs); 598 599 i915_emit_arith(p, 600 A0_EXP, 601 get_result_vector(p, &inst->Dst[0]), 602 get_result_flags(inst), 0, 603 swizzle(src0, X, X, X, X), 0, 0); 604 break; 605 606 case TGSI_OPCODE_FLR: 607 emit_simple_arith(p, inst, A0_FLR, 1, fs); 608 break; 609 610 case TGSI_OPCODE_FRC: 611 emit_simple_arith(p, inst, A0_FRC, 1, fs); 612 break; 613 614 case TGSI_OPCODE_KIL: 615 /* kill if src[0].x < 0 || src[0].y < 0 ... */ 616 src0 = src_vector(p, &inst->Src[0], fs); 617 tmp = i915_get_utemp(p); 618 619 i915_emit_texld(p, 620 tmp, /* dest reg: a dummy reg */ 621 A0_DEST_CHANNEL_ALL, /* dest writemask */ 622 0, /* sampler */ 623 src0, /* coord*/ 624 T0_TEXKILL); /* opcode */ 625 break; 626 627 case TGSI_OPCODE_KILP: 628 assert(0); /* not tested yet */ 629 break; 630 631 case TGSI_OPCODE_LG2: 632 src0 = src_vector(p, &inst->Src[0], fs); 633 634 i915_emit_arith(p, 635 A0_LOG, 636 get_result_vector(p, &inst->Dst[0]), 637 get_result_flags(inst), 0, 638 swizzle(src0, X, X, X, X), 0, 0); 639 break; 640 641 case TGSI_OPCODE_LIT: 642 src0 = src_vector(p, &inst->Src[0], fs); 643 tmp = i915_get_utemp(p); 644 645 /* tmp = max( a.xyzw, a.00zw ) 646 * XXX: Clamp tmp.w to -128..128 647 * tmp.y = log(tmp.y) 648 * tmp.y = tmp.w * tmp.y 649 * tmp.y = exp(tmp.y) 650 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 651 */ 652 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 653 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 654 655 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 656 swizzle(tmp, Y, Y, Y, Y), 0, 0); 657 658 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 659 swizzle(tmp, ZERO, Y, ZERO, ZERO), 660 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 661 662 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 663 swizzle(tmp, Y, Y, Y, Y), 0, 0); 664 665 i915_emit_arith(p, A0_CMP, 666 get_result_vector(p, &inst->Dst[0]), 667 get_result_flags(inst), 0, 668 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 669 swizzle(tmp, ONE, X, ZERO, ONE), 670 swizzle(tmp, ONE, X, Y, ONE)); 671 672 break; 673 674 case TGSI_OPCODE_LRP: 675 src0 = src_vector(p, &inst->Src[0], fs); 676 src1 = src_vector(p, &inst->Src[1], fs); 677 src2 = src_vector(p, &inst->Src[2], fs); 678 flags = get_result_flags(inst); 679 tmp = i915_get_utemp(p); 680 681 /* b*a + c*(1-a) 682 * 683 * b*a + c - ca 684 * 685 * tmp = b*a + c, 686 * result = (-c)*a + tmp 687 */ 688 i915_emit_arith(p, A0_MAD, tmp, 689 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 690 691 i915_emit_arith(p, A0_MAD, 692 get_result_vector(p, &inst->Dst[0]), 693 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 694 break; 695 696 case TGSI_OPCODE_MAD: 697 emit_simple_arith(p, inst, A0_MAD, 3, fs); 698 break; 699 700 case TGSI_OPCODE_MAX: 701 emit_simple_arith(p, inst, A0_MAX, 2, fs); 702 break; 703 704 case TGSI_OPCODE_MIN: 705 src0 = src_vector(p, &inst->Src[0], fs); 706 src1 = src_vector(p, &inst->Src[1], fs); 707 tmp = i915_get_utemp(p); 708 flags = get_result_flags(inst); 709 710 i915_emit_arith(p, 711 A0_MAX, 712 tmp, flags & A0_DEST_CHANNEL_ALL, 0, 713 negate(src0, 1, 1, 1, 1), 714 negate(src1, 1, 1, 1, 1), 0); 715 716 i915_emit_arith(p, 717 A0_MOV, 718 get_result_vector(p, &inst->Dst[0]), 719 flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); 720 break; 721 722 case TGSI_OPCODE_MOV: 723 emit_simple_arith(p, inst, A0_MOV, 1, fs); 724 break; 725 726 case TGSI_OPCODE_MUL: 727 emit_simple_arith(p, inst, A0_MUL, 2, fs); 728 break; 729 730 case TGSI_OPCODE_POW: 731 src0 = src_vector(p, &inst->Src[0], fs); 732 src1 = src_vector(p, &inst->Src[1], fs); 733 tmp = i915_get_utemp(p); 734 flags = get_result_flags(inst); 735 736 /* XXX: masking on intermediate values, here and elsewhere. 737 */ 738 i915_emit_arith(p, 739 A0_LOG, 740 tmp, A0_DEST_CHANNEL_X, 0, 741 swizzle(src0, X, X, X, X), 0, 0); 742 743 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 744 745 i915_emit_arith(p, 746 A0_EXP, 747 get_result_vector(p, &inst->Dst[0]), 748 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 749 break; 750 751 case TGSI_OPCODE_RET: 752 /* XXX: no-op? */ 753 break; 754 755 case TGSI_OPCODE_RCP: 756 src0 = src_vector(p, &inst->Src[0], fs); 757 758 i915_emit_arith(p, 759 A0_RCP, 760 get_result_vector(p, &inst->Dst[0]), 761 get_result_flags(inst), 0, 762 swizzle(src0, X, X, X, X), 0, 0); 763 break; 764 765 case TGSI_OPCODE_RSQ: 766 src0 = src_vector(p, &inst->Src[0], fs); 767 768 i915_emit_arith(p, 769 A0_RSQ, 770 get_result_vector(p, &inst->Dst[0]), 771 get_result_flags(inst), 0, 772 swizzle(src0, X, X, X, X), 0, 0); 773 break; 774 775 case TGSI_OPCODE_SCS: 776 src0 = src_vector(p, &inst->Src[0], fs); 777 tmp = i915_get_utemp(p); 778 779 /* 780 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 781 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 782 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 783 * scs.x = DP4 t1, scs_sin_constants 784 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 785 * scs.y = DP4 t1, scs_cos_constants 786 */ 787 i915_emit_arith(p, 788 A0_MUL, 789 tmp, A0_DEST_CHANNEL_XY, 0, 790 swizzle(src0, X, X, ONE, ONE), 791 swizzle(src0, X, ONE, ONE, ONE), 0); 792 793 i915_emit_arith(p, 794 A0_MUL, 795 tmp, A0_DEST_CHANNEL_ALL, 0, 796 swizzle(tmp, X, Y, X, Y), 797 swizzle(tmp, X, X, ONE, ONE), 0); 798 799 writemask = inst->Dst[0].Register.WriteMask; 800 801 if (writemask & TGSI_WRITEMASK_Y) { 802 uint tmp1; 803 804 if (writemask & TGSI_WRITEMASK_X) 805 tmp1 = i915_get_utemp(p); 806 else 807 tmp1 = tmp; 808 809 i915_emit_arith(p, 810 A0_MUL, 811 tmp1, A0_DEST_CHANNEL_ALL, 0, 812 swizzle(tmp, X, Y, Y, W), 813 swizzle(tmp, X, Z, ONE, ONE), 0); 814 815 i915_emit_arith(p, 816 A0_DP4, 817 get_result_vector(p, &inst->Dst[0]), 818 A0_DEST_CHANNEL_Y, 0, 819 swizzle(tmp1, W, Z, Y, X), 820 i915_emit_const4fv(p, scs_sin_constants), 0); 821 } 822 823 if (writemask & TGSI_WRITEMASK_X) { 824 i915_emit_arith(p, 825 A0_MUL, 826 tmp, A0_DEST_CHANNEL_XYZ, 0, 827 swizzle(tmp, X, X, Z, ONE), 828 swizzle(tmp, Z, ONE, ONE, ONE), 0); 829 830 i915_emit_arith(p, 831 A0_DP4, 832 get_result_vector(p, &inst->Dst[0]), 833 A0_DEST_CHANNEL_X, 0, 834 swizzle(tmp, ONE, Z, Y, X), 835 i915_emit_const4fv(p, scs_cos_constants), 0); 836 } 837 break; 838 839 case TGSI_OPCODE_SEQ: 840 /* if we're both >= and <= then we're == */ 841 src0 = src_vector(p, &inst->Src[0], fs); 842 src1 = src_vector(p, &inst->Src[1], fs); 843 tmp = i915_get_utemp(p); 844 845 i915_emit_arith(p, 846 A0_SGE, 847 tmp, A0_DEST_CHANNEL_ALL, 0, 848 src0, 849 src1, 0); 850 851 i915_emit_arith(p, 852 A0_SGE, 853 get_result_vector(p, &inst->Dst[0]), 854 A0_DEST_CHANNEL_ALL, 0, 855 src1, 856 src0, 0); 857 858 i915_emit_arith(p, 859 A0_MUL, 860 get_result_vector(p, &inst->Dst[0]), 861 A0_DEST_CHANNEL_ALL, 0, 862 get_result_vector(p, &inst->Dst[0]), 863 tmp, 0); 864 865 break; 866 867 case TGSI_OPCODE_SGE: 868 emit_simple_arith(p, inst, A0_SGE, 2, fs); 869 break; 870 871 case TGSI_OPCODE_SIN: 872 src0 = src_vector(p, &inst->Src[0], fs); 873 tmp = i915_get_utemp(p); 874 875 i915_emit_arith(p, 876 A0_MUL, 877 tmp, A0_DEST_CHANNEL_X, 0, 878 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 879 880 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 881 882 /* 883 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 884 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 885 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 886 * result = DP4 t1.wzyx, sin_constants 887 */ 888 i915_emit_arith(p, 889 A0_MUL, 890 tmp, A0_DEST_CHANNEL_XY, 0, 891 swizzle(tmp, X, X, ONE, ONE), 892 swizzle(tmp, X, ONE, ONE, ONE), 0); 893 894 i915_emit_arith(p, 895 A0_MUL, 896 tmp, A0_DEST_CHANNEL_ALL, 0, 897 swizzle(tmp, X, Y, X, Y), 898 swizzle(tmp, X, X, ONE, ONE), 0); 899 900 i915_emit_arith(p, 901 A0_MUL, 902 tmp, A0_DEST_CHANNEL_ALL, 0, 903 swizzle(tmp, X, Y, Y, W), 904 swizzle(tmp, X, Z, ONE, ONE), 0); 905 906 i915_emit_arith(p, 907 A0_DP4, 908 get_result_vector(p, &inst->Dst[0]), 909 get_result_flags(inst), 0, 910 swizzle(tmp, W, Z, Y, X), 911 i915_emit_const4fv(p, sin_constants), 0); 912 break; 913 914 case TGSI_OPCODE_SLE: 915 /* like SGE, but swap reg0, reg1 */ 916 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); 917 break; 918 919 case TGSI_OPCODE_SLT: 920 emit_simple_arith(p, inst, A0_SLT, 2, fs); 921 break; 922 923 case TGSI_OPCODE_SGT: 924 /* like SLT, but swap reg0, reg1 */ 925 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); 926 break; 927 928 case TGSI_OPCODE_SNE: 929 /* if we're < or > then we're != */ 930 src0 = src_vector(p, &inst->Src[0], fs); 931 src1 = src_vector(p, &inst->Src[1], fs); 932 tmp = i915_get_utemp(p); 933 934 i915_emit_arith(p, 935 A0_SLT, 936 tmp, 937 A0_DEST_CHANNEL_ALL, 0, 938 src0, 939 src1, 0); 940 941 i915_emit_arith(p, 942 A0_SLT, 943 get_result_vector(p, &inst->Dst[0]), 944 A0_DEST_CHANNEL_ALL, 0, 945 src1, 946 src0, 0); 947 948 i915_emit_arith(p, 949 A0_ADD, 950 get_result_vector(p, &inst->Dst[0]), 951 A0_DEST_CHANNEL_ALL, 0, 952 get_result_vector(p, &inst->Dst[0]), 953 tmp, 0); 954 break; 955 956 case TGSI_OPCODE_SSG: 957 /* compute (src>0) - (src<0) */ 958 src0 = src_vector(p, &inst->Src[0], fs); 959 tmp = i915_get_utemp(p); 960 961 i915_emit_arith(p, 962 A0_SLT, 963 tmp, 964 A0_DEST_CHANNEL_ALL, 0, 965 src0, 966 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); 967 968 i915_emit_arith(p, 969 A0_SLT, 970 get_result_vector(p, &inst->Dst[0]), 971 A0_DEST_CHANNEL_ALL, 0, 972 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 973 src0, 0); 974 975 i915_emit_arith(p, 976 A0_ADD, 977 get_result_vector(p, &inst->Dst[0]), 978 A0_DEST_CHANNEL_ALL, 0, 979 get_result_vector(p, &inst->Dst[0]), 980 negate(tmp, 1, 1, 1, 1), 0); 981 break; 982 983 case TGSI_OPCODE_SUB: 984 src0 = src_vector(p, &inst->Src[0], fs); 985 src1 = src_vector(p, &inst->Src[1], fs); 986 987 i915_emit_arith(p, 988 A0_ADD, 989 get_result_vector(p, &inst->Dst[0]), 990 get_result_flags(inst), 0, 991 src0, negate(src1, 1, 1, 1, 1), 0); 992 break; 993 994 case TGSI_OPCODE_TEX: 995 emit_tex(p, inst, T0_TEXLD, fs); 996 break; 997 998 case TGSI_OPCODE_TRUNC: 999 emit_simple_arith(p, inst, A0_TRC, 1, fs); 1000 break; 1001 1002 case TGSI_OPCODE_TXB: 1003 emit_tex(p, inst, T0_TEXLDB, fs); 1004 break; 1005 1006 case TGSI_OPCODE_TXP: 1007 emit_tex(p, inst, T0_TEXLDP, fs); 1008 break; 1009 1010 case TGSI_OPCODE_XPD: 1011 /* Cross product: 1012 * result.x = src0.y * src1.z - src0.z * src1.y; 1013 * result.y = src0.z * src1.x - src0.x * src1.z; 1014 * result.z = src0.x * src1.y - src0.y * src1.x; 1015 * result.w = undef; 1016 */ 1017 src0 = src_vector(p, &inst->Src[0], fs); 1018 src1 = src_vector(p, &inst->Src[1], fs); 1019 tmp = i915_get_utemp(p); 1020 1021 i915_emit_arith(p, 1022 A0_MUL, 1023 tmp, A0_DEST_CHANNEL_ALL, 0, 1024 swizzle(src0, Z, X, Y, ONE), 1025 swizzle(src1, Y, Z, X, ONE), 0); 1026 1027 i915_emit_arith(p, 1028 A0_MAD, 1029 get_result_vector(p, &inst->Dst[0]), 1030 get_result_flags(inst), 0, 1031 swizzle(src0, Y, Z, X, ONE), 1032 swizzle(src1, Z, X, Y, ONE), 1033 negate(tmp, 1, 1, 1, 0)); 1034 break; 1035 1036 default: 1037 i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); 1038 p->error = 1; 1039 return; 1040 } 1041 1042 i915_release_utemps(p); 1043} 1044 1045 1046/** 1047 * Translate TGSI fragment shader into i915 hardware instructions. 1048 * \param p the translation state 1049 * \param tokens the TGSI token array 1050 */ 1051static void 1052i915_translate_instructions(struct i915_fp_compile *p, 1053 const struct tgsi_token *tokens, 1054 struct i915_fragment_shader *fs) 1055{ 1056 struct i915_fragment_shader *ifs = p->shader; 1057 struct tgsi_parse_context parse; 1058 1059 tgsi_parse_init( &parse, tokens ); 1060 1061 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1062 1063 tgsi_parse_token( &parse ); 1064 1065 switch( parse.FullToken.Token.Type ) { 1066 case TGSI_TOKEN_TYPE_PROPERTY: 1067 /* 1068 * We only support one cbuf, but we still need to ignore the property 1069 * correctly so we don't hit the assert at the end of the switch case. 1070 */ 1071 assert(parse.FullToken.FullProperty.Property.PropertyName == 1072 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); 1073 break; 1074 case TGSI_TOKEN_TYPE_DECLARATION: 1075 if (parse.FullToken.FullDeclaration.Declaration.File 1076 == TGSI_FILE_CONSTANT) { 1077 uint i; 1078 for (i = parse.FullToken.FullDeclaration.Range.First; 1079 i <= parse.FullToken.FullDeclaration.Range.Last; 1080 i++) { 1081 assert(ifs->constant_flags[i] == 0x0); 1082 ifs->constant_flags[i] = I915_CONSTFLAG_USER; 1083 ifs->num_constants = MAX2(ifs->num_constants, i + 1); 1084 } 1085 } 1086 else if (parse.FullToken.FullDeclaration.Declaration.File 1087 == TGSI_FILE_TEMPORARY) { 1088 uint i; 1089 for (i = parse.FullToken.FullDeclaration.Range.First; 1090 i <= parse.FullToken.FullDeclaration.Range.Last; 1091 i++) { 1092 assert(i < I915_MAX_TEMPORARY); 1093 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ 1094 p->temp_flag |= (1 << i); /* mark temp as used */ 1095 } 1096 } 1097 break; 1098 1099 case TGSI_TOKEN_TYPE_IMMEDIATE: 1100 { 1101 const struct tgsi_full_immediate *imm 1102 = &parse.FullToken.FullImmediate; 1103 const uint pos = p->num_immediates++; 1104 uint j; 1105 assert( imm->Immediate.NrTokens <= 4 + 1 ); 1106 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { 1107 p->immediates[pos][j] = imm->u[j].Float; 1108 } 1109 } 1110 break; 1111 1112 case TGSI_TOKEN_TYPE_INSTRUCTION: 1113 if (p->first_instruction) { 1114 /* resolve location of immediates */ 1115 uint i, j; 1116 for (i = 0; i < p->num_immediates; i++) { 1117 /* find constant slot for this immediate */ 1118 for (j = 0; j < I915_MAX_CONSTANT; j++) { 1119 if (ifs->constant_flags[j] == 0x0) { 1120 memcpy(ifs->constants[j], 1121 p->immediates[i], 1122 4 * sizeof(float)); 1123 /*printf("immediate %d maps to const %d\n", i, j);*/ 1124 ifs->constant_flags[j] = 0xf; /* all four comps used */ 1125 p->immediates_map[i] = j; 1126 ifs->num_constants = MAX2(ifs->num_constants, j + 1); 1127 break; 1128 } 1129 } 1130 } 1131 1132 p->first_instruction = FALSE; 1133 } 1134 1135 i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs); 1136 break; 1137 1138 default: 1139 assert( 0 ); 1140 } 1141 1142 } /* while */ 1143 1144 tgsi_parse_free (&parse); 1145} 1146 1147 1148static struct i915_fp_compile * 1149i915_init_compile(struct i915_context *i915, 1150 struct i915_fragment_shader *ifs) 1151{ 1152 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); 1153 int i; 1154 1155 p->shader = ifs; 1156 1157 /* Put new constants at end of const buffer, growing downward. 1158 * The problem is we don't know how many user-defined constants might 1159 * be specified with pipe->set_constant_buffer(). 1160 * Should pre-scan the user's program to determine the highest-numbered 1161 * constant referenced. 1162 */ 1163 ifs->num_constants = 0; 1164 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); 1165 1166 for (i = 0; i < I915_TEX_UNITS; i++) 1167 ifs->generic_mapping[i] = -1; 1168 1169 p->first_instruction = TRUE; 1170 1171 p->nr_tex_indirect = 1; /* correct? */ 1172 p->nr_tex_insn = 0; 1173 p->nr_alu_insn = 0; 1174 p->nr_decl_insn = 0; 1175 1176 p->csr = p->program; 1177 p->decl = p->declarations; 1178 p->decl_s = 0; 1179 p->decl_t = 0; 1180 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; 1181 p->utemp_flag = ~0x7; 1182 1183 /* initialize the first program word */ 1184 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; 1185 1186 return p; 1187} 1188 1189 1190/* Copy compile results to the fragment program struct and destroy the 1191 * compilation context. 1192 */ 1193static void 1194i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) 1195{ 1196 struct i915_fragment_shader *ifs = p->shader; 1197 unsigned long program_size = (unsigned long) (p->csr - p->program); 1198 unsigned long decl_size = (unsigned long) (p->decl - p->declarations); 1199 1200 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 1201 i915_program_error(p, "Exceeded max nr indirect texture lookups"); 1202 1203 if (p->nr_tex_insn > I915_MAX_TEX_INSN) 1204 i915_program_error(p, "Exceeded max TEX instructions"); 1205 1206 if (p->nr_alu_insn > I915_MAX_ALU_INSN) 1207 i915_program_error(p, "Exceeded max ALU instructions"); 1208 1209 if (p->nr_decl_insn > I915_MAX_DECL_INSN) 1210 i915_program_error(p, "Exceeded max DECL instructions"); 1211 1212 if (p->error) { 1213 p->NumNativeInstructions = 0; 1214 p->NumNativeAluInstructions = 0; 1215 p->NumNativeTexInstructions = 0; 1216 p->NumNativeTexIndirections = 0; 1217 1218 i915_use_passthrough_shader(ifs); 1219 } 1220 else { 1221 p->NumNativeInstructions 1222 = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; 1223 p->NumNativeAluInstructions = p->nr_alu_insn; 1224 p->NumNativeTexInstructions = p->nr_tex_insn; 1225 p->NumNativeTexIndirections = p->nr_tex_indirect; 1226 1227 /* patch in the program length */ 1228 p->declarations[0] |= program_size + decl_size - 2; 1229 1230 /* Copy compilation results to fragment program struct: 1231 */ 1232 assert(!ifs->program); 1233 ifs->program 1234 = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); 1235 if (ifs->program) { 1236 ifs->program_len = program_size + decl_size; 1237 1238 memcpy(ifs->program, 1239 p->declarations, 1240 decl_size * sizeof(uint)); 1241 1242 memcpy(ifs->program + decl_size, 1243 p->program, 1244 program_size * sizeof(uint)); 1245 } 1246 } 1247 1248 /* Release the compilation struct: 1249 */ 1250 FREE(p); 1251} 1252 1253 1254 1255 1256 1257/** 1258 * Rather than trying to intercept and jiggle depth writes during 1259 * emit, just move the value into its correct position at the end of 1260 * the program: 1261 */ 1262static void 1263i915_fixup_depth_write(struct i915_fp_compile *p) 1264{ 1265 /* XXX assuming pos/depth is always in output[0] */ 1266 if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 1267 const uint depth = UREG(REG_TYPE_OD, 0); 1268 1269 i915_emit_arith(p, 1270 A0_MOV, /* opcode */ 1271 depth, /* dest reg */ 1272 A0_DEST_CHANNEL_W, /* write mask */ 1273 0, /* saturate? */ 1274 swizzle(depth, X, Y, Z, Z), /* src0 */ 1275 0, 0 /* src1, src2 */); 1276 } 1277} 1278 1279 1280void 1281i915_translate_fragment_program( struct i915_context *i915, 1282 struct i915_fragment_shader *fs) 1283{ 1284 struct i915_fp_compile *p; 1285 const struct tgsi_token *tokens = fs->state.tokens; 1286 1287#if 0 1288 tgsi_dump(tokens, 0); 1289#endif 1290 1291 /* hw doesn't seem to like empty frag programs, even when the depth write 1292 * fixup gets emitted below - may that one is fishy, too? */ 1293 if (fs->info.num_instructions == 1) { 1294 i915_use_passthrough_shader(fs); 1295 1296 return; 1297 } 1298 1299 p = i915_init_compile(i915, fs); 1300 1301 i915_translate_instructions(p, tokens, fs); 1302 i915_fixup_depth_write(p); 1303 1304 i915_fini_compile(i915, p); 1305} 1306