tgsi_exec.c revision 89d8577fb3036547ef0b47498cc8dc5c77f886e0
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65#define TILE_TOP_LEFT 0 66#define TILE_TOP_RIGHT 1 67#define TILE_BOTTOM_LEFT 2 68#define TILE_BOTTOM_RIGHT 3 69 70#define CHAN_X 0 71#define CHAN_Y 1 72#define CHAN_Z 2 73#define CHAN_W 3 74 75/* 76 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 77 */ 78#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 79#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 80#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 81#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 82#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 83#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 84#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 85#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 86#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 87#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 88#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 89#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 90#define TEMP_128_I TGSI_EXEC_TEMP_128_I 91#define TEMP_128_C TGSI_EXEC_TEMP_128_C 92#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 93#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 94#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 95#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 96#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 97#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 98#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 99#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 100#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 101#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 102#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 103#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 104#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 105#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 106#define TEMP_R0 TGSI_EXEC_TEMP_R0 107#define TEMP_P0 TGSI_EXEC_TEMP_P0 108 109#define IS_CHANNEL_ENABLED(INST, CHAN)\ 110 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) 111 112#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 113 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) 114 115#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 116 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 117 if (IS_CHANNEL_ENABLED( INST, CHAN )) 118 119#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 120 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 121 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 122 123 124/** The execution mask depends on the conditional mask and the loop mask */ 125#define UPDATE_EXEC_MASK(MACH) \ 126 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 127 128 129static const union tgsi_exec_channel ZeroVec = 130 { { 0.0, 0.0, 0.0, 0.0 } }; 131 132 133#ifdef DEBUG 134static void 135check_inf_or_nan(const union tgsi_exec_channel *chan) 136{ 137 assert(!util_is_inf_or_nan(chan->f[0])); 138 assert(!util_is_inf_or_nan(chan->f[1])); 139 assert(!util_is_inf_or_nan(chan->f[2])); 140 assert(!util_is_inf_or_nan(chan->f[3])); 141} 142#endif 143 144 145#ifdef DEBUG 146static void 147print_chan(const char *msg, const union tgsi_exec_channel *chan) 148{ 149 debug_printf("%s = {%f, %f, %f, %f}\n", 150 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 151} 152#endif 153 154 155#ifdef DEBUG 156static void 157print_temp(const struct tgsi_exec_machine *mach, uint index) 158{ 159 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 160 int i; 161 debug_printf("Temp[%u] =\n", index); 162 for (i = 0; i < 4; i++) { 163 debug_printf(" %c: { %f, %f, %f, %f }\n", 164 "XYZW"[i], 165 tmp->xyzw[i].f[0], 166 tmp->xyzw[i].f[1], 167 tmp->xyzw[i].f[2], 168 tmp->xyzw[i].f[3]); 169 } 170} 171#endif 172 173 174/** 175 * Check if there's a potential src/dst register data dependency when 176 * using SOA execution. 177 * Example: 178 * MOV T, T.yxwz; 179 * This would expand into: 180 * MOV t0, t1; 181 * MOV t1, t0; 182 * MOV t2, t3; 183 * MOV t3, t2; 184 * The second instruction will have the wrong value for t0 if executed as-is. 185 */ 186boolean 187tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 188{ 189 uint i, chan; 190 191 uint writemask = inst->Dst[0].Register.WriteMask; 192 if (writemask == TGSI_WRITEMASK_X || 193 writemask == TGSI_WRITEMASK_Y || 194 writemask == TGSI_WRITEMASK_Z || 195 writemask == TGSI_WRITEMASK_W || 196 writemask == TGSI_WRITEMASK_NONE) { 197 /* no chance of data dependency */ 198 return FALSE; 199 } 200 201 /* loop over src regs */ 202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 203 if ((inst->Src[i].Register.File == 204 inst->Dst[0].Register.File) && 205 (inst->Src[i].Register.Index == 206 inst->Dst[0].Register.Index)) { 207 /* loop over dest channels */ 208 uint channelsWritten = 0x0; 209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 210 /* check if we're reading a channel that's been written */ 211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 212 if (channelsWritten & (1 << swizzle)) { 213 return TRUE; 214 } 215 216 channelsWritten |= (1 << chan); 217 } 218 } 219 } 220 return FALSE; 221} 222 223 224/** 225 * Initialize machine state by expanding tokens to full instructions, 226 * allocating temporary storage, setting up constants, etc. 227 * After this, we can call tgsi_exec_machine_run() many times. 228 */ 229void 230tgsi_exec_machine_bind_shader( 231 struct tgsi_exec_machine *mach, 232 const struct tgsi_token *tokens, 233 uint numSamplers, 234 struct tgsi_sampler **samplers) 235{ 236 uint k; 237 struct tgsi_parse_context parse; 238 struct tgsi_exec_labels *labels = &mach->Labels; 239 struct tgsi_full_instruction *instructions; 240 struct tgsi_full_declaration *declarations; 241 uint maxInstructions = 10, numInstructions = 0; 242 uint maxDeclarations = 10, numDeclarations = 0; 243 uint instno = 0; 244 245#if 0 246 tgsi_dump(tokens, 0); 247#endif 248 249 util_init_math(); 250 251 mach->Tokens = tokens; 252 mach->Samplers = samplers; 253 254 k = tgsi_parse_init (&parse, mach->Tokens); 255 if (k != TGSI_PARSE_OK) { 256 debug_printf( "Problem parsing!\n" ); 257 return; 258 } 259 260 mach->Processor = parse.FullHeader.Processor.Processor; 261 mach->ImmLimit = 0; 262 labels->count = 0; 263 264 declarations = (struct tgsi_full_declaration *) 265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 266 267 if (!declarations) { 268 return; 269 } 270 271 instructions = (struct tgsi_full_instruction *) 272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 273 274 if (!instructions) { 275 FREE( declarations ); 276 return; 277 } 278 279 while( !tgsi_parse_end_of_tokens( &parse ) ) { 280 uint pointer = parse.Position; 281 uint i; 282 283 tgsi_parse_token( &parse ); 284 switch( parse.FullToken.Token.Type ) { 285 case TGSI_TOKEN_TYPE_DECLARATION: 286 /* save expanded declaration */ 287 if (numDeclarations == maxDeclarations) { 288 declarations = REALLOC(declarations, 289 maxDeclarations 290 * sizeof(struct tgsi_full_declaration), 291 (maxDeclarations + 10) 292 * sizeof(struct tgsi_full_declaration)); 293 maxDeclarations += 10; 294 } 295 memcpy(declarations + numDeclarations, 296 &parse.FullToken.FullDeclaration, 297 sizeof(declarations[0])); 298 numDeclarations++; 299 break; 300 301 case TGSI_TOKEN_TYPE_IMMEDIATE: 302 { 303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 304 assert( size <= 4 ); 305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 306 307 for( i = 0; i < size; i++ ) { 308 mach->Imms[mach->ImmLimit][i] = 309 parse.FullToken.FullImmediate.u[i].Float; 310 } 311 mach->ImmLimit += 1; 312 } 313 break; 314 315 case TGSI_TOKEN_TYPE_INSTRUCTION: 316 assert( labels->count < MAX_LABELS ); 317 318 labels->labels[labels->count][0] = instno; 319 labels->labels[labels->count][1] = pointer; 320 labels->count++; 321 322 /* save expanded instruction */ 323 if (numInstructions == maxInstructions) { 324 instructions = REALLOC(instructions, 325 maxInstructions 326 * sizeof(struct tgsi_full_instruction), 327 (maxInstructions + 10) 328 * sizeof(struct tgsi_full_instruction)); 329 maxInstructions += 10; 330 } 331 332 memcpy(instructions + numInstructions, 333 &parse.FullToken.FullInstruction, 334 sizeof(instructions[0])); 335 336 numInstructions++; 337 break; 338 339 case TGSI_TOKEN_TYPE_PROPERTY: 340 break; 341 342 default: 343 assert( 0 ); 344 } 345 } 346 tgsi_parse_free (&parse); 347 348 if (mach->Declarations) { 349 FREE( mach->Declarations ); 350 } 351 mach->Declarations = declarations; 352 mach->NumDeclarations = numDeclarations; 353 354 if (mach->Instructions) { 355 FREE( mach->Instructions ); 356 } 357 mach->Instructions = instructions; 358 mach->NumInstructions = numInstructions; 359} 360 361 362struct tgsi_exec_machine * 363tgsi_exec_machine_create( void ) 364{ 365 struct tgsi_exec_machine *mach; 366 uint i; 367 368 mach = align_malloc( sizeof *mach, 16 ); 369 if (!mach) 370 goto fail; 371 372 memset(mach, 0, sizeof(*mach)); 373 374 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 375 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 376 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 377 378 /* Setup constants. */ 379 for( i = 0; i < 4; i++ ) { 380 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 381 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 382 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 383 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 384 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 385 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 386 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 387 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 388 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 389 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 390 } 391 392#ifdef DEBUG 393 /* silence warnings */ 394 (void) print_chan; 395 (void) print_temp; 396#endif 397 398 return mach; 399 400fail: 401 align_free(mach); 402 return NULL; 403} 404 405 406void 407tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 408{ 409 if (mach) { 410 FREE(mach->Instructions); 411 FREE(mach->Declarations); 412 } 413 414 align_free(mach); 415} 416 417 418static void 419micro_abs( 420 union tgsi_exec_channel *dst, 421 const union tgsi_exec_channel *src ) 422{ 423 dst->f[0] = fabsf( src->f[0] ); 424 dst->f[1] = fabsf( src->f[1] ); 425 dst->f[2] = fabsf( src->f[2] ); 426 dst->f[3] = fabsf( src->f[3] ); 427} 428 429static void 430micro_add( 431 union tgsi_exec_channel *dst, 432 const union tgsi_exec_channel *src0, 433 const union tgsi_exec_channel *src1 ) 434{ 435 dst->f[0] = src0->f[0] + src1->f[0]; 436 dst->f[1] = src0->f[1] + src1->f[1]; 437 dst->f[2] = src0->f[2] + src1->f[2]; 438 dst->f[3] = src0->f[3] + src1->f[3]; 439} 440 441#if 0 442static void 443micro_iadd( 444 union tgsi_exec_channel *dst, 445 const union tgsi_exec_channel *src0, 446 const union tgsi_exec_channel *src1 ) 447{ 448 dst->i[0] = src0->i[0] + src1->i[0]; 449 dst->i[1] = src0->i[1] + src1->i[1]; 450 dst->i[2] = src0->i[2] + src1->i[2]; 451 dst->i[3] = src0->i[3] + src1->i[3]; 452} 453#endif 454 455static void 456micro_and( 457 union tgsi_exec_channel *dst, 458 const union tgsi_exec_channel *src0, 459 const union tgsi_exec_channel *src1 ) 460{ 461 dst->u[0] = src0->u[0] & src1->u[0]; 462 dst->u[1] = src0->u[1] & src1->u[1]; 463 dst->u[2] = src0->u[2] & src1->u[2]; 464 dst->u[3] = src0->u[3] & src1->u[3]; 465} 466 467static void 468micro_ceil( 469 union tgsi_exec_channel *dst, 470 const union tgsi_exec_channel *src ) 471{ 472 dst->f[0] = ceilf( src->f[0] ); 473 dst->f[1] = ceilf( src->f[1] ); 474 dst->f[2] = ceilf( src->f[2] ); 475 dst->f[3] = ceilf( src->f[3] ); 476} 477 478static void 479micro_cos( 480 union tgsi_exec_channel *dst, 481 const union tgsi_exec_channel *src ) 482{ 483 dst->f[0] = cosf( src->f[0] ); 484 dst->f[1] = cosf( src->f[1] ); 485 dst->f[2] = cosf( src->f[2] ); 486 dst->f[3] = cosf( src->f[3] ); 487} 488 489static void 490micro_ddx( 491 union tgsi_exec_channel *dst, 492 const union tgsi_exec_channel *src ) 493{ 494 dst->f[0] = 495 dst->f[1] = 496 dst->f[2] = 497 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 498} 499 500static void 501micro_ddy( 502 union tgsi_exec_channel *dst, 503 const union tgsi_exec_channel *src ) 504{ 505 dst->f[0] = 506 dst->f[1] = 507 dst->f[2] = 508 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 509} 510 511static void 512micro_div( 513 union tgsi_exec_channel *dst, 514 const union tgsi_exec_channel *src0, 515 const union tgsi_exec_channel *src1 ) 516{ 517 if (src1->f[0] != 0) { 518 dst->f[0] = src0->f[0] / src1->f[0]; 519 } 520 if (src1->f[1] != 0) { 521 dst->f[1] = src0->f[1] / src1->f[1]; 522 } 523 if (src1->f[2] != 0) { 524 dst->f[2] = src0->f[2] / src1->f[2]; 525 } 526 if (src1->f[3] != 0) { 527 dst->f[3] = src0->f[3] / src1->f[3]; 528 } 529} 530 531#if 0 532static void 533micro_udiv( 534 union tgsi_exec_channel *dst, 535 const union tgsi_exec_channel *src0, 536 const union tgsi_exec_channel *src1 ) 537{ 538 dst->u[0] = src0->u[0] / src1->u[0]; 539 dst->u[1] = src0->u[1] / src1->u[1]; 540 dst->u[2] = src0->u[2] / src1->u[2]; 541 dst->u[3] = src0->u[3] / src1->u[3]; 542} 543#endif 544 545static void 546micro_eq( 547 union tgsi_exec_channel *dst, 548 const union tgsi_exec_channel *src0, 549 const union tgsi_exec_channel *src1, 550 const union tgsi_exec_channel *src2, 551 const union tgsi_exec_channel *src3 ) 552{ 553 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 554 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 555 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 556 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 557} 558 559#if 0 560static void 561micro_ieq( 562 union tgsi_exec_channel *dst, 563 const union tgsi_exec_channel *src0, 564 const union tgsi_exec_channel *src1, 565 const union tgsi_exec_channel *src2, 566 const union tgsi_exec_channel *src3 ) 567{ 568 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 569 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 570 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 571 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 572} 573#endif 574 575static void 576micro_exp2( 577 union tgsi_exec_channel *dst, 578 const union tgsi_exec_channel *src) 579{ 580#if FAST_MATH 581 dst->f[0] = util_fast_exp2( src->f[0] ); 582 dst->f[1] = util_fast_exp2( src->f[1] ); 583 dst->f[2] = util_fast_exp2( src->f[2] ); 584 dst->f[3] = util_fast_exp2( src->f[3] ); 585#else 586 587#if DEBUG 588 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 589 uint i; 590 union tgsi_exec_channel clamped; 591 592 for (i = 0; i < 4; i++) { 593 if (src->f[i] > 127.99999f) { 594 clamped.f[i] = 127.99999f; 595 } else if (src->f[i] < -126.99999f) { 596 clamped.f[i] = -126.99999f; 597 } else { 598 clamped.f[i] = src->f[i]; 599 } 600 } 601 src = &clamped; 602#endif 603 604 dst->f[0] = powf( 2.0f, src->f[0] ); 605 dst->f[1] = powf( 2.0f, src->f[1] ); 606 dst->f[2] = powf( 2.0f, src->f[2] ); 607 dst->f[3] = powf( 2.0f, src->f[3] ); 608#endif 609} 610 611#if 0 612static void 613micro_f2ut( 614 union tgsi_exec_channel *dst, 615 const union tgsi_exec_channel *src ) 616{ 617 dst->u[0] = (uint) src->f[0]; 618 dst->u[1] = (uint) src->f[1]; 619 dst->u[2] = (uint) src->f[2]; 620 dst->u[3] = (uint) src->f[3]; 621} 622#endif 623 624static void 625micro_float_clamp(union tgsi_exec_channel *dst, 626 const union tgsi_exec_channel *src) 627{ 628 uint i; 629 630 for (i = 0; i < 4; i++) { 631 if (src->f[i] > 0.0f) { 632 if (src->f[i] > 1.884467e+019f) 633 dst->f[i] = 1.884467e+019f; 634 else if (src->f[i] < 5.42101e-020f) 635 dst->f[i] = 5.42101e-020f; 636 else 637 dst->f[i] = src->f[i]; 638 } 639 else { 640 if (src->f[i] < -1.884467e+019f) 641 dst->f[i] = -1.884467e+019f; 642 else if (src->f[i] > -5.42101e-020f) 643 dst->f[i] = -5.42101e-020f; 644 else 645 dst->f[i] = src->f[i]; 646 } 647 } 648} 649 650static void 651micro_flr( 652 union tgsi_exec_channel *dst, 653 const union tgsi_exec_channel *src ) 654{ 655 dst->f[0] = floorf( src->f[0] ); 656 dst->f[1] = floorf( src->f[1] ); 657 dst->f[2] = floorf( src->f[2] ); 658 dst->f[3] = floorf( src->f[3] ); 659} 660 661static void 662micro_frc( 663 union tgsi_exec_channel *dst, 664 const union tgsi_exec_channel *src ) 665{ 666 dst->f[0] = src->f[0] - floorf( src->f[0] ); 667 dst->f[1] = src->f[1] - floorf( src->f[1] ); 668 dst->f[2] = src->f[2] - floorf( src->f[2] ); 669 dst->f[3] = src->f[3] - floorf( src->f[3] ); 670} 671 672static void 673micro_i2f( 674 union tgsi_exec_channel *dst, 675 const union tgsi_exec_channel *src ) 676{ 677 dst->f[0] = (float) src->i[0]; 678 dst->f[1] = (float) src->i[1]; 679 dst->f[2] = (float) src->i[2]; 680 dst->f[3] = (float) src->i[3]; 681} 682 683static void 684micro_lg2( 685 union tgsi_exec_channel *dst, 686 const union tgsi_exec_channel *src ) 687{ 688#if FAST_MATH 689 dst->f[0] = util_fast_log2( src->f[0] ); 690 dst->f[1] = util_fast_log2( src->f[1] ); 691 dst->f[2] = util_fast_log2( src->f[2] ); 692 dst->f[3] = util_fast_log2( src->f[3] ); 693#else 694 dst->f[0] = logf( src->f[0] ) * 1.442695f; 695 dst->f[1] = logf( src->f[1] ) * 1.442695f; 696 dst->f[2] = logf( src->f[2] ) * 1.442695f; 697 dst->f[3] = logf( src->f[3] ) * 1.442695f; 698#endif 699} 700 701static void 702micro_le( 703 union tgsi_exec_channel *dst, 704 const union tgsi_exec_channel *src0, 705 const union tgsi_exec_channel *src1, 706 const union tgsi_exec_channel *src2, 707 const union tgsi_exec_channel *src3 ) 708{ 709 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 710 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 711 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 712 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 713} 714 715static void 716micro_lt( 717 union tgsi_exec_channel *dst, 718 const union tgsi_exec_channel *src0, 719 const union tgsi_exec_channel *src1, 720 const union tgsi_exec_channel *src2, 721 const union tgsi_exec_channel *src3 ) 722{ 723 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 724 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 725 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 726 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 727} 728 729#if 0 730static void 731micro_ilt( 732 union tgsi_exec_channel *dst, 733 const union tgsi_exec_channel *src0, 734 const union tgsi_exec_channel *src1, 735 const union tgsi_exec_channel *src2, 736 const union tgsi_exec_channel *src3 ) 737{ 738 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 739 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 740 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 741 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 742} 743#endif 744 745#if 0 746static void 747micro_ult( 748 union tgsi_exec_channel *dst, 749 const union tgsi_exec_channel *src0, 750 const union tgsi_exec_channel *src1, 751 const union tgsi_exec_channel *src2, 752 const union tgsi_exec_channel *src3 ) 753{ 754 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 755 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 756 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 757 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 758} 759#endif 760 761static void 762micro_max( 763 union tgsi_exec_channel *dst, 764 const union tgsi_exec_channel *src0, 765 const union tgsi_exec_channel *src1 ) 766{ 767 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 768 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 769 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 770 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 771} 772 773#if 0 774static void 775micro_imax( 776 union tgsi_exec_channel *dst, 777 const union tgsi_exec_channel *src0, 778 const union tgsi_exec_channel *src1 ) 779{ 780 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 781 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 782 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 783 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 784} 785#endif 786 787#if 0 788static void 789micro_umax( 790 union tgsi_exec_channel *dst, 791 const union tgsi_exec_channel *src0, 792 const union tgsi_exec_channel *src1 ) 793{ 794 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 795 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 796 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 797 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 798} 799#endif 800 801static void 802micro_min( 803 union tgsi_exec_channel *dst, 804 const union tgsi_exec_channel *src0, 805 const union tgsi_exec_channel *src1 ) 806{ 807 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 808 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 809 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 810 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 811} 812 813#if 0 814static void 815micro_imin( 816 union tgsi_exec_channel *dst, 817 const union tgsi_exec_channel *src0, 818 const union tgsi_exec_channel *src1 ) 819{ 820 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 821 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 822 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 823 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 824} 825#endif 826 827#if 0 828static void 829micro_umin( 830 union tgsi_exec_channel *dst, 831 const union tgsi_exec_channel *src0, 832 const union tgsi_exec_channel *src1 ) 833{ 834 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 835 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 836 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 837 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 838} 839#endif 840 841#if 0 842static void 843micro_umod( 844 union tgsi_exec_channel *dst, 845 const union tgsi_exec_channel *src0, 846 const union tgsi_exec_channel *src1 ) 847{ 848 dst->u[0] = src0->u[0] % src1->u[0]; 849 dst->u[1] = src0->u[1] % src1->u[1]; 850 dst->u[2] = src0->u[2] % src1->u[2]; 851 dst->u[3] = src0->u[3] % src1->u[3]; 852} 853#endif 854 855static void 856micro_mul( 857 union tgsi_exec_channel *dst, 858 const union tgsi_exec_channel *src0, 859 const union tgsi_exec_channel *src1 ) 860{ 861 dst->f[0] = src0->f[0] * src1->f[0]; 862 dst->f[1] = src0->f[1] * src1->f[1]; 863 dst->f[2] = src0->f[2] * src1->f[2]; 864 dst->f[3] = src0->f[3] * src1->f[3]; 865} 866 867#if 0 868static void 869micro_imul( 870 union tgsi_exec_channel *dst, 871 const union tgsi_exec_channel *src0, 872 const union tgsi_exec_channel *src1 ) 873{ 874 dst->i[0] = src0->i[0] * src1->i[0]; 875 dst->i[1] = src0->i[1] * src1->i[1]; 876 dst->i[2] = src0->i[2] * src1->i[2]; 877 dst->i[3] = src0->i[3] * src1->i[3]; 878} 879#endif 880 881#if 0 882static void 883micro_imul64( 884 union tgsi_exec_channel *dst0, 885 union tgsi_exec_channel *dst1, 886 const union tgsi_exec_channel *src0, 887 const union tgsi_exec_channel *src1 ) 888{ 889 dst1->i[0] = src0->i[0] * src1->i[0]; 890 dst1->i[1] = src0->i[1] * src1->i[1]; 891 dst1->i[2] = src0->i[2] * src1->i[2]; 892 dst1->i[3] = src0->i[3] * src1->i[3]; 893 dst0->i[0] = 0; 894 dst0->i[1] = 0; 895 dst0->i[2] = 0; 896 dst0->i[3] = 0; 897} 898#endif 899 900#if 0 901static void 902micro_umul64( 903 union tgsi_exec_channel *dst0, 904 union tgsi_exec_channel *dst1, 905 const union tgsi_exec_channel *src0, 906 const union tgsi_exec_channel *src1 ) 907{ 908 dst1->u[0] = src0->u[0] * src1->u[0]; 909 dst1->u[1] = src0->u[1] * src1->u[1]; 910 dst1->u[2] = src0->u[2] * src1->u[2]; 911 dst1->u[3] = src0->u[3] * src1->u[3]; 912 dst0->u[0] = 0; 913 dst0->u[1] = 0; 914 dst0->u[2] = 0; 915 dst0->u[3] = 0; 916} 917#endif 918 919 920#if 0 921static void 922micro_movc( 923 union tgsi_exec_channel *dst, 924 const union tgsi_exec_channel *src0, 925 const union tgsi_exec_channel *src1, 926 const union tgsi_exec_channel *src2 ) 927{ 928 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 929 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 930 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 931 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 932} 933#endif 934 935static void 936micro_neg( 937 union tgsi_exec_channel *dst, 938 const union tgsi_exec_channel *src ) 939{ 940 dst->f[0] = -src->f[0]; 941 dst->f[1] = -src->f[1]; 942 dst->f[2] = -src->f[2]; 943 dst->f[3] = -src->f[3]; 944} 945 946#if 0 947static void 948micro_ineg( 949 union tgsi_exec_channel *dst, 950 const union tgsi_exec_channel *src ) 951{ 952 dst->i[0] = -src->i[0]; 953 dst->i[1] = -src->i[1]; 954 dst->i[2] = -src->i[2]; 955 dst->i[3] = -src->i[3]; 956} 957#endif 958 959static void 960micro_not( 961 union tgsi_exec_channel *dst, 962 const union tgsi_exec_channel *src ) 963{ 964 dst->u[0] = ~src->u[0]; 965 dst->u[1] = ~src->u[1]; 966 dst->u[2] = ~src->u[2]; 967 dst->u[3] = ~src->u[3]; 968} 969 970static void 971micro_or( 972 union tgsi_exec_channel *dst, 973 const union tgsi_exec_channel *src0, 974 const union tgsi_exec_channel *src1 ) 975{ 976 dst->u[0] = src0->u[0] | src1->u[0]; 977 dst->u[1] = src0->u[1] | src1->u[1]; 978 dst->u[2] = src0->u[2] | src1->u[2]; 979 dst->u[3] = src0->u[3] | src1->u[3]; 980} 981 982static void 983micro_pow( 984 union tgsi_exec_channel *dst, 985 const union tgsi_exec_channel *src0, 986 const union tgsi_exec_channel *src1 ) 987{ 988#if FAST_MATH 989 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 990 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 991 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 992 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 993#else 994 dst->f[0] = powf( src0->f[0], src1->f[0] ); 995 dst->f[1] = powf( src0->f[1], src1->f[1] ); 996 dst->f[2] = powf( src0->f[2], src1->f[2] ); 997 dst->f[3] = powf( src0->f[3], src1->f[3] ); 998#endif 999} 1000 1001static void 1002micro_rnd( 1003 union tgsi_exec_channel *dst, 1004 const union tgsi_exec_channel *src ) 1005{ 1006 dst->f[0] = floorf( src->f[0] + 0.5f ); 1007 dst->f[1] = floorf( src->f[1] + 0.5f ); 1008 dst->f[2] = floorf( src->f[2] + 0.5f ); 1009 dst->f[3] = floorf( src->f[3] + 0.5f ); 1010} 1011 1012static void 1013micro_sgn( 1014 union tgsi_exec_channel *dst, 1015 const union tgsi_exec_channel *src ) 1016{ 1017 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 1018 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 1019 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 1020 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1021} 1022 1023static void 1024micro_shl( 1025 union tgsi_exec_channel *dst, 1026 const union tgsi_exec_channel *src0, 1027 const union tgsi_exec_channel *src1 ) 1028{ 1029 dst->i[0] = src0->i[0] << src1->i[0]; 1030 dst->i[1] = src0->i[1] << src1->i[1]; 1031 dst->i[2] = src0->i[2] << src1->i[2]; 1032 dst->i[3] = src0->i[3] << src1->i[3]; 1033} 1034 1035static void 1036micro_ishr( 1037 union tgsi_exec_channel *dst, 1038 const union tgsi_exec_channel *src0, 1039 const union tgsi_exec_channel *src1 ) 1040{ 1041 dst->i[0] = src0->i[0] >> src1->i[0]; 1042 dst->i[1] = src0->i[1] >> src1->i[1]; 1043 dst->i[2] = src0->i[2] >> src1->i[2]; 1044 dst->i[3] = src0->i[3] >> src1->i[3]; 1045} 1046 1047static void 1048micro_trunc( 1049 union tgsi_exec_channel *dst, 1050 const union tgsi_exec_channel *src0 ) 1051{ 1052 dst->f[0] = (float) (int) src0->f[0]; 1053 dst->f[1] = (float) (int) src0->f[1]; 1054 dst->f[2] = (float) (int) src0->f[2]; 1055 dst->f[3] = (float) (int) src0->f[3]; 1056} 1057 1058#if 0 1059static void 1060micro_ushr( 1061 union tgsi_exec_channel *dst, 1062 const union tgsi_exec_channel *src0, 1063 const union tgsi_exec_channel *src1 ) 1064{ 1065 dst->u[0] = src0->u[0] >> src1->u[0]; 1066 dst->u[1] = src0->u[1] >> src1->u[1]; 1067 dst->u[2] = src0->u[2] >> src1->u[2]; 1068 dst->u[3] = src0->u[3] >> src1->u[3]; 1069} 1070#endif 1071 1072static void 1073micro_sin( 1074 union tgsi_exec_channel *dst, 1075 const union tgsi_exec_channel *src ) 1076{ 1077 dst->f[0] = sinf( src->f[0] ); 1078 dst->f[1] = sinf( src->f[1] ); 1079 dst->f[2] = sinf( src->f[2] ); 1080 dst->f[3] = sinf( src->f[3] ); 1081} 1082 1083static void 1084micro_sqrt( union tgsi_exec_channel *dst, 1085 const union tgsi_exec_channel *src ) 1086{ 1087 dst->f[0] = sqrtf( src->f[0] ); 1088 dst->f[1] = sqrtf( src->f[1] ); 1089 dst->f[2] = sqrtf( src->f[2] ); 1090 dst->f[3] = sqrtf( src->f[3] ); 1091} 1092 1093static void 1094micro_sub( 1095 union tgsi_exec_channel *dst, 1096 const union tgsi_exec_channel *src0, 1097 const union tgsi_exec_channel *src1 ) 1098{ 1099 dst->f[0] = src0->f[0] - src1->f[0]; 1100 dst->f[1] = src0->f[1] - src1->f[1]; 1101 dst->f[2] = src0->f[2] - src1->f[2]; 1102 dst->f[3] = src0->f[3] - src1->f[3]; 1103} 1104 1105#if 0 1106static void 1107micro_u2f( 1108 union tgsi_exec_channel *dst, 1109 const union tgsi_exec_channel *src ) 1110{ 1111 dst->f[0] = (float) src->u[0]; 1112 dst->f[1] = (float) src->u[1]; 1113 dst->f[2] = (float) src->u[2]; 1114 dst->f[3] = (float) src->u[3]; 1115} 1116#endif 1117 1118static void 1119micro_xor( 1120 union tgsi_exec_channel *dst, 1121 const union tgsi_exec_channel *src0, 1122 const union tgsi_exec_channel *src1 ) 1123{ 1124 dst->u[0] = src0->u[0] ^ src1->u[0]; 1125 dst->u[1] = src0->u[1] ^ src1->u[1]; 1126 dst->u[2] = src0->u[2] ^ src1->u[2]; 1127 dst->u[3] = src0->u[3] ^ src1->u[3]; 1128} 1129 1130static void 1131fetch_src_file_channel( 1132 const struct tgsi_exec_machine *mach, 1133 const uint file, 1134 const uint swizzle, 1135 const union tgsi_exec_channel *index, 1136 union tgsi_exec_channel *chan ) 1137{ 1138 switch( swizzle ) { 1139 case TGSI_SWIZZLE_X: 1140 case TGSI_SWIZZLE_Y: 1141 case TGSI_SWIZZLE_Z: 1142 case TGSI_SWIZZLE_W: 1143 switch( file ) { 1144 case TGSI_FILE_CONSTANT: 1145 assert(mach->Consts); 1146 if (index->i[0] < 0) 1147 chan->f[0] = 0.0f; 1148 else 1149 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1150 if (index->i[1] < 0) 1151 chan->f[1] = 0.0f; 1152 else 1153 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1154 if (index->i[2] < 0) 1155 chan->f[2] = 0.0f; 1156 else 1157 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1158 if (index->i[3] < 0) 1159 chan->f[3] = 0.0f; 1160 else 1161 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1162 break; 1163 1164 case TGSI_FILE_INPUT: 1165 case TGSI_FILE_SYSTEM_VALUE: 1166 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1167 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1168 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1169 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1170 break; 1171 1172 case TGSI_FILE_TEMPORARY: 1173 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1174 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1175 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1176 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1177 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1178 break; 1179 1180 case TGSI_FILE_IMMEDIATE: 1181 assert( index->i[0] < (int) mach->ImmLimit ); 1182 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1183 assert( index->i[1] < (int) mach->ImmLimit ); 1184 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1185 assert( index->i[2] < (int) mach->ImmLimit ); 1186 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1187 assert( index->i[3] < (int) mach->ImmLimit ); 1188 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1189 break; 1190 1191 case TGSI_FILE_ADDRESS: 1192 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1193 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1194 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1195 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1196 break; 1197 1198 case TGSI_FILE_PREDICATE: 1199 assert(index->i[0] < TGSI_EXEC_NUM_PREDS); 1200 assert(index->i[1] < TGSI_EXEC_NUM_PREDS); 1201 assert(index->i[2] < TGSI_EXEC_NUM_PREDS); 1202 assert(index->i[3] < TGSI_EXEC_NUM_PREDS); 1203 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; 1204 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; 1205 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; 1206 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; 1207 break; 1208 1209 case TGSI_FILE_OUTPUT: 1210 /* vertex/fragment output vars can be read too */ 1211 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1212 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1213 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1214 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1215 break; 1216 1217 default: 1218 assert( 0 ); 1219 } 1220 break; 1221 1222 default: 1223 assert( 0 ); 1224 } 1225} 1226 1227static void 1228fetch_source( 1229 const struct tgsi_exec_machine *mach, 1230 union tgsi_exec_channel *chan, 1231 const struct tgsi_full_src_register *reg, 1232 const uint chan_index ) 1233{ 1234 union tgsi_exec_channel index; 1235 uint swizzle; 1236 1237 /* We start with a direct index into a register file. 1238 * 1239 * file[1], 1240 * where: 1241 * file = Register.File 1242 * [1] = Register.Index 1243 */ 1244 index.i[0] = 1245 index.i[1] = 1246 index.i[2] = 1247 index.i[3] = reg->Register.Index; 1248 1249 /* There is an extra source register that indirectly subscripts 1250 * a register file. The direct index now becomes an offset 1251 * that is being added to the indirect register. 1252 * 1253 * file[ind[2].x+1], 1254 * where: 1255 * ind = Indirect.File 1256 * [2] = Indirect.Index 1257 * .x = Indirect.SwizzleX 1258 */ 1259 if (reg->Register.Indirect) { 1260 union tgsi_exec_channel index2; 1261 union tgsi_exec_channel indir_index; 1262 const uint execmask = mach->ExecMask; 1263 uint i; 1264 1265 /* which address register (always zero now) */ 1266 index2.i[0] = 1267 index2.i[1] = 1268 index2.i[2] = 1269 index2.i[3] = reg->Indirect.Index; 1270 1271 /* get current value of address register[swizzle] */ 1272 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1273 fetch_src_file_channel( 1274 mach, 1275 reg->Indirect.File, 1276 swizzle, 1277 &index2, 1278 &indir_index ); 1279 1280 /* add value of address register to the offset */ 1281 index.i[0] += (int) indir_index.f[0]; 1282 index.i[1] += (int) indir_index.f[1]; 1283 index.i[2] += (int) indir_index.f[2]; 1284 index.i[3] += (int) indir_index.f[3]; 1285 1286 /* for disabled execution channels, zero-out the index to 1287 * avoid using a potential garbage value. 1288 */ 1289 for (i = 0; i < QUAD_SIZE; i++) { 1290 if ((execmask & (1 << i)) == 0) 1291 index.i[i] = 0; 1292 } 1293 } 1294 1295 /* There is an extra source register that is a second 1296 * subscript to a register file. Effectively it means that 1297 * the register file is actually a 2D array of registers. 1298 * 1299 * file[1][3] == file[1*sizeof(file[1])+3], 1300 * where: 1301 * [3] = Dimension.Index 1302 */ 1303 if (reg->Register.Dimension) { 1304 /* The size of the first-order array depends on the register file type. 1305 * We need to multiply the index to the first array to get an effective, 1306 * "flat" index that points to the beginning of the second-order array. 1307 */ 1308 switch (reg->Register.File) { 1309 case TGSI_FILE_INPUT: 1310 case TGSI_FILE_SYSTEM_VALUE: 1311 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1312 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1313 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1314 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1315 break; 1316 case TGSI_FILE_CONSTANT: 1317 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1318 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1319 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1320 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1321 break; 1322 default: 1323 assert( 0 ); 1324 } 1325 1326 index.i[0] += reg->Dimension.Index; 1327 index.i[1] += reg->Dimension.Index; 1328 index.i[2] += reg->Dimension.Index; 1329 index.i[3] += reg->Dimension.Index; 1330 1331 /* Again, the second subscript index can be addressed indirectly 1332 * identically to the first one. 1333 * Nothing stops us from indirectly addressing the indirect register, 1334 * but there is no need for that, so we won't exercise it. 1335 * 1336 * file[1][ind[4].y+3], 1337 * where: 1338 * ind = DimIndirect.File 1339 * [4] = DimIndirect.Index 1340 * .y = DimIndirect.SwizzleX 1341 */ 1342 if (reg->Dimension.Indirect) { 1343 union tgsi_exec_channel index2; 1344 union tgsi_exec_channel indir_index; 1345 const uint execmask = mach->ExecMask; 1346 uint i; 1347 1348 index2.i[0] = 1349 index2.i[1] = 1350 index2.i[2] = 1351 index2.i[3] = reg->DimIndirect.Index; 1352 1353 swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); 1354 fetch_src_file_channel( 1355 mach, 1356 reg->DimIndirect.File, 1357 swizzle, 1358 &index2, 1359 &indir_index ); 1360 1361 index.i[0] += (int) indir_index.f[0]; 1362 index.i[1] += (int) indir_index.f[1]; 1363 index.i[2] += (int) indir_index.f[2]; 1364 index.i[3] += (int) indir_index.f[3]; 1365 1366 /* for disabled execution channels, zero-out the index to 1367 * avoid using a potential garbage value. 1368 */ 1369 for (i = 0; i < QUAD_SIZE; i++) { 1370 if ((execmask & (1 << i)) == 0) 1371 index.i[i] = 0; 1372 } 1373 } 1374 1375 /* If by any chance there was a need for a 3D array of register 1376 * files, we would have to check whether Dimension is followed 1377 * by a dimension register and continue the saga. 1378 */ 1379 } 1380 1381 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1382 fetch_src_file_channel( 1383 mach, 1384 reg->Register.File, 1385 swizzle, 1386 &index, 1387 chan ); 1388 1389 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1390 case TGSI_UTIL_SIGN_CLEAR: 1391 micro_abs( chan, chan ); 1392 break; 1393 1394 case TGSI_UTIL_SIGN_SET: 1395 micro_abs( chan, chan ); 1396 micro_neg( chan, chan ); 1397 break; 1398 1399 case TGSI_UTIL_SIGN_TOGGLE: 1400 micro_neg( chan, chan ); 1401 break; 1402 1403 case TGSI_UTIL_SIGN_KEEP: 1404 break; 1405 } 1406} 1407 1408static void 1409store_dest( 1410 struct tgsi_exec_machine *mach, 1411 const union tgsi_exec_channel *chan, 1412 const struct tgsi_full_dst_register *reg, 1413 const struct tgsi_full_instruction *inst, 1414 uint chan_index ) 1415{ 1416 uint i; 1417 union tgsi_exec_channel null; 1418 union tgsi_exec_channel *dst; 1419 uint execmask = mach->ExecMask; 1420 int offset = 0; /* indirection offset */ 1421 int index; 1422 1423#ifdef DEBUG 1424 check_inf_or_nan(chan); 1425#endif 1426 1427 /* There is an extra source register that indirectly subscripts 1428 * a register file. The direct index now becomes an offset 1429 * that is being added to the indirect register. 1430 * 1431 * file[ind[2].x+1], 1432 * where: 1433 * ind = Indirect.File 1434 * [2] = Indirect.Index 1435 * .x = Indirect.SwizzleX 1436 */ 1437 if (reg->Register.Indirect) { 1438 union tgsi_exec_channel index; 1439 union tgsi_exec_channel indir_index; 1440 uint swizzle; 1441 1442 /* which address register (always zero for now) */ 1443 index.i[0] = 1444 index.i[1] = 1445 index.i[2] = 1446 index.i[3] = reg->Indirect.Index; 1447 1448 /* get current value of address register[swizzle] */ 1449 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1450 1451 /* fetch values from the address/indirection register */ 1452 fetch_src_file_channel( 1453 mach, 1454 reg->Indirect.File, 1455 swizzle, 1456 &index, 1457 &indir_index ); 1458 1459 /* save indirection offset */ 1460 offset = (int) indir_index.f[0]; 1461 } 1462 1463 switch (reg->Register.File) { 1464 case TGSI_FILE_NULL: 1465 dst = &null; 1466 break; 1467 1468 case TGSI_FILE_OUTPUT: 1469 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1470 + reg->Register.Index; 1471 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1472#if 0 1473 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1474 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", index, execmask); 1475 for (i = 0; i < QUAD_SIZE; i++) 1476 if (execmask & (1 << i)) 1477 fprintf(stderr, "%f, ", chan->f[i]); 1478 fprintf(stderr, ")\n"); 1479 } 1480#endif 1481 break; 1482 1483 case TGSI_FILE_TEMPORARY: 1484 index = reg->Register.Index; 1485 assert( index < TGSI_EXEC_NUM_TEMPS ); 1486 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1487 break; 1488 1489 case TGSI_FILE_ADDRESS: 1490 index = reg->Register.Index; 1491 dst = &mach->Addrs[index].xyzw[chan_index]; 1492 break; 1493 1494 case TGSI_FILE_LOOP: 1495 assert(reg->Register.Index == 0); 1496 assert(mach->LoopCounterStackTop > 0); 1497 assert(chan_index == CHAN_X); 1498 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; 1499 break; 1500 1501 case TGSI_FILE_PREDICATE: 1502 index = reg->Register.Index; 1503 assert(index < TGSI_EXEC_NUM_PREDS); 1504 dst = &mach->Predicates[index].xyzw[chan_index]; 1505 break; 1506 1507 default: 1508 assert( 0 ); 1509 return; 1510 } 1511 1512 if (inst->Instruction.Predicate) { 1513 uint swizzle; 1514 union tgsi_exec_channel *pred; 1515 1516 switch (chan_index) { 1517 case CHAN_X: 1518 swizzle = inst->Predicate.SwizzleX; 1519 break; 1520 case CHAN_Y: 1521 swizzle = inst->Predicate.SwizzleY; 1522 break; 1523 case CHAN_Z: 1524 swizzle = inst->Predicate.SwizzleZ; 1525 break; 1526 case CHAN_W: 1527 swizzle = inst->Predicate.SwizzleW; 1528 break; 1529 default: 1530 assert(0); 1531 return; 1532 } 1533 1534 assert(inst->Predicate.Index == 0); 1535 1536 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; 1537 1538 if (inst->Predicate.Negate) { 1539 for (i = 0; i < QUAD_SIZE; i++) { 1540 if (pred->u[i]) { 1541 execmask &= ~(1 << i); 1542 } 1543 } 1544 } else { 1545 for (i = 0; i < QUAD_SIZE; i++) { 1546 if (!pred->u[i]) { 1547 execmask &= ~(1 << i); 1548 } 1549 } 1550 } 1551 } 1552 1553 switch (inst->Instruction.Saturate) { 1554 case TGSI_SAT_NONE: 1555 for (i = 0; i < QUAD_SIZE; i++) 1556 if (execmask & (1 << i)) 1557 dst->i[i] = chan->i[i]; 1558 break; 1559 1560 case TGSI_SAT_ZERO_ONE: 1561 for (i = 0; i < QUAD_SIZE; i++) 1562 if (execmask & (1 << i)) { 1563 if (chan->f[i] < 0.0f) 1564 dst->f[i] = 0.0f; 1565 else if (chan->f[i] > 1.0f) 1566 dst->f[i] = 1.0f; 1567 else 1568 dst->i[i] = chan->i[i]; 1569 } 1570 break; 1571 1572 case TGSI_SAT_MINUS_PLUS_ONE: 1573 for (i = 0; i < QUAD_SIZE; i++) 1574 if (execmask & (1 << i)) { 1575 if (chan->f[i] < -1.0f) 1576 dst->f[i] = -1.0f; 1577 else if (chan->f[i] > 1.0f) 1578 dst->f[i] = 1.0f; 1579 else 1580 dst->i[i] = chan->i[i]; 1581 } 1582 break; 1583 1584 default: 1585 assert( 0 ); 1586 } 1587} 1588 1589#define FETCH(VAL,INDEX,CHAN)\ 1590 fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) 1591 1592#define STORE(VAL,INDEX,CHAN)\ 1593 store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) 1594 1595 1596/** 1597 * Execute ARB-style KIL which is predicated by a src register. 1598 * Kill fragment if any of the four values is less than zero. 1599 */ 1600static void 1601exec_kil(struct tgsi_exec_machine *mach, 1602 const struct tgsi_full_instruction *inst) 1603{ 1604 uint uniquemask; 1605 uint chan_index; 1606 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1607 union tgsi_exec_channel r[1]; 1608 1609 /* This mask stores component bits that were already tested. */ 1610 uniquemask = 0; 1611 1612 for (chan_index = 0; chan_index < 4; chan_index++) 1613 { 1614 uint swizzle; 1615 uint i; 1616 1617 /* unswizzle channel */ 1618 swizzle = tgsi_util_get_full_src_register_swizzle ( 1619 &inst->Src[0], 1620 chan_index); 1621 1622 /* check if the component has not been already tested */ 1623 if (uniquemask & (1 << swizzle)) 1624 continue; 1625 uniquemask |= 1 << swizzle; 1626 1627 FETCH(&r[0], 0, chan_index); 1628 for (i = 0; i < 4; i++) 1629 if (r[0].f[i] < 0.0f) 1630 kilmask |= 1 << i; 1631 } 1632 1633 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1634} 1635 1636/** 1637 * Execute NVIDIA-style KIL which is predicated by a condition code. 1638 * Kill fragment if the condition code is TRUE. 1639 */ 1640static void 1641exec_kilp(struct tgsi_exec_machine *mach, 1642 const struct tgsi_full_instruction *inst) 1643{ 1644 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1645 1646 /* "unconditional" kil */ 1647 kilmask = mach->ExecMask; 1648 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1649} 1650 1651static void 1652emit_vertex(struct tgsi_exec_machine *mach) 1653{ 1654 /* FIXME: check for exec mask correctly 1655 unsigned i; 1656 for (i = 0; i < QUAD_SIZE; ++i) { 1657 if ((mach->ExecMask & (1 << i))) 1658 */ 1659 if (mach->ExecMask) { 1660 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 1661 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 1662 } 1663} 1664 1665static void 1666emit_primitive(struct tgsi_exec_machine *mach) 1667{ 1668 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; 1669 /* FIXME: check for exec mask correctly 1670 unsigned i; 1671 for (i = 0; i < QUAD_SIZE; ++i) { 1672 if ((mach->ExecMask & (1 << i))) 1673 */ 1674 if (mach->ExecMask) { 1675 ++(*prim_count); 1676 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 1677 mach->Primitives[*prim_count] = 0; 1678 } 1679} 1680 1681/* 1682 * Fetch a four texture samples using STR texture coordinates. 1683 */ 1684static void 1685fetch_texel( struct tgsi_sampler *sampler, 1686 const union tgsi_exec_channel *s, 1687 const union tgsi_exec_channel *t, 1688 const union tgsi_exec_channel *p, 1689 float lodbias, /* XXX should be float[4] */ 1690 union tgsi_exec_channel *r, 1691 union tgsi_exec_channel *g, 1692 union tgsi_exec_channel *b, 1693 union tgsi_exec_channel *a ) 1694{ 1695 uint j; 1696 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1697 1698 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1699 1700 for (j = 0; j < 4; j++) { 1701 r->f[j] = rgba[0][j]; 1702 g->f[j] = rgba[1][j]; 1703 b->f[j] = rgba[2][j]; 1704 a->f[j] = rgba[3][j]; 1705 } 1706} 1707 1708 1709static void 1710exec_tex(struct tgsi_exec_machine *mach, 1711 const struct tgsi_full_instruction *inst, 1712 boolean biasLod, 1713 boolean projected) 1714{ 1715 const uint unit = inst->Src[1].Register.Index; 1716 union tgsi_exec_channel r[4]; 1717 uint chan_index; 1718 float lodBias; 1719 1720 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1721 1722 switch (inst->Texture.Texture) { 1723 case TGSI_TEXTURE_1D: 1724 case TGSI_TEXTURE_SHADOW1D: 1725 1726 FETCH(&r[0], 0, CHAN_X); 1727 1728 if (projected) { 1729 FETCH(&r[1], 0, CHAN_W); 1730 micro_div( &r[0], &r[0], &r[1] ); 1731 } 1732 1733 if (biasLod) { 1734 FETCH(&r[1], 0, CHAN_W); 1735 lodBias = r[2].f[0]; 1736 } 1737 else 1738 lodBias = 0.0; 1739 1740 fetch_texel(mach->Samplers[unit], 1741 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1742 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1743 break; 1744 1745 case TGSI_TEXTURE_2D: 1746 case TGSI_TEXTURE_RECT: 1747 case TGSI_TEXTURE_SHADOW2D: 1748 case TGSI_TEXTURE_SHADOWRECT: 1749 1750 FETCH(&r[0], 0, CHAN_X); 1751 FETCH(&r[1], 0, CHAN_Y); 1752 FETCH(&r[2], 0, CHAN_Z); 1753 1754 if (projected) { 1755 FETCH(&r[3], 0, CHAN_W); 1756 micro_div( &r[0], &r[0], &r[3] ); 1757 micro_div( &r[1], &r[1], &r[3] ); 1758 micro_div( &r[2], &r[2], &r[3] ); 1759 } 1760 1761 if (biasLod) { 1762 FETCH(&r[3], 0, CHAN_W); 1763 lodBias = r[3].f[0]; 1764 } 1765 else 1766 lodBias = 0.0; 1767 1768 fetch_texel(mach->Samplers[unit], 1769 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1770 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1771 break; 1772 1773 case TGSI_TEXTURE_3D: 1774 case TGSI_TEXTURE_CUBE: 1775 1776 FETCH(&r[0], 0, CHAN_X); 1777 FETCH(&r[1], 0, CHAN_Y); 1778 FETCH(&r[2], 0, CHAN_Z); 1779 1780 if (projected) { 1781 FETCH(&r[3], 0, CHAN_W); 1782 micro_div( &r[0], &r[0], &r[3] ); 1783 micro_div( &r[1], &r[1], &r[3] ); 1784 micro_div( &r[2], &r[2], &r[3] ); 1785 } 1786 1787 if (biasLod) { 1788 FETCH(&r[3], 0, CHAN_W); 1789 lodBias = r[3].f[0]; 1790 } 1791 else 1792 lodBias = 0.0; 1793 1794 fetch_texel(mach->Samplers[unit], 1795 &r[0], &r[1], &r[2], lodBias, 1796 &r[0], &r[1], &r[2], &r[3]); 1797 break; 1798 1799 default: 1800 assert (0); 1801 } 1802 1803 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1804 STORE( &r[chan_index], 0, chan_index ); 1805 } 1806} 1807 1808static void 1809exec_txd(struct tgsi_exec_machine *mach, 1810 const struct tgsi_full_instruction *inst) 1811{ 1812 const uint unit = inst->Src[3].Register.Index; 1813 union tgsi_exec_channel r[4]; 1814 uint chan_index; 1815 1816 /* 1817 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 1818 */ 1819 1820 switch (inst->Texture.Texture) { 1821 case TGSI_TEXTURE_1D: 1822 case TGSI_TEXTURE_SHADOW1D: 1823 1824 FETCH(&r[0], 0, CHAN_X); 1825 1826 fetch_texel(mach->Samplers[unit], 1827 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ 1828 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1829 break; 1830 1831 case TGSI_TEXTURE_2D: 1832 case TGSI_TEXTURE_RECT: 1833 case TGSI_TEXTURE_SHADOW2D: 1834 case TGSI_TEXTURE_SHADOWRECT: 1835 1836 FETCH(&r[0], 0, CHAN_X); 1837 FETCH(&r[1], 0, CHAN_Y); 1838 FETCH(&r[2], 0, CHAN_Z); 1839 1840 fetch_texel(mach->Samplers[unit], 1841 &r[0], &r[1], &r[2], 0.0f, /* inputs */ 1842 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1843 break; 1844 1845 case TGSI_TEXTURE_3D: 1846 case TGSI_TEXTURE_CUBE: 1847 1848 FETCH(&r[0], 0, CHAN_X); 1849 FETCH(&r[1], 0, CHAN_Y); 1850 FETCH(&r[2], 0, CHAN_Z); 1851 1852 fetch_texel(mach->Samplers[unit], 1853 &r[0], &r[1], &r[2], 0.0f, 1854 &r[0], &r[1], &r[2], &r[3]); 1855 break; 1856 1857 default: 1858 assert(0); 1859 } 1860 1861 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1862 STORE(&r[chan_index], 0, chan_index); 1863 } 1864} 1865 1866 1867/** 1868 * Evaluate a constant-valued coefficient at the position of the 1869 * current quad. 1870 */ 1871static void 1872eval_constant_coef( 1873 struct tgsi_exec_machine *mach, 1874 unsigned attrib, 1875 unsigned chan ) 1876{ 1877 unsigned i; 1878 1879 for( i = 0; i < QUAD_SIZE; i++ ) { 1880 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1881 } 1882} 1883 1884/** 1885 * Evaluate a linear-valued coefficient at the position of the 1886 * current quad. 1887 */ 1888static void 1889eval_linear_coef( 1890 struct tgsi_exec_machine *mach, 1891 unsigned attrib, 1892 unsigned chan ) 1893{ 1894 const float x = mach->QuadPos.xyzw[0].f[0]; 1895 const float y = mach->QuadPos.xyzw[1].f[0]; 1896 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1897 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1898 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1899 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1900 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1901 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1902 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1903} 1904 1905/** 1906 * Evaluate a perspective-valued coefficient at the position of the 1907 * current quad. 1908 */ 1909static void 1910eval_perspective_coef( 1911 struct tgsi_exec_machine *mach, 1912 unsigned attrib, 1913 unsigned chan ) 1914{ 1915 const float x = mach->QuadPos.xyzw[0].f[0]; 1916 const float y = mach->QuadPos.xyzw[1].f[0]; 1917 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1918 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1919 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1920 const float *w = mach->QuadPos.xyzw[3].f; 1921 /* divide by W here */ 1922 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1923 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1924 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1925 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1926} 1927 1928 1929typedef void (* eval_coef_func)( 1930 struct tgsi_exec_machine *mach, 1931 unsigned attrib, 1932 unsigned chan ); 1933 1934static void 1935exec_declaration(struct tgsi_exec_machine *mach, 1936 const struct tgsi_full_declaration *decl) 1937{ 1938 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 1939 if (decl->Declaration.File == TGSI_FILE_INPUT || 1940 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1941 uint first, last, mask; 1942 1943 first = decl->Range.First; 1944 last = decl->Range.Last; 1945 mask = decl->Declaration.UsageMask; 1946 1947 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { 1948 assert(decl->Semantic.Index == 0); 1949 assert(first == last); 1950 assert(mask = TGSI_WRITEMASK_XYZW); 1951 1952 mach->Inputs[first] = mach->QuadPos; 1953 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 1954 uint i; 1955 1956 assert(decl->Semantic.Index == 0); 1957 assert(first == last); 1958 1959 for (i = 0; i < QUAD_SIZE; i++) { 1960 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 1961 } 1962 } else { 1963 eval_coef_func eval; 1964 uint i, j; 1965 1966 switch (decl->Declaration.Interpolate) { 1967 case TGSI_INTERPOLATE_CONSTANT: 1968 eval = eval_constant_coef; 1969 break; 1970 1971 case TGSI_INTERPOLATE_LINEAR: 1972 eval = eval_linear_coef; 1973 break; 1974 1975 case TGSI_INTERPOLATE_PERSPECTIVE: 1976 eval = eval_perspective_coef; 1977 break; 1978 1979 default: 1980 assert(0); 1981 return; 1982 } 1983 1984 for (j = 0; j < NUM_CHANNELS; j++) { 1985 if (mask & (1 << j)) { 1986 for (i = first; i <= last; i++) { 1987 eval(mach, i, j); 1988 } 1989 } 1990 } 1991 } 1992 } 1993 } 1994} 1995 1996static void 1997exec_instruction( 1998 struct tgsi_exec_machine *mach, 1999 const struct tgsi_full_instruction *inst, 2000 int *pc ) 2001{ 2002 uint chan_index; 2003 union tgsi_exec_channel r[10]; 2004 union tgsi_exec_channel d[8]; 2005 2006 (*pc)++; 2007 2008 switch (inst->Instruction.Opcode) { 2009 case TGSI_OPCODE_ARL: 2010 case TGSI_OPCODE_FLR: 2011 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2012 FETCH( &r[0], 0, chan_index ); 2013 micro_flr(&d[chan_index], &r[0]); 2014 } 2015 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2016 STORE(&d[chan_index], 0, chan_index); 2017 } 2018 break; 2019 2020 case TGSI_OPCODE_MOV: 2021 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2022 FETCH(&d[chan_index], 0, chan_index); 2023 } 2024 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2025 STORE(&d[chan_index], 0, chan_index); 2026 } 2027 break; 2028 2029 case TGSI_OPCODE_LIT: 2030 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2031 FETCH( &r[0], 0, CHAN_X ); 2032 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2033 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2034 } 2035 2036 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2037 FETCH( &r[1], 0, CHAN_Y ); 2038 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2039 2040 FETCH( &r[2], 0, CHAN_W ); 2041 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2042 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2043 micro_pow( &r[1], &r[1], &r[2] ); 2044 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2045 } 2046 2047 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2048 STORE(&d[CHAN_Y], 0, CHAN_Y); 2049 } 2050 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2051 STORE(&d[CHAN_Z], 0, CHAN_Z); 2052 } 2053 } 2054 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2055 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2056 } 2057 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2058 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2059 } 2060 break; 2061 2062 case TGSI_OPCODE_RCP: 2063 /* TGSI_OPCODE_RECIP */ 2064 FETCH( &r[0], 0, CHAN_X ); 2065 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2066 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2067 STORE( &r[0], 0, chan_index ); 2068 } 2069 break; 2070 2071 case TGSI_OPCODE_RSQ: 2072 /* TGSI_OPCODE_RECIPSQRT */ 2073 FETCH( &r[0], 0, CHAN_X ); 2074 micro_abs( &r[0], &r[0] ); 2075 micro_sqrt( &r[0], &r[0] ); 2076 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2077 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2078 STORE( &r[0], 0, chan_index ); 2079 } 2080 break; 2081 2082 case TGSI_OPCODE_EXP: 2083 FETCH( &r[0], 0, CHAN_X ); 2084 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2085 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2086 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2087 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2088 } 2089 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2090 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2091 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2092 } 2093 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2094 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2095 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2096 } 2097 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2098 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2099 } 2100 break; 2101 2102 case TGSI_OPCODE_LOG: 2103 FETCH( &r[0], 0, CHAN_X ); 2104 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2105 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2106 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2107 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2108 STORE( &r[0], 0, CHAN_X ); 2109 } 2110 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2111 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2112 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2113 STORE( &r[0], 0, CHAN_Y ); 2114 } 2115 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2116 STORE( &r[1], 0, CHAN_Z ); 2117 } 2118 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2119 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2120 } 2121 break; 2122 2123 case TGSI_OPCODE_MUL: 2124 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2125 FETCH(&r[0], 0, chan_index); 2126 FETCH(&r[1], 1, chan_index); 2127 micro_mul(&d[chan_index], &r[0], &r[1]); 2128 } 2129 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2130 STORE(&d[chan_index], 0, chan_index); 2131 } 2132 break; 2133 2134 case TGSI_OPCODE_ADD: 2135 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2136 FETCH( &r[0], 0, chan_index ); 2137 FETCH( &r[1], 1, chan_index ); 2138 micro_add(&d[chan_index], &r[0], &r[1]); 2139 } 2140 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2141 STORE(&d[chan_index], 0, chan_index); 2142 } 2143 break; 2144 2145 case TGSI_OPCODE_DP3: 2146 /* TGSI_OPCODE_DOT3 */ 2147 FETCH( &r[0], 0, CHAN_X ); 2148 FETCH( &r[1], 1, CHAN_X ); 2149 micro_mul( &r[0], &r[0], &r[1] ); 2150 2151 FETCH( &r[1], 0, CHAN_Y ); 2152 FETCH( &r[2], 1, CHAN_Y ); 2153 micro_mul( &r[1], &r[1], &r[2] ); 2154 micro_add( &r[0], &r[0], &r[1] ); 2155 2156 FETCH( &r[1], 0, CHAN_Z ); 2157 FETCH( &r[2], 1, CHAN_Z ); 2158 micro_mul( &r[1], &r[1], &r[2] ); 2159 micro_add( &r[0], &r[0], &r[1] ); 2160 2161 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2162 STORE( &r[0], 0, chan_index ); 2163 } 2164 break; 2165 2166 case TGSI_OPCODE_DP4: 2167 /* TGSI_OPCODE_DOT4 */ 2168 FETCH(&r[0], 0, CHAN_X); 2169 FETCH(&r[1], 1, CHAN_X); 2170 2171 micro_mul( &r[0], &r[0], &r[1] ); 2172 2173 FETCH(&r[1], 0, CHAN_Y); 2174 FETCH(&r[2], 1, CHAN_Y); 2175 2176 micro_mul( &r[1], &r[1], &r[2] ); 2177 micro_add( &r[0], &r[0], &r[1] ); 2178 2179 FETCH(&r[1], 0, CHAN_Z); 2180 FETCH(&r[2], 1, CHAN_Z); 2181 2182 micro_mul( &r[1], &r[1], &r[2] ); 2183 micro_add( &r[0], &r[0], &r[1] ); 2184 2185 FETCH(&r[1], 0, CHAN_W); 2186 FETCH(&r[2], 1, CHAN_W); 2187 2188 micro_mul( &r[1], &r[1], &r[2] ); 2189 micro_add( &r[0], &r[0], &r[1] ); 2190 2191 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2192 STORE( &r[0], 0, chan_index ); 2193 } 2194 break; 2195 2196 case TGSI_OPCODE_DST: 2197 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2198 FETCH( &r[0], 0, CHAN_Y ); 2199 FETCH( &r[1], 1, CHAN_Y); 2200 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2201 } 2202 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2203 FETCH(&d[CHAN_Z], 0, CHAN_Z); 2204 } 2205 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2206 FETCH(&d[CHAN_W], 1, CHAN_W); 2207 } 2208 2209 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2210 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); 2211 } 2212 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2213 STORE(&d[CHAN_Y], 0, CHAN_Y); 2214 } 2215 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2216 STORE(&d[CHAN_Z], 0, CHAN_Z); 2217 } 2218 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2219 STORE(&d[CHAN_W], 0, CHAN_W); 2220 } 2221 break; 2222 2223 case TGSI_OPCODE_MIN: 2224 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2225 FETCH(&r[0], 0, chan_index); 2226 FETCH(&r[1], 1, chan_index); 2227 2228 /* XXX use micro_min()?? */ 2229 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); 2230 } 2231 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2232 STORE(&d[chan_index], 0, chan_index); 2233 } 2234 break; 2235 2236 case TGSI_OPCODE_MAX: 2237 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2238 FETCH(&r[0], 0, chan_index); 2239 FETCH(&r[1], 1, chan_index); 2240 2241 /* XXX use micro_max()?? */ 2242 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); 2243 } 2244 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2245 STORE(&d[chan_index], 0, chan_index); 2246 } 2247 break; 2248 2249 case TGSI_OPCODE_SLT: 2250 /* TGSI_OPCODE_SETLT */ 2251 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2252 FETCH( &r[0], 0, chan_index ); 2253 FETCH( &r[1], 1, chan_index ); 2254 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2255 } 2256 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2257 STORE(&d[chan_index], 0, chan_index); 2258 } 2259 break; 2260 2261 case TGSI_OPCODE_SGE: 2262 /* TGSI_OPCODE_SETGE */ 2263 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2264 FETCH( &r[0], 0, chan_index ); 2265 FETCH( &r[1], 1, chan_index ); 2266 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2267 } 2268 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2269 STORE(&d[chan_index], 0, chan_index); 2270 } 2271 break; 2272 2273 case TGSI_OPCODE_MAD: 2274 /* TGSI_OPCODE_MADD */ 2275 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2276 FETCH( &r[0], 0, chan_index ); 2277 FETCH( &r[1], 1, chan_index ); 2278 micro_mul( &r[0], &r[0], &r[1] ); 2279 FETCH( &r[1], 2, chan_index ); 2280 micro_add(&d[chan_index], &r[0], &r[1]); 2281 } 2282 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2283 STORE(&d[chan_index], 0, chan_index); 2284 } 2285 break; 2286 2287 case TGSI_OPCODE_SUB: 2288 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2289 FETCH(&r[0], 0, chan_index); 2290 FETCH(&r[1], 1, chan_index); 2291 micro_sub(&d[chan_index], &r[0], &r[1]); 2292 } 2293 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2294 STORE(&d[chan_index], 0, chan_index); 2295 } 2296 break; 2297 2298 case TGSI_OPCODE_LRP: 2299 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2300 FETCH(&r[0], 0, chan_index); 2301 FETCH(&r[1], 1, chan_index); 2302 FETCH(&r[2], 2, chan_index); 2303 micro_sub( &r[1], &r[1], &r[2] ); 2304 micro_mul( &r[0], &r[0], &r[1] ); 2305 micro_add(&d[chan_index], &r[0], &r[2]); 2306 } 2307 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2308 STORE(&d[chan_index], 0, chan_index); 2309 } 2310 break; 2311 2312 case TGSI_OPCODE_CND: 2313 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2314 FETCH(&r[0], 0, chan_index); 2315 FETCH(&r[1], 1, chan_index); 2316 FETCH(&r[2], 2, chan_index); 2317 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2318 } 2319 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2320 STORE(&d[chan_index], 0, chan_index); 2321 } 2322 break; 2323 2324 case TGSI_OPCODE_DP2A: 2325 FETCH( &r[0], 0, CHAN_X ); 2326 FETCH( &r[1], 1, CHAN_X ); 2327 micro_mul( &r[0], &r[0], &r[1] ); 2328 2329 FETCH( &r[1], 0, CHAN_Y ); 2330 FETCH( &r[2], 1, CHAN_Y ); 2331 micro_mul( &r[1], &r[1], &r[2] ); 2332 micro_add( &r[0], &r[0], &r[1] ); 2333 2334 FETCH( &r[2], 2, CHAN_X ); 2335 micro_add( &r[0], &r[0], &r[2] ); 2336 2337 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2338 STORE( &r[0], 0, chan_index ); 2339 } 2340 break; 2341 2342 case TGSI_OPCODE_FRC: 2343 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2344 FETCH( &r[0], 0, chan_index ); 2345 micro_frc(&d[chan_index], &r[0]); 2346 } 2347 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2348 STORE(&d[chan_index], 0, chan_index); 2349 } 2350 break; 2351 2352 case TGSI_OPCODE_CLAMP: 2353 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2354 FETCH(&r[0], 0, chan_index); 2355 FETCH(&r[1], 1, chan_index); 2356 micro_max(&r[0], &r[0], &r[1]); 2357 FETCH(&r[1], 2, chan_index); 2358 micro_min(&d[chan_index], &r[0], &r[1]); 2359 } 2360 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2361 STORE(&d[chan_index], 0, chan_index); 2362 } 2363 break; 2364 2365 case TGSI_OPCODE_ROUND: 2366 case TGSI_OPCODE_ARR: 2367 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2368 FETCH( &r[0], 0, chan_index ); 2369 micro_rnd(&d[chan_index], &r[0]); 2370 } 2371 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2372 STORE(&d[chan_index], 0, chan_index); 2373 } 2374 break; 2375 2376 case TGSI_OPCODE_EX2: 2377 FETCH(&r[0], 0, CHAN_X); 2378 2379 micro_exp2( &r[0], &r[0] ); 2380 2381 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2382 STORE( &r[0], 0, chan_index ); 2383 } 2384 break; 2385 2386 case TGSI_OPCODE_LG2: 2387 FETCH( &r[0], 0, CHAN_X ); 2388 micro_lg2( &r[0], &r[0] ); 2389 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2390 STORE( &r[0], 0, chan_index ); 2391 } 2392 break; 2393 2394 case TGSI_OPCODE_POW: 2395 FETCH(&r[0], 0, CHAN_X); 2396 FETCH(&r[1], 1, CHAN_X); 2397 2398 micro_pow( &r[0], &r[0], &r[1] ); 2399 2400 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2401 STORE( &r[0], 0, chan_index ); 2402 } 2403 break; 2404 2405 case TGSI_OPCODE_XPD: 2406 FETCH(&r[0], 0, CHAN_Y); 2407 FETCH(&r[1], 1, CHAN_Z); 2408 2409 micro_mul( &r[2], &r[0], &r[1] ); 2410 2411 FETCH(&r[3], 0, CHAN_Z); 2412 FETCH(&r[4], 1, CHAN_Y); 2413 2414 micro_mul( &r[5], &r[3], &r[4] ); 2415 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2416 2417 FETCH(&r[2], 1, CHAN_X); 2418 2419 micro_mul( &r[3], &r[3], &r[2] ); 2420 2421 FETCH(&r[5], 0, CHAN_X); 2422 2423 micro_mul( &r[1], &r[1], &r[5] ); 2424 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2425 2426 micro_mul( &r[5], &r[5], &r[4] ); 2427 micro_mul( &r[0], &r[0], &r[2] ); 2428 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2429 2430 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2431 STORE(&d[CHAN_X], 0, CHAN_X); 2432 } 2433 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2434 STORE(&d[CHAN_Y], 0, CHAN_Y); 2435 } 2436 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2437 STORE(&d[CHAN_Z], 0, CHAN_Z); 2438 } 2439 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2440 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2441 } 2442 break; 2443 2444 case TGSI_OPCODE_ABS: 2445 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2446 FETCH(&r[0], 0, chan_index); 2447 micro_abs(&d[chan_index], &r[0]); 2448 } 2449 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2450 STORE(&d[chan_index], 0, chan_index); 2451 } 2452 break; 2453 2454 case TGSI_OPCODE_RCC: 2455 FETCH(&r[0], 0, CHAN_X); 2456 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2457 micro_float_clamp(&r[0], &r[0]); 2458 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2459 STORE(&r[0], 0, chan_index); 2460 } 2461 break; 2462 2463 case TGSI_OPCODE_DPH: 2464 FETCH(&r[0], 0, CHAN_X); 2465 FETCH(&r[1], 1, CHAN_X); 2466 2467 micro_mul( &r[0], &r[0], &r[1] ); 2468 2469 FETCH(&r[1], 0, CHAN_Y); 2470 FETCH(&r[2], 1, CHAN_Y); 2471 2472 micro_mul( &r[1], &r[1], &r[2] ); 2473 micro_add( &r[0], &r[0], &r[1] ); 2474 2475 FETCH(&r[1], 0, CHAN_Z); 2476 FETCH(&r[2], 1, CHAN_Z); 2477 2478 micro_mul( &r[1], &r[1], &r[2] ); 2479 micro_add( &r[0], &r[0], &r[1] ); 2480 2481 FETCH(&r[1], 1, CHAN_W); 2482 2483 micro_add( &r[0], &r[0], &r[1] ); 2484 2485 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2486 STORE( &r[0], 0, chan_index ); 2487 } 2488 break; 2489 2490 case TGSI_OPCODE_COS: 2491 FETCH(&r[0], 0, CHAN_X); 2492 2493 micro_cos( &r[0], &r[0] ); 2494 2495 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2496 STORE( &r[0], 0, chan_index ); 2497 } 2498 break; 2499 2500 case TGSI_OPCODE_DDX: 2501 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2502 FETCH( &r[0], 0, chan_index ); 2503 micro_ddx(&d[chan_index], &r[0]); 2504 } 2505 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2506 STORE(&d[chan_index], 0, chan_index); 2507 } 2508 break; 2509 2510 case TGSI_OPCODE_DDY: 2511 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2512 FETCH( &r[0], 0, chan_index ); 2513 micro_ddy(&d[chan_index], &r[0]); 2514 } 2515 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2516 STORE(&d[chan_index], 0, chan_index); 2517 } 2518 break; 2519 2520 case TGSI_OPCODE_KILP: 2521 exec_kilp (mach, inst); 2522 break; 2523 2524 case TGSI_OPCODE_KIL: 2525 exec_kil (mach, inst); 2526 break; 2527 2528 case TGSI_OPCODE_PK2H: 2529 assert (0); 2530 break; 2531 2532 case TGSI_OPCODE_PK2US: 2533 assert (0); 2534 break; 2535 2536 case TGSI_OPCODE_PK4B: 2537 assert (0); 2538 break; 2539 2540 case TGSI_OPCODE_PK4UB: 2541 assert (0); 2542 break; 2543 2544 case TGSI_OPCODE_RFL: 2545 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2546 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2547 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2548 /* r0 = dp3(src0, src0) */ 2549 FETCH(&r[2], 0, CHAN_X); 2550 micro_mul(&r[0], &r[2], &r[2]); 2551 FETCH(&r[4], 0, CHAN_Y); 2552 micro_mul(&r[8], &r[4], &r[4]); 2553 micro_add(&r[0], &r[0], &r[8]); 2554 FETCH(&r[6], 0, CHAN_Z); 2555 micro_mul(&r[8], &r[6], &r[6]); 2556 micro_add(&r[0], &r[0], &r[8]); 2557 2558 /* r1 = dp3(src0, src1) */ 2559 FETCH(&r[3], 1, CHAN_X); 2560 micro_mul(&r[1], &r[2], &r[3]); 2561 FETCH(&r[5], 1, CHAN_Y); 2562 micro_mul(&r[8], &r[4], &r[5]); 2563 micro_add(&r[1], &r[1], &r[8]); 2564 FETCH(&r[7], 1, CHAN_Z); 2565 micro_mul(&r[8], &r[6], &r[7]); 2566 micro_add(&r[1], &r[1], &r[8]); 2567 2568 /* r1 = 2 * r1 / r0 */ 2569 micro_add(&r[1], &r[1], &r[1]); 2570 micro_div(&r[1], &r[1], &r[0]); 2571 2572 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2573 micro_mul(&r[2], &r[2], &r[1]); 2574 micro_sub(&r[2], &r[2], &r[3]); 2575 STORE(&r[2], 0, CHAN_X); 2576 } 2577 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2578 micro_mul(&r[4], &r[4], &r[1]); 2579 micro_sub(&r[4], &r[4], &r[5]); 2580 STORE(&r[4], 0, CHAN_Y); 2581 } 2582 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2583 micro_mul(&r[6], &r[6], &r[1]); 2584 micro_sub(&r[6], &r[6], &r[7]); 2585 STORE(&r[6], 0, CHAN_Z); 2586 } 2587 } 2588 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2589 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2590 } 2591 break; 2592 2593 case TGSI_OPCODE_SEQ: 2594 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2595 FETCH( &r[0], 0, chan_index ); 2596 FETCH( &r[1], 1, chan_index ); 2597 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2598 } 2599 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2600 STORE(&d[chan_index], 0, chan_index); 2601 } 2602 break; 2603 2604 case TGSI_OPCODE_SFL: 2605 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2606 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2607 } 2608 break; 2609 2610 case TGSI_OPCODE_SGT: 2611 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2612 FETCH( &r[0], 0, chan_index ); 2613 FETCH( &r[1], 1, chan_index ); 2614 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2615 } 2616 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2617 STORE(&d[chan_index], 0, chan_index); 2618 } 2619 break; 2620 2621 case TGSI_OPCODE_SIN: 2622 FETCH( &r[0], 0, CHAN_X ); 2623 micro_sin( &r[0], &r[0] ); 2624 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2625 STORE( &r[0], 0, chan_index ); 2626 } 2627 break; 2628 2629 case TGSI_OPCODE_SLE: 2630 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2631 FETCH( &r[0], 0, chan_index ); 2632 FETCH( &r[1], 1, chan_index ); 2633 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2634 } 2635 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2636 STORE(&d[chan_index], 0, chan_index); 2637 } 2638 break; 2639 2640 case TGSI_OPCODE_SNE: 2641 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2642 FETCH( &r[0], 0, chan_index ); 2643 FETCH( &r[1], 1, chan_index ); 2644 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2645 } 2646 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2647 STORE(&d[chan_index], 0, chan_index); 2648 } 2649 break; 2650 2651 case TGSI_OPCODE_STR: 2652 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2653 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2654 } 2655 break; 2656 2657 case TGSI_OPCODE_TEX: 2658 /* simple texture lookup */ 2659 /* src[0] = texcoord */ 2660 /* src[1] = sampler unit */ 2661 exec_tex(mach, inst, FALSE, FALSE); 2662 break; 2663 2664 case TGSI_OPCODE_TXB: 2665 /* Texture lookup with lod bias */ 2666 /* src[0] = texcoord (src[0].w = LOD bias) */ 2667 /* src[1] = sampler unit */ 2668 exec_tex(mach, inst, TRUE, FALSE); 2669 break; 2670 2671 case TGSI_OPCODE_TXD: 2672 /* Texture lookup with explict partial derivatives */ 2673 /* src[0] = texcoord */ 2674 /* src[1] = d[strq]/dx */ 2675 /* src[2] = d[strq]/dy */ 2676 /* src[3] = sampler unit */ 2677 exec_txd(mach, inst); 2678 break; 2679 2680 case TGSI_OPCODE_TXL: 2681 /* Texture lookup with explit LOD */ 2682 /* src[0] = texcoord (src[0].w = LOD) */ 2683 /* src[1] = sampler unit */ 2684 exec_tex(mach, inst, TRUE, FALSE); 2685 break; 2686 2687 case TGSI_OPCODE_TXP: 2688 /* Texture lookup with projection */ 2689 /* src[0] = texcoord (src[0].w = projection) */ 2690 /* src[1] = sampler unit */ 2691 exec_tex(mach, inst, FALSE, TRUE); 2692 break; 2693 2694 case TGSI_OPCODE_UP2H: 2695 assert (0); 2696 break; 2697 2698 case TGSI_OPCODE_UP2US: 2699 assert (0); 2700 break; 2701 2702 case TGSI_OPCODE_UP4B: 2703 assert (0); 2704 break; 2705 2706 case TGSI_OPCODE_UP4UB: 2707 assert (0); 2708 break; 2709 2710 case TGSI_OPCODE_X2D: 2711 FETCH(&r[0], 1, CHAN_X); 2712 FETCH(&r[1], 1, CHAN_Y); 2713 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2714 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2715 FETCH(&r[2], 2, CHAN_X); 2716 micro_mul(&r[2], &r[2], &r[0]); 2717 FETCH(&r[3], 2, CHAN_Y); 2718 micro_mul(&r[3], &r[3], &r[1]); 2719 micro_add(&r[2], &r[2], &r[3]); 2720 FETCH(&r[3], 0, CHAN_X); 2721 micro_add(&d[CHAN_X], &r[2], &r[3]); 2722 2723 } 2724 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2725 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2726 FETCH(&r[2], 2, CHAN_Z); 2727 micro_mul(&r[2], &r[2], &r[0]); 2728 FETCH(&r[3], 2, CHAN_W); 2729 micro_mul(&r[3], &r[3], &r[1]); 2730 micro_add(&r[2], &r[2], &r[3]); 2731 FETCH(&r[3], 0, CHAN_Y); 2732 micro_add(&d[CHAN_Y], &r[2], &r[3]); 2733 2734 } 2735 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2736 STORE(&d[CHAN_X], 0, CHAN_X); 2737 } 2738 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2739 STORE(&d[CHAN_Y], 0, CHAN_Y); 2740 } 2741 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2742 STORE(&d[CHAN_X], 0, CHAN_Z); 2743 } 2744 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2745 STORE(&d[CHAN_Y], 0, CHAN_W); 2746 } 2747 break; 2748 2749 case TGSI_OPCODE_ARA: 2750 assert (0); 2751 break; 2752 2753 case TGSI_OPCODE_BRA: 2754 assert (0); 2755 break; 2756 2757 case TGSI_OPCODE_CAL: 2758 /* skip the call if no execution channels are enabled */ 2759 if (mach->ExecMask) { 2760 /* do the call */ 2761 2762 /* First, record the depths of the execution stacks. 2763 * This is important for deeply nested/looped return statements. 2764 * We have to unwind the stacks by the correct amount. For a 2765 * real code generator, we could determine the number of entries 2766 * to pop off each stack with simple static analysis and avoid 2767 * implementing this data structure at run time. 2768 */ 2769 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 2770 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 2771 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 2772 /* note that PC was already incremented above */ 2773 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 2774 2775 mach->CallStackTop++; 2776 2777 /* Second, push the Cond, Loop, Cont, Func stacks */ 2778 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2779 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2780 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2781 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2782 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2783 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2784 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2785 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2786 2787 /* Finally, jump to the subroutine */ 2788 *pc = inst->Label.Label; 2789 } 2790 break; 2791 2792 case TGSI_OPCODE_RET: 2793 mach->FuncMask &= ~mach->ExecMask; 2794 UPDATE_EXEC_MASK(mach); 2795 2796 if (mach->FuncMask == 0x0) { 2797 /* really return now (otherwise, keep executing */ 2798 2799 if (mach->CallStackTop == 0) { 2800 /* returning from main() */ 2801 *pc = -1; 2802 return; 2803 } 2804 2805 assert(mach->CallStackTop > 0); 2806 mach->CallStackTop--; 2807 2808 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 2809 mach->CondMask = mach->CondStack[mach->CondStackTop]; 2810 2811 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 2812 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 2813 2814 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 2815 mach->ContMask = mach->ContStack[mach->ContStackTop]; 2816 2817 assert(mach->FuncStackTop > 0); 2818 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2819 2820 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 2821 2822 UPDATE_EXEC_MASK(mach); 2823 } 2824 break; 2825 2826 case TGSI_OPCODE_SSG: 2827 /* TGSI_OPCODE_SGN */ 2828 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2829 FETCH( &r[0], 0, chan_index ); 2830 micro_sgn(&d[chan_index], &r[0]); 2831 } 2832 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2833 STORE(&d[chan_index], 0, chan_index); 2834 } 2835 break; 2836 2837 case TGSI_OPCODE_CMP: 2838 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2839 FETCH(&r[0], 0, chan_index); 2840 FETCH(&r[1], 1, chan_index); 2841 FETCH(&r[2], 2, chan_index); 2842 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); 2843 } 2844 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2845 STORE(&d[chan_index], 0, chan_index); 2846 } 2847 break; 2848 2849 case TGSI_OPCODE_SCS: 2850 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2851 FETCH( &r[0], 0, CHAN_X ); 2852 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2853 micro_cos(&r[1], &r[0]); 2854 STORE(&r[1], 0, CHAN_X); 2855 } 2856 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2857 micro_sin(&r[1], &r[0]); 2858 STORE(&r[1], 0, CHAN_Y); 2859 } 2860 } 2861 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2862 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2863 } 2864 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2865 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2866 } 2867 break; 2868 2869 case TGSI_OPCODE_NRM: 2870 /* 3-component vector normalize */ 2871 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2872 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2873 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2874 /* r3 = sqrt(dp3(src0, src0)) */ 2875 FETCH(&r[0], 0, CHAN_X); 2876 micro_mul(&r[3], &r[0], &r[0]); 2877 FETCH(&r[1], 0, CHAN_Y); 2878 micro_mul(&r[4], &r[1], &r[1]); 2879 micro_add(&r[3], &r[3], &r[4]); 2880 FETCH(&r[2], 0, CHAN_Z); 2881 micro_mul(&r[4], &r[2], &r[2]); 2882 micro_add(&r[3], &r[3], &r[4]); 2883 micro_sqrt(&r[3], &r[3]); 2884 2885 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2886 micro_div(&r[0], &r[0], &r[3]); 2887 STORE(&r[0], 0, CHAN_X); 2888 } 2889 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2890 micro_div(&r[1], &r[1], &r[3]); 2891 STORE(&r[1], 0, CHAN_Y); 2892 } 2893 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2894 micro_div(&r[2], &r[2], &r[3]); 2895 STORE(&r[2], 0, CHAN_Z); 2896 } 2897 } 2898 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2899 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2900 } 2901 break; 2902 2903 case TGSI_OPCODE_NRM4: 2904 /* 4-component vector normalize */ 2905 { 2906 union tgsi_exec_channel tmp, dot; 2907 2908 /* tmp = dp4(src0, src0): */ 2909 FETCH( &r[0], 0, CHAN_X ); 2910 micro_mul( &tmp, &r[0], &r[0] ); 2911 2912 FETCH( &r[1], 0, CHAN_Y ); 2913 micro_mul( &dot, &r[1], &r[1] ); 2914 micro_add( &tmp, &tmp, &dot ); 2915 2916 FETCH( &r[2], 0, CHAN_Z ); 2917 micro_mul( &dot, &r[2], &r[2] ); 2918 micro_add( &tmp, &tmp, &dot ); 2919 2920 FETCH( &r[3], 0, CHAN_W ); 2921 micro_mul( &dot, &r[3], &r[3] ); 2922 micro_add( &tmp, &tmp, &dot ); 2923 2924 /* tmp = 1 / sqrt(tmp) */ 2925 micro_sqrt( &tmp, &tmp ); 2926 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2927 2928 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2929 /* chan = chan * tmp */ 2930 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2931 STORE( &r[chan_index], 0, chan_index ); 2932 } 2933 } 2934 break; 2935 2936 case TGSI_OPCODE_DIV: 2937 assert( 0 ); 2938 break; 2939 2940 case TGSI_OPCODE_DP2: 2941 FETCH( &r[0], 0, CHAN_X ); 2942 FETCH( &r[1], 1, CHAN_X ); 2943 micro_mul( &r[0], &r[0], &r[1] ); 2944 2945 FETCH( &r[1], 0, CHAN_Y ); 2946 FETCH( &r[2], 1, CHAN_Y ); 2947 micro_mul( &r[1], &r[1], &r[2] ); 2948 micro_add( &r[0], &r[0], &r[1] ); 2949 2950 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2951 STORE( &r[0], 0, chan_index ); 2952 } 2953 break; 2954 2955 case TGSI_OPCODE_IF: 2956 /* push CondMask */ 2957 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2958 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2959 FETCH( &r[0], 0, CHAN_X ); 2960 /* update CondMask */ 2961 if( ! r[0].u[0] ) { 2962 mach->CondMask &= ~0x1; 2963 } 2964 if( ! r[0].u[1] ) { 2965 mach->CondMask &= ~0x2; 2966 } 2967 if( ! r[0].u[2] ) { 2968 mach->CondMask &= ~0x4; 2969 } 2970 if( ! r[0].u[3] ) { 2971 mach->CondMask &= ~0x8; 2972 } 2973 UPDATE_EXEC_MASK(mach); 2974 /* Todo: If CondMask==0, jump to ELSE */ 2975 break; 2976 2977 case TGSI_OPCODE_ELSE: 2978 /* invert CondMask wrt previous mask */ 2979 { 2980 uint prevMask; 2981 assert(mach->CondStackTop > 0); 2982 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2983 mach->CondMask = ~mach->CondMask & prevMask; 2984 UPDATE_EXEC_MASK(mach); 2985 /* Todo: If CondMask==0, jump to ENDIF */ 2986 } 2987 break; 2988 2989 case TGSI_OPCODE_ENDIF: 2990 /* pop CondMask */ 2991 assert(mach->CondStackTop > 0); 2992 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2993 UPDATE_EXEC_MASK(mach); 2994 break; 2995 2996 case TGSI_OPCODE_END: 2997 /* halt execution */ 2998 *pc = -1; 2999 break; 3000 3001 case TGSI_OPCODE_REP: 3002 assert (0); 3003 break; 3004 3005 case TGSI_OPCODE_ENDREP: 3006 assert (0); 3007 break; 3008 3009 case TGSI_OPCODE_PUSHA: 3010 assert (0); 3011 break; 3012 3013 case TGSI_OPCODE_POPA: 3014 assert (0); 3015 break; 3016 3017 case TGSI_OPCODE_CEIL: 3018 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3019 FETCH( &r[0], 0, chan_index ); 3020 micro_ceil(&d[chan_index], &r[0]); 3021 } 3022 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3023 STORE(&d[chan_index], 0, chan_index); 3024 } 3025 break; 3026 3027 case TGSI_OPCODE_I2F: 3028 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3029 FETCH( &r[0], 0, chan_index ); 3030 micro_i2f(&d[chan_index], &r[0]); 3031 } 3032 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3033 STORE(&d[chan_index], 0, chan_index); 3034 } 3035 break; 3036 3037 case TGSI_OPCODE_NOT: 3038 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3039 FETCH( &r[0], 0, chan_index ); 3040 micro_not(&d[chan_index], &r[0]); 3041 } 3042 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3043 STORE(&d[chan_index], 0, chan_index); 3044 } 3045 break; 3046 3047 case TGSI_OPCODE_TRUNC: 3048 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3049 FETCH( &r[0], 0, chan_index ); 3050 micro_trunc(&d[chan_index], &r[0]); 3051 } 3052 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3053 STORE(&d[chan_index], 0, chan_index); 3054 } 3055 break; 3056 3057 case TGSI_OPCODE_SHL: 3058 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3059 FETCH( &r[0], 0, chan_index ); 3060 FETCH( &r[1], 1, chan_index ); 3061 micro_shl(&d[chan_index], &r[0], &r[1]); 3062 } 3063 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3064 STORE(&d[chan_index], 0, chan_index); 3065 } 3066 break; 3067 3068 case TGSI_OPCODE_SHR: 3069 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3070 FETCH( &r[0], 0, chan_index ); 3071 FETCH( &r[1], 1, chan_index ); 3072 micro_ishr(&d[chan_index], &r[0], &r[1]); 3073 } 3074 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3075 STORE(&d[chan_index], 0, chan_index); 3076 } 3077 break; 3078 3079 case TGSI_OPCODE_AND: 3080 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3081 FETCH( &r[0], 0, chan_index ); 3082 FETCH( &r[1], 1, chan_index ); 3083 micro_and(&d[chan_index], &r[0], &r[1]); 3084 } 3085 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3086 STORE(&d[chan_index], 0, chan_index); 3087 } 3088 break; 3089 3090 case TGSI_OPCODE_OR: 3091 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3092 FETCH( &r[0], 0, chan_index ); 3093 FETCH( &r[1], 1, chan_index ); 3094 micro_or(&d[chan_index], &r[0], &r[1]); 3095 } 3096 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3097 STORE(&d[chan_index], 0, chan_index); 3098 } 3099 break; 3100 3101 case TGSI_OPCODE_MOD: 3102 assert (0); 3103 break; 3104 3105 case TGSI_OPCODE_XOR: 3106 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3107 FETCH( &r[0], 0, chan_index ); 3108 FETCH( &r[1], 1, chan_index ); 3109 micro_xor(&d[chan_index], &r[0], &r[1]); 3110 } 3111 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3112 STORE(&d[chan_index], 0, chan_index); 3113 } 3114 break; 3115 3116 case TGSI_OPCODE_SAD: 3117 assert (0); 3118 break; 3119 3120 case TGSI_OPCODE_TXF: 3121 assert (0); 3122 break; 3123 3124 case TGSI_OPCODE_TXQ: 3125 assert (0); 3126 break; 3127 3128 case TGSI_OPCODE_EMIT: 3129 emit_vertex(mach); 3130 break; 3131 3132 case TGSI_OPCODE_ENDPRIM: 3133 emit_primitive(mach); 3134 break; 3135 3136 case TGSI_OPCODE_BGNFOR: 3137 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3138 for (chan_index = 0; chan_index < 3; chan_index++) { 3139 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3140 } 3141 ++mach->LoopCounterStackTop; 3142 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); 3143 /* update LoopMask */ 3144 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3145 mach->LoopMask &= ~0x1; 3146 } 3147 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3148 mach->LoopMask &= ~0x2; 3149 } 3150 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3151 mach->LoopMask &= ~0x4; 3152 } 3153 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3154 mach->LoopMask &= ~0x8; 3155 } 3156 /* TODO: if mach->LoopMask == 0, jump to end of loop */ 3157 UPDATE_EXEC_MASK(mach); 3158 /* fall-through (for now) */ 3159 case TGSI_OPCODE_BGNLOOP: 3160 /* push LoopMask and ContMasks */ 3161 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3162 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3163 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3164 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3165 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3166 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3167 break; 3168 3169 case TGSI_OPCODE_ENDFOR: 3170 assert(mach->LoopCounterStackTop > 0); 3171 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3172 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3173 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 3174 /* update LoopMask */ 3175 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3176 mach->LoopMask &= ~0x1; 3177 } 3178 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3179 mach->LoopMask &= ~0x2; 3180 } 3181 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3182 mach->LoopMask &= ~0x4; 3183 } 3184 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3185 mach->LoopMask &= ~0x8; 3186 } 3187 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3188 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3189 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3190 assert(mach->LoopLabelStackTop > 0); 3191 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3192 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); 3193 /* Restore ContMask, but don't pop */ 3194 assert(mach->ContStackTop > 0); 3195 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3196 UPDATE_EXEC_MASK(mach); 3197 if (mach->ExecMask) { 3198 /* repeat loop: jump to instruction just past BGNLOOP */ 3199 assert(mach->LoopLabelStackTop > 0); 3200 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3201 } 3202 else { 3203 /* exit loop: pop LoopMask */ 3204 assert(mach->LoopStackTop > 0); 3205 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3206 /* pop ContMask */ 3207 assert(mach->ContStackTop > 0); 3208 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3209 assert(mach->LoopLabelStackTop > 0); 3210 --mach->LoopLabelStackTop; 3211 assert(mach->LoopCounterStackTop > 0); 3212 --mach->LoopCounterStackTop; 3213 } 3214 UPDATE_EXEC_MASK(mach); 3215 break; 3216 3217 case TGSI_OPCODE_ENDLOOP: 3218 /* Restore ContMask, but don't pop */ 3219 assert(mach->ContStackTop > 0); 3220 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3221 UPDATE_EXEC_MASK(mach); 3222 if (mach->ExecMask) { 3223 /* repeat loop: jump to instruction just past BGNLOOP */ 3224 assert(mach->LoopLabelStackTop > 0); 3225 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3226 } 3227 else { 3228 /* exit loop: pop LoopMask */ 3229 assert(mach->LoopStackTop > 0); 3230 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3231 /* pop ContMask */ 3232 assert(mach->ContStackTop > 0); 3233 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3234 assert(mach->LoopLabelStackTop > 0); 3235 --mach->LoopLabelStackTop; 3236 } 3237 UPDATE_EXEC_MASK(mach); 3238 break; 3239 3240 case TGSI_OPCODE_BRK: 3241 /* turn off loop channels for each enabled exec channel */ 3242 mach->LoopMask &= ~mach->ExecMask; 3243 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3244 UPDATE_EXEC_MASK(mach); 3245 break; 3246 3247 case TGSI_OPCODE_CONT: 3248 /* turn off cont channels for each enabled exec channel */ 3249 mach->ContMask &= ~mach->ExecMask; 3250 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3251 UPDATE_EXEC_MASK(mach); 3252 break; 3253 3254 case TGSI_OPCODE_BGNSUB: 3255 /* no-op */ 3256 break; 3257 3258 case TGSI_OPCODE_ENDSUB: 3259 /* 3260 * XXX: This really should be a no-op. We should never reach this opcode. 3261 */ 3262 3263 assert(mach->CallStackTop > 0); 3264 mach->CallStackTop--; 3265 3266 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3267 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3268 3269 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3270 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3271 3272 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3273 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3274 3275 assert(mach->FuncStackTop > 0); 3276 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3277 3278 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3279 3280 UPDATE_EXEC_MASK(mach); 3281 break; 3282 3283 case TGSI_OPCODE_NOP: 3284 break; 3285 3286 case TGSI_OPCODE_BREAKC: 3287 FETCH(&r[0], 0, CHAN_X); 3288 /* update CondMask */ 3289 if (r[0].u[0] && (mach->ExecMask & 0x1)) { 3290 mach->LoopMask &= ~0x1; 3291 } 3292 if (r[0].u[1] && (mach->ExecMask & 0x2)) { 3293 mach->LoopMask &= ~0x2; 3294 } 3295 if (r[0].u[2] && (mach->ExecMask & 0x4)) { 3296 mach->LoopMask &= ~0x4; 3297 } 3298 if (r[0].u[3] && (mach->ExecMask & 0x8)) { 3299 mach->LoopMask &= ~0x8; 3300 } 3301 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3302 UPDATE_EXEC_MASK(mach); 3303 break; 3304 3305 default: 3306 assert( 0 ); 3307 } 3308} 3309 3310#define DEBUG_EXECUTION 0 3311 3312 3313/** 3314 * Run TGSI interpreter. 3315 * \return bitmask of "alive" quad components 3316 */ 3317uint 3318tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3319{ 3320 uint i; 3321 int pc = 0; 3322 3323 mach->CondMask = 0xf; 3324 mach->LoopMask = 0xf; 3325 mach->ContMask = 0xf; 3326 mach->FuncMask = 0xf; 3327 mach->ExecMask = 0xf; 3328 3329 assert(mach->CondStackTop == 0); 3330 assert(mach->LoopStackTop == 0); 3331 assert(mach->ContStackTop == 0); 3332 assert(mach->CallStackTop == 0); 3333 3334 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3335 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3336 3337 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3338 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3339 mach->Primitives[0] = 0; 3340 } 3341 3342 for (i = 0; i < QUAD_SIZE; i++) { 3343 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3344 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3345 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3346 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3347 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3348 } 3349 3350 /* execute declarations (interpolants) */ 3351 for (i = 0; i < mach->NumDeclarations; i++) { 3352 exec_declaration( mach, mach->Declarations+i ); 3353 } 3354 3355 { 3356#if DEBUG_EXECUTION 3357 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 3358 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 3359 uint inst = 1; 3360 3361 memcpy(temps, mach->Temps, sizeof(temps)); 3362 memcpy(outputs, mach->Outputs, sizeof(outputs)); 3363#endif 3364 3365 /* execute instructions, until pc is set to -1 */ 3366 while (pc != -1) { 3367 3368#if DEBUG_EXECUTION 3369 uint i; 3370 3371 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 3372#endif 3373 3374 assert(pc < (int) mach->NumInstructions); 3375 exec_instruction(mach, mach->Instructions + pc, &pc); 3376 3377#if DEBUG_EXECUTION 3378 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 3379 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 3380 uint j; 3381 3382 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 3383 debug_printf("TEMP[%2u] = ", i); 3384 for (j = 0; j < 4; j++) { 3385 if (j > 0) { 3386 debug_printf(" "); 3387 } 3388 debug_printf("(%6f, %6f, %6f, %6f)\n", 3389 temps[i].xyzw[0].f[j], 3390 temps[i].xyzw[1].f[j], 3391 temps[i].xyzw[2].f[j], 3392 temps[i].xyzw[3].f[j]); 3393 } 3394 } 3395 } 3396 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 3397 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 3398 uint j; 3399 3400 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 3401 debug_printf("OUT[%2u] = ", i); 3402 for (j = 0; j < 4; j++) { 3403 if (j > 0) { 3404 debug_printf(" "); 3405 } 3406 debug_printf("{%6f, %6f, %6f, %6f}\n", 3407 outputs[i].xyzw[0].f[j], 3408 outputs[i].xyzw[1].f[j], 3409 outputs[i].xyzw[2].f[j], 3410 outputs[i].xyzw[3].f[j]); 3411 } 3412 } 3413 } 3414#endif 3415 } 3416 } 3417 3418#if 0 3419 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3420 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3421 /* 3422 * Scale back depth component. 3423 */ 3424 for (i = 0; i < 4; i++) 3425 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3426 } 3427#endif 3428 3429 assert(mach->CondStackTop == 0); 3430 assert(mach->LoopStackTop == 0); 3431 assert(mach->ContStackTop == 0); 3432 assert(mach->CallStackTop == 0); 3433 3434 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3435} 3436