tgsi_exec.c revision 3ff688ea299581e60caf5d6e1a464f68c717fe83
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65#define TILE_TOP_LEFT 0 66#define TILE_TOP_RIGHT 1 67#define TILE_BOTTOM_LEFT 2 68#define TILE_BOTTOM_RIGHT 3 69 70#define CHAN_X 0 71#define CHAN_Y 1 72#define CHAN_Z 2 73#define CHAN_W 3 74 75/* 76 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 77 */ 78#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 79#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 80#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 81#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 82#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 83#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 84#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 85#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 86#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 87#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 88#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 89#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 90#define TEMP_128_I TGSI_EXEC_TEMP_128_I 91#define TEMP_128_C TGSI_EXEC_TEMP_128_C 92#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 93#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 94#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 95#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 96#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 97#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 98#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 99#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 100#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 101#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 102#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 103#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 104#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 105#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 106#define TEMP_R0 TGSI_EXEC_TEMP_R0 107#define TEMP_P0 TGSI_EXEC_TEMP_P0 108 109#define IS_CHANNEL_ENABLED(INST, CHAN)\ 110 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) 111 112#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 113 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) 114 115#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 116 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 117 if (IS_CHANNEL_ENABLED( INST, CHAN )) 118 119#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 120 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 121 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 122 123 124/** The execution mask depends on the conditional mask and the loop mask */ 125#define UPDATE_EXEC_MASK(MACH) \ 126 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 127 128 129static const union tgsi_exec_channel ZeroVec = 130 { { 0.0, 0.0, 0.0, 0.0 } }; 131 132 133#ifdef DEBUG 134static void 135check_inf_or_nan(const union tgsi_exec_channel *chan) 136{ 137 assert(!util_is_inf_or_nan(chan->f[0])); 138 assert(!util_is_inf_or_nan(chan->f[1])); 139 assert(!util_is_inf_or_nan(chan->f[2])); 140 assert(!util_is_inf_or_nan(chan->f[3])); 141} 142#endif 143 144 145#ifdef DEBUG 146static void 147print_chan(const char *msg, const union tgsi_exec_channel *chan) 148{ 149 debug_printf("%s = {%f, %f, %f, %f}\n", 150 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 151} 152#endif 153 154 155#ifdef DEBUG 156static void 157print_temp(const struct tgsi_exec_machine *mach, uint index) 158{ 159 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 160 int i; 161 debug_printf("Temp[%u] =\n", index); 162 for (i = 0; i < 4; i++) { 163 debug_printf(" %c: { %f, %f, %f, %f }\n", 164 "XYZW"[i], 165 tmp->xyzw[i].f[0], 166 tmp->xyzw[i].f[1], 167 tmp->xyzw[i].f[2], 168 tmp->xyzw[i].f[3]); 169 } 170} 171#endif 172 173 174/** 175 * Check if there's a potential src/dst register data dependency when 176 * using SOA execution. 177 * Example: 178 * MOV T, T.yxwz; 179 * This would expand into: 180 * MOV t0, t1; 181 * MOV t1, t0; 182 * MOV t2, t3; 183 * MOV t3, t2; 184 * The second instruction will have the wrong value for t0 if executed as-is. 185 */ 186boolean 187tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 188{ 189 uint i, chan; 190 191 uint writemask = inst->Dst[0].Register.WriteMask; 192 if (writemask == TGSI_WRITEMASK_X || 193 writemask == TGSI_WRITEMASK_Y || 194 writemask == TGSI_WRITEMASK_Z || 195 writemask == TGSI_WRITEMASK_W || 196 writemask == TGSI_WRITEMASK_NONE) { 197 /* no chance of data dependency */ 198 return FALSE; 199 } 200 201 /* loop over src regs */ 202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 203 if ((inst->Src[i].Register.File == 204 inst->Dst[0].Register.File) && 205 (inst->Src[i].Register.Index == 206 inst->Dst[0].Register.Index)) { 207 /* loop over dest channels */ 208 uint channelsWritten = 0x0; 209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 210 /* check if we're reading a channel that's been written */ 211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 212 if (channelsWritten & (1 << swizzle)) { 213 return TRUE; 214 } 215 216 channelsWritten |= (1 << chan); 217 } 218 } 219 } 220 return FALSE; 221} 222 223 224/** 225 * Initialize machine state by expanding tokens to full instructions, 226 * allocating temporary storage, setting up constants, etc. 227 * After this, we can call tgsi_exec_machine_run() many times. 228 */ 229void 230tgsi_exec_machine_bind_shader( 231 struct tgsi_exec_machine *mach, 232 const struct tgsi_token *tokens, 233 uint numSamplers, 234 struct tgsi_sampler **samplers) 235{ 236 uint k; 237 struct tgsi_parse_context parse; 238 struct tgsi_exec_labels *labels = &mach->Labels; 239 struct tgsi_full_instruction *instructions; 240 struct tgsi_full_declaration *declarations; 241 uint maxInstructions = 10, numInstructions = 0; 242 uint maxDeclarations = 10, numDeclarations = 0; 243 uint instno = 0; 244 245#if 0 246 tgsi_dump(tokens, 0); 247#endif 248 249 util_init_math(); 250 251 mach->Tokens = tokens; 252 mach->Samplers = samplers; 253 254 k = tgsi_parse_init (&parse, mach->Tokens); 255 if (k != TGSI_PARSE_OK) { 256 debug_printf( "Problem parsing!\n" ); 257 return; 258 } 259 260 mach->Processor = parse.FullHeader.Processor.Processor; 261 mach->ImmLimit = 0; 262 labels->count = 0; 263 264 declarations = (struct tgsi_full_declaration *) 265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 266 267 if (!declarations) { 268 return; 269 } 270 271 instructions = (struct tgsi_full_instruction *) 272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 273 274 if (!instructions) { 275 FREE( declarations ); 276 return; 277 } 278 279 while( !tgsi_parse_end_of_tokens( &parse ) ) { 280 uint pointer = parse.Position; 281 uint i; 282 283 tgsi_parse_token( &parse ); 284 switch( parse.FullToken.Token.Type ) { 285 case TGSI_TOKEN_TYPE_DECLARATION: 286 /* save expanded declaration */ 287 if (numDeclarations == maxDeclarations) { 288 declarations = REALLOC(declarations, 289 maxDeclarations 290 * sizeof(struct tgsi_full_declaration), 291 (maxDeclarations + 10) 292 * sizeof(struct tgsi_full_declaration)); 293 maxDeclarations += 10; 294 } 295 memcpy(declarations + numDeclarations, 296 &parse.FullToken.FullDeclaration, 297 sizeof(declarations[0])); 298 numDeclarations++; 299 break; 300 301 case TGSI_TOKEN_TYPE_IMMEDIATE: 302 { 303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 304 assert( size <= 4 ); 305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 306 307 for( i = 0; i < size; i++ ) { 308 mach->Imms[mach->ImmLimit][i] = 309 parse.FullToken.FullImmediate.u[i].Float; 310 } 311 mach->ImmLimit += 1; 312 } 313 break; 314 315 case TGSI_TOKEN_TYPE_INSTRUCTION: 316 assert( labels->count < MAX_LABELS ); 317 318 labels->labels[labels->count][0] = instno; 319 labels->labels[labels->count][1] = pointer; 320 labels->count++; 321 322 /* save expanded instruction */ 323 if (numInstructions == maxInstructions) { 324 instructions = REALLOC(instructions, 325 maxInstructions 326 * sizeof(struct tgsi_full_instruction), 327 (maxInstructions + 10) 328 * sizeof(struct tgsi_full_instruction)); 329 maxInstructions += 10; 330 } 331 332 memcpy(instructions + numInstructions, 333 &parse.FullToken.FullInstruction, 334 sizeof(instructions[0])); 335 336 numInstructions++; 337 break; 338 339 case TGSI_TOKEN_TYPE_PROPERTY: 340 break; 341 342 default: 343 assert( 0 ); 344 } 345 } 346 tgsi_parse_free (&parse); 347 348 if (mach->Declarations) { 349 FREE( mach->Declarations ); 350 } 351 mach->Declarations = declarations; 352 mach->NumDeclarations = numDeclarations; 353 354 if (mach->Instructions) { 355 FREE( mach->Instructions ); 356 } 357 mach->Instructions = instructions; 358 mach->NumInstructions = numInstructions; 359} 360 361 362struct tgsi_exec_machine * 363tgsi_exec_machine_create( void ) 364{ 365 struct tgsi_exec_machine *mach; 366 uint i; 367 368 mach = align_malloc( sizeof *mach, 16 ); 369 if (!mach) 370 goto fail; 371 372 memset(mach, 0, sizeof(*mach)); 373 374 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 375 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 376 377 /* Setup constants. */ 378 for( i = 0; i < 4; i++ ) { 379 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 380 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 381 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 382 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 383 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 384 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 385 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 386 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 387 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 388 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 389 } 390 391#ifdef DEBUG 392 /* silence warnings */ 393 (void) print_chan; 394 (void) print_temp; 395#endif 396 397 return mach; 398 399fail: 400 align_free(mach); 401 return NULL; 402} 403 404 405void 406tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 407{ 408 if (mach) { 409 FREE(mach->Instructions); 410 FREE(mach->Declarations); 411 } 412 413 align_free(mach); 414} 415 416 417static void 418micro_abs( 419 union tgsi_exec_channel *dst, 420 const union tgsi_exec_channel *src ) 421{ 422 dst->f[0] = fabsf( src->f[0] ); 423 dst->f[1] = fabsf( src->f[1] ); 424 dst->f[2] = fabsf( src->f[2] ); 425 dst->f[3] = fabsf( src->f[3] ); 426} 427 428static void 429micro_add( 430 union tgsi_exec_channel *dst, 431 const union tgsi_exec_channel *src0, 432 const union tgsi_exec_channel *src1 ) 433{ 434 dst->f[0] = src0->f[0] + src1->f[0]; 435 dst->f[1] = src0->f[1] + src1->f[1]; 436 dst->f[2] = src0->f[2] + src1->f[2]; 437 dst->f[3] = src0->f[3] + src1->f[3]; 438} 439 440#if 0 441static void 442micro_iadd( 443 union tgsi_exec_channel *dst, 444 const union tgsi_exec_channel *src0, 445 const union tgsi_exec_channel *src1 ) 446{ 447 dst->i[0] = src0->i[0] + src1->i[0]; 448 dst->i[1] = src0->i[1] + src1->i[1]; 449 dst->i[2] = src0->i[2] + src1->i[2]; 450 dst->i[3] = src0->i[3] + src1->i[3]; 451} 452#endif 453 454static void 455micro_and( 456 union tgsi_exec_channel *dst, 457 const union tgsi_exec_channel *src0, 458 const union tgsi_exec_channel *src1 ) 459{ 460 dst->u[0] = src0->u[0] & src1->u[0]; 461 dst->u[1] = src0->u[1] & src1->u[1]; 462 dst->u[2] = src0->u[2] & src1->u[2]; 463 dst->u[3] = src0->u[3] & src1->u[3]; 464} 465 466static void 467micro_ceil( 468 union tgsi_exec_channel *dst, 469 const union tgsi_exec_channel *src ) 470{ 471 dst->f[0] = ceilf( src->f[0] ); 472 dst->f[1] = ceilf( src->f[1] ); 473 dst->f[2] = ceilf( src->f[2] ); 474 dst->f[3] = ceilf( src->f[3] ); 475} 476 477static void 478micro_cos( 479 union tgsi_exec_channel *dst, 480 const union tgsi_exec_channel *src ) 481{ 482 dst->f[0] = cosf( src->f[0] ); 483 dst->f[1] = cosf( src->f[1] ); 484 dst->f[2] = cosf( src->f[2] ); 485 dst->f[3] = cosf( src->f[3] ); 486} 487 488static void 489micro_ddx( 490 union tgsi_exec_channel *dst, 491 const union tgsi_exec_channel *src ) 492{ 493 dst->f[0] = 494 dst->f[1] = 495 dst->f[2] = 496 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 497} 498 499static void 500micro_ddy( 501 union tgsi_exec_channel *dst, 502 const union tgsi_exec_channel *src ) 503{ 504 dst->f[0] = 505 dst->f[1] = 506 dst->f[2] = 507 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 508} 509 510static void 511micro_div( 512 union tgsi_exec_channel *dst, 513 const union tgsi_exec_channel *src0, 514 const union tgsi_exec_channel *src1 ) 515{ 516 if (src1->f[0] != 0) { 517 dst->f[0] = src0->f[0] / src1->f[0]; 518 } 519 if (src1->f[1] != 0) { 520 dst->f[1] = src0->f[1] / src1->f[1]; 521 } 522 if (src1->f[2] != 0) { 523 dst->f[2] = src0->f[2] / src1->f[2]; 524 } 525 if (src1->f[3] != 0) { 526 dst->f[3] = src0->f[3] / src1->f[3]; 527 } 528} 529 530#if 0 531static void 532micro_udiv( 533 union tgsi_exec_channel *dst, 534 const union tgsi_exec_channel *src0, 535 const union tgsi_exec_channel *src1 ) 536{ 537 dst->u[0] = src0->u[0] / src1->u[0]; 538 dst->u[1] = src0->u[1] / src1->u[1]; 539 dst->u[2] = src0->u[2] / src1->u[2]; 540 dst->u[3] = src0->u[3] / src1->u[3]; 541} 542#endif 543 544static void 545micro_eq( 546 union tgsi_exec_channel *dst, 547 const union tgsi_exec_channel *src0, 548 const union tgsi_exec_channel *src1, 549 const union tgsi_exec_channel *src2, 550 const union tgsi_exec_channel *src3 ) 551{ 552 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 553 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 554 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 555 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 556} 557 558#if 0 559static void 560micro_ieq( 561 union tgsi_exec_channel *dst, 562 const union tgsi_exec_channel *src0, 563 const union tgsi_exec_channel *src1, 564 const union tgsi_exec_channel *src2, 565 const union tgsi_exec_channel *src3 ) 566{ 567 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 568 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 569 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 570 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 571} 572#endif 573 574static void 575micro_exp2( 576 union tgsi_exec_channel *dst, 577 const union tgsi_exec_channel *src) 578{ 579#if FAST_MATH 580 dst->f[0] = util_fast_exp2( src->f[0] ); 581 dst->f[1] = util_fast_exp2( src->f[1] ); 582 dst->f[2] = util_fast_exp2( src->f[2] ); 583 dst->f[3] = util_fast_exp2( src->f[3] ); 584#else 585 586#if DEBUG 587 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 588 uint i; 589 union tgsi_exec_channel clamped; 590 591 for (i = 0; i < 4; i++) { 592 if (src->f[i] > 127.99999f) { 593 clamped.f[i] = 127.99999f; 594 } else if (src->f[i] < -126.99999f) { 595 clamped.f[i] = -126.99999f; 596 } else { 597 clamped.f[i] = src->f[i]; 598 } 599 } 600 src = &clamped; 601#endif 602 603 dst->f[0] = powf( 2.0f, src->f[0] ); 604 dst->f[1] = powf( 2.0f, src->f[1] ); 605 dst->f[2] = powf( 2.0f, src->f[2] ); 606 dst->f[3] = powf( 2.0f, src->f[3] ); 607#endif 608} 609 610#if 0 611static void 612micro_f2ut( 613 union tgsi_exec_channel *dst, 614 const union tgsi_exec_channel *src ) 615{ 616 dst->u[0] = (uint) src->f[0]; 617 dst->u[1] = (uint) src->f[1]; 618 dst->u[2] = (uint) src->f[2]; 619 dst->u[3] = (uint) src->f[3]; 620} 621#endif 622 623static void 624micro_float_clamp(union tgsi_exec_channel *dst, 625 const union tgsi_exec_channel *src) 626{ 627 uint i; 628 629 for (i = 0; i < 4; i++) { 630 if (src->f[i] > 0.0f) { 631 if (src->f[i] > 1.884467e+019f) 632 dst->f[i] = 1.884467e+019f; 633 else if (src->f[i] < 5.42101e-020f) 634 dst->f[i] = 5.42101e-020f; 635 else 636 dst->f[i] = src->f[i]; 637 } 638 else { 639 if (src->f[i] < -1.884467e+019f) 640 dst->f[i] = -1.884467e+019f; 641 else if (src->f[i] > -5.42101e-020f) 642 dst->f[i] = -5.42101e-020f; 643 else 644 dst->f[i] = src->f[i]; 645 } 646 } 647} 648 649static void 650micro_flr( 651 union tgsi_exec_channel *dst, 652 const union tgsi_exec_channel *src ) 653{ 654 dst->f[0] = floorf( src->f[0] ); 655 dst->f[1] = floorf( src->f[1] ); 656 dst->f[2] = floorf( src->f[2] ); 657 dst->f[3] = floorf( src->f[3] ); 658} 659 660static void 661micro_frc( 662 union tgsi_exec_channel *dst, 663 const union tgsi_exec_channel *src ) 664{ 665 dst->f[0] = src->f[0] - floorf( src->f[0] ); 666 dst->f[1] = src->f[1] - floorf( src->f[1] ); 667 dst->f[2] = src->f[2] - floorf( src->f[2] ); 668 dst->f[3] = src->f[3] - floorf( src->f[3] ); 669} 670 671static void 672micro_i2f( 673 union tgsi_exec_channel *dst, 674 const union tgsi_exec_channel *src ) 675{ 676 dst->f[0] = (float) src->i[0]; 677 dst->f[1] = (float) src->i[1]; 678 dst->f[2] = (float) src->i[2]; 679 dst->f[3] = (float) src->i[3]; 680} 681 682static void 683micro_lg2( 684 union tgsi_exec_channel *dst, 685 const union tgsi_exec_channel *src ) 686{ 687#if FAST_MATH 688 dst->f[0] = util_fast_log2( src->f[0] ); 689 dst->f[1] = util_fast_log2( src->f[1] ); 690 dst->f[2] = util_fast_log2( src->f[2] ); 691 dst->f[3] = util_fast_log2( src->f[3] ); 692#else 693 dst->f[0] = logf( src->f[0] ) * 1.442695f; 694 dst->f[1] = logf( src->f[1] ) * 1.442695f; 695 dst->f[2] = logf( src->f[2] ) * 1.442695f; 696 dst->f[3] = logf( src->f[3] ) * 1.442695f; 697#endif 698} 699 700static void 701micro_le( 702 union tgsi_exec_channel *dst, 703 const union tgsi_exec_channel *src0, 704 const union tgsi_exec_channel *src1, 705 const union tgsi_exec_channel *src2, 706 const union tgsi_exec_channel *src3 ) 707{ 708 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 709 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 710 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 711 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 712} 713 714static void 715micro_lt( 716 union tgsi_exec_channel *dst, 717 const union tgsi_exec_channel *src0, 718 const union tgsi_exec_channel *src1, 719 const union tgsi_exec_channel *src2, 720 const union tgsi_exec_channel *src3 ) 721{ 722 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 723 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 724 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 725 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 726} 727 728#if 0 729static void 730micro_ilt( 731 union tgsi_exec_channel *dst, 732 const union tgsi_exec_channel *src0, 733 const union tgsi_exec_channel *src1, 734 const union tgsi_exec_channel *src2, 735 const union tgsi_exec_channel *src3 ) 736{ 737 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 738 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 739 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 740 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 741} 742#endif 743 744#if 0 745static void 746micro_ult( 747 union tgsi_exec_channel *dst, 748 const union tgsi_exec_channel *src0, 749 const union tgsi_exec_channel *src1, 750 const union tgsi_exec_channel *src2, 751 const union tgsi_exec_channel *src3 ) 752{ 753 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 754 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 755 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 756 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 757} 758#endif 759 760static void 761micro_max( 762 union tgsi_exec_channel *dst, 763 const union tgsi_exec_channel *src0, 764 const union tgsi_exec_channel *src1 ) 765{ 766 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 767 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 768 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 769 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 770} 771 772#if 0 773static void 774micro_imax( 775 union tgsi_exec_channel *dst, 776 const union tgsi_exec_channel *src0, 777 const union tgsi_exec_channel *src1 ) 778{ 779 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 780 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 781 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 782 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 783} 784#endif 785 786#if 0 787static void 788micro_umax( 789 union tgsi_exec_channel *dst, 790 const union tgsi_exec_channel *src0, 791 const union tgsi_exec_channel *src1 ) 792{ 793 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 794 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 795 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 796 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 797} 798#endif 799 800static void 801micro_min( 802 union tgsi_exec_channel *dst, 803 const union tgsi_exec_channel *src0, 804 const union tgsi_exec_channel *src1 ) 805{ 806 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 807 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 808 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 809 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 810} 811 812#if 0 813static void 814micro_imin( 815 union tgsi_exec_channel *dst, 816 const union tgsi_exec_channel *src0, 817 const union tgsi_exec_channel *src1 ) 818{ 819 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 820 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 821 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 822 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 823} 824#endif 825 826#if 0 827static void 828micro_umin( 829 union tgsi_exec_channel *dst, 830 const union tgsi_exec_channel *src0, 831 const union tgsi_exec_channel *src1 ) 832{ 833 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 834 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 835 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 836 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 837} 838#endif 839 840#if 0 841static void 842micro_umod( 843 union tgsi_exec_channel *dst, 844 const union tgsi_exec_channel *src0, 845 const union tgsi_exec_channel *src1 ) 846{ 847 dst->u[0] = src0->u[0] % src1->u[0]; 848 dst->u[1] = src0->u[1] % src1->u[1]; 849 dst->u[2] = src0->u[2] % src1->u[2]; 850 dst->u[3] = src0->u[3] % src1->u[3]; 851} 852#endif 853 854static void 855micro_mul( 856 union tgsi_exec_channel *dst, 857 const union tgsi_exec_channel *src0, 858 const union tgsi_exec_channel *src1 ) 859{ 860 dst->f[0] = src0->f[0] * src1->f[0]; 861 dst->f[1] = src0->f[1] * src1->f[1]; 862 dst->f[2] = src0->f[2] * src1->f[2]; 863 dst->f[3] = src0->f[3] * src1->f[3]; 864} 865 866#if 0 867static void 868micro_imul( 869 union tgsi_exec_channel *dst, 870 const union tgsi_exec_channel *src0, 871 const union tgsi_exec_channel *src1 ) 872{ 873 dst->i[0] = src0->i[0] * src1->i[0]; 874 dst->i[1] = src0->i[1] * src1->i[1]; 875 dst->i[2] = src0->i[2] * src1->i[2]; 876 dst->i[3] = src0->i[3] * src1->i[3]; 877} 878#endif 879 880#if 0 881static void 882micro_imul64( 883 union tgsi_exec_channel *dst0, 884 union tgsi_exec_channel *dst1, 885 const union tgsi_exec_channel *src0, 886 const union tgsi_exec_channel *src1 ) 887{ 888 dst1->i[0] = src0->i[0] * src1->i[0]; 889 dst1->i[1] = src0->i[1] * src1->i[1]; 890 dst1->i[2] = src0->i[2] * src1->i[2]; 891 dst1->i[3] = src0->i[3] * src1->i[3]; 892 dst0->i[0] = 0; 893 dst0->i[1] = 0; 894 dst0->i[2] = 0; 895 dst0->i[3] = 0; 896} 897#endif 898 899#if 0 900static void 901micro_umul64( 902 union tgsi_exec_channel *dst0, 903 union tgsi_exec_channel *dst1, 904 const union tgsi_exec_channel *src0, 905 const union tgsi_exec_channel *src1 ) 906{ 907 dst1->u[0] = src0->u[0] * src1->u[0]; 908 dst1->u[1] = src0->u[1] * src1->u[1]; 909 dst1->u[2] = src0->u[2] * src1->u[2]; 910 dst1->u[3] = src0->u[3] * src1->u[3]; 911 dst0->u[0] = 0; 912 dst0->u[1] = 0; 913 dst0->u[2] = 0; 914 dst0->u[3] = 0; 915} 916#endif 917 918 919#if 0 920static void 921micro_movc( 922 union tgsi_exec_channel *dst, 923 const union tgsi_exec_channel *src0, 924 const union tgsi_exec_channel *src1, 925 const union tgsi_exec_channel *src2 ) 926{ 927 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 928 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 929 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 930 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 931} 932#endif 933 934static void 935micro_neg( 936 union tgsi_exec_channel *dst, 937 const union tgsi_exec_channel *src ) 938{ 939 dst->f[0] = -src->f[0]; 940 dst->f[1] = -src->f[1]; 941 dst->f[2] = -src->f[2]; 942 dst->f[3] = -src->f[3]; 943} 944 945#if 0 946static void 947micro_ineg( 948 union tgsi_exec_channel *dst, 949 const union tgsi_exec_channel *src ) 950{ 951 dst->i[0] = -src->i[0]; 952 dst->i[1] = -src->i[1]; 953 dst->i[2] = -src->i[2]; 954 dst->i[3] = -src->i[3]; 955} 956#endif 957 958static void 959micro_not( 960 union tgsi_exec_channel *dst, 961 const union tgsi_exec_channel *src ) 962{ 963 dst->u[0] = ~src->u[0]; 964 dst->u[1] = ~src->u[1]; 965 dst->u[2] = ~src->u[2]; 966 dst->u[3] = ~src->u[3]; 967} 968 969static void 970micro_or( 971 union tgsi_exec_channel *dst, 972 const union tgsi_exec_channel *src0, 973 const union tgsi_exec_channel *src1 ) 974{ 975 dst->u[0] = src0->u[0] | src1->u[0]; 976 dst->u[1] = src0->u[1] | src1->u[1]; 977 dst->u[2] = src0->u[2] | src1->u[2]; 978 dst->u[3] = src0->u[3] | src1->u[3]; 979} 980 981static void 982micro_pow( 983 union tgsi_exec_channel *dst, 984 const union tgsi_exec_channel *src0, 985 const union tgsi_exec_channel *src1 ) 986{ 987#if FAST_MATH 988 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 989 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 990 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 991 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 992#else 993 dst->f[0] = powf( src0->f[0], src1->f[0] ); 994 dst->f[1] = powf( src0->f[1], src1->f[1] ); 995 dst->f[2] = powf( src0->f[2], src1->f[2] ); 996 dst->f[3] = powf( src0->f[3], src1->f[3] ); 997#endif 998} 999 1000static void 1001micro_rnd( 1002 union tgsi_exec_channel *dst, 1003 const union tgsi_exec_channel *src ) 1004{ 1005 dst->f[0] = floorf( src->f[0] + 0.5f ); 1006 dst->f[1] = floorf( src->f[1] + 0.5f ); 1007 dst->f[2] = floorf( src->f[2] + 0.5f ); 1008 dst->f[3] = floorf( src->f[3] + 0.5f ); 1009} 1010 1011static void 1012micro_sgn( 1013 union tgsi_exec_channel *dst, 1014 const union tgsi_exec_channel *src ) 1015{ 1016 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 1017 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 1018 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 1019 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1020} 1021 1022static void 1023micro_shl( 1024 union tgsi_exec_channel *dst, 1025 const union tgsi_exec_channel *src0, 1026 const union tgsi_exec_channel *src1 ) 1027{ 1028 dst->i[0] = src0->i[0] << src1->i[0]; 1029 dst->i[1] = src0->i[1] << src1->i[1]; 1030 dst->i[2] = src0->i[2] << src1->i[2]; 1031 dst->i[3] = src0->i[3] << src1->i[3]; 1032} 1033 1034static void 1035micro_ishr( 1036 union tgsi_exec_channel *dst, 1037 const union tgsi_exec_channel *src0, 1038 const union tgsi_exec_channel *src1 ) 1039{ 1040 dst->i[0] = src0->i[0] >> src1->i[0]; 1041 dst->i[1] = src0->i[1] >> src1->i[1]; 1042 dst->i[2] = src0->i[2] >> src1->i[2]; 1043 dst->i[3] = src0->i[3] >> src1->i[3]; 1044} 1045 1046static void 1047micro_trunc( 1048 union tgsi_exec_channel *dst, 1049 const union tgsi_exec_channel *src0 ) 1050{ 1051 dst->f[0] = (float) (int) src0->f[0]; 1052 dst->f[1] = (float) (int) src0->f[1]; 1053 dst->f[2] = (float) (int) src0->f[2]; 1054 dst->f[3] = (float) (int) src0->f[3]; 1055} 1056 1057#if 0 1058static void 1059micro_ushr( 1060 union tgsi_exec_channel *dst, 1061 const union tgsi_exec_channel *src0, 1062 const union tgsi_exec_channel *src1 ) 1063{ 1064 dst->u[0] = src0->u[0] >> src1->u[0]; 1065 dst->u[1] = src0->u[1] >> src1->u[1]; 1066 dst->u[2] = src0->u[2] >> src1->u[2]; 1067 dst->u[3] = src0->u[3] >> src1->u[3]; 1068} 1069#endif 1070 1071static void 1072micro_sin( 1073 union tgsi_exec_channel *dst, 1074 const union tgsi_exec_channel *src ) 1075{ 1076 dst->f[0] = sinf( src->f[0] ); 1077 dst->f[1] = sinf( src->f[1] ); 1078 dst->f[2] = sinf( src->f[2] ); 1079 dst->f[3] = sinf( src->f[3] ); 1080} 1081 1082static void 1083micro_sqrt( union tgsi_exec_channel *dst, 1084 const union tgsi_exec_channel *src ) 1085{ 1086 dst->f[0] = sqrtf( src->f[0] ); 1087 dst->f[1] = sqrtf( src->f[1] ); 1088 dst->f[2] = sqrtf( src->f[2] ); 1089 dst->f[3] = sqrtf( src->f[3] ); 1090} 1091 1092static void 1093micro_sub( 1094 union tgsi_exec_channel *dst, 1095 const union tgsi_exec_channel *src0, 1096 const union tgsi_exec_channel *src1 ) 1097{ 1098 dst->f[0] = src0->f[0] - src1->f[0]; 1099 dst->f[1] = src0->f[1] - src1->f[1]; 1100 dst->f[2] = src0->f[2] - src1->f[2]; 1101 dst->f[3] = src0->f[3] - src1->f[3]; 1102} 1103 1104#if 0 1105static void 1106micro_u2f( 1107 union tgsi_exec_channel *dst, 1108 const union tgsi_exec_channel *src ) 1109{ 1110 dst->f[0] = (float) src->u[0]; 1111 dst->f[1] = (float) src->u[1]; 1112 dst->f[2] = (float) src->u[2]; 1113 dst->f[3] = (float) src->u[3]; 1114} 1115#endif 1116 1117static void 1118micro_xor( 1119 union tgsi_exec_channel *dst, 1120 const union tgsi_exec_channel *src0, 1121 const union tgsi_exec_channel *src1 ) 1122{ 1123 dst->u[0] = src0->u[0] ^ src1->u[0]; 1124 dst->u[1] = src0->u[1] ^ src1->u[1]; 1125 dst->u[2] = src0->u[2] ^ src1->u[2]; 1126 dst->u[3] = src0->u[3] ^ src1->u[3]; 1127} 1128 1129static void 1130fetch_src_file_channel( 1131 const struct tgsi_exec_machine *mach, 1132 const uint file, 1133 const uint swizzle, 1134 const union tgsi_exec_channel *index, 1135 union tgsi_exec_channel *chan ) 1136{ 1137 switch( swizzle ) { 1138 case TGSI_SWIZZLE_X: 1139 case TGSI_SWIZZLE_Y: 1140 case TGSI_SWIZZLE_Z: 1141 case TGSI_SWIZZLE_W: 1142 switch( file ) { 1143 case TGSI_FILE_CONSTANT: 1144 assert(mach->Consts); 1145 if (index->i[0] < 0) 1146 chan->f[0] = 0.0f; 1147 else 1148 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1149 if (index->i[1] < 0) 1150 chan->f[1] = 0.0f; 1151 else 1152 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1153 if (index->i[2] < 0) 1154 chan->f[2] = 0.0f; 1155 else 1156 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1157 if (index->i[3] < 0) 1158 chan->f[3] = 0.0f; 1159 else 1160 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1161 break; 1162 1163 case TGSI_FILE_INPUT: 1164 case TGSI_FILE_SYSTEM_VALUE: 1165 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1166 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1167 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1168 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1169 break; 1170 1171 case TGSI_FILE_TEMPORARY: 1172 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1173 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1174 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1175 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1176 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1177 break; 1178 1179 case TGSI_FILE_IMMEDIATE: 1180 assert( index->i[0] < (int) mach->ImmLimit ); 1181 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1182 assert( index->i[1] < (int) mach->ImmLimit ); 1183 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1184 assert( index->i[2] < (int) mach->ImmLimit ); 1185 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1186 assert( index->i[3] < (int) mach->ImmLimit ); 1187 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1188 break; 1189 1190 case TGSI_FILE_ADDRESS: 1191 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1192 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1193 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1194 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1195 break; 1196 1197 case TGSI_FILE_PREDICATE: 1198 assert(index->i[0] < TGSI_EXEC_NUM_PREDS); 1199 assert(index->i[1] < TGSI_EXEC_NUM_PREDS); 1200 assert(index->i[2] < TGSI_EXEC_NUM_PREDS); 1201 assert(index->i[3] < TGSI_EXEC_NUM_PREDS); 1202 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; 1203 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; 1204 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; 1205 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; 1206 break; 1207 1208 case TGSI_FILE_OUTPUT: 1209 /* vertex/fragment output vars can be read too */ 1210 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1211 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1212 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1213 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1214 break; 1215 1216 default: 1217 assert( 0 ); 1218 } 1219 break; 1220 1221 default: 1222 assert( 0 ); 1223 } 1224} 1225 1226static void 1227fetch_source( 1228 const struct tgsi_exec_machine *mach, 1229 union tgsi_exec_channel *chan, 1230 const struct tgsi_full_src_register *reg, 1231 const uint chan_index ) 1232{ 1233 union tgsi_exec_channel index; 1234 uint swizzle; 1235 1236 /* We start with a direct index into a register file. 1237 * 1238 * file[1], 1239 * where: 1240 * file = Register.File 1241 * [1] = Register.Index 1242 */ 1243 index.i[0] = 1244 index.i[1] = 1245 index.i[2] = 1246 index.i[3] = reg->Register.Index; 1247 1248 /* There is an extra source register that indirectly subscripts 1249 * a register file. The direct index now becomes an offset 1250 * that is being added to the indirect register. 1251 * 1252 * file[ind[2].x+1], 1253 * where: 1254 * ind = Indirect.File 1255 * [2] = Indirect.Index 1256 * .x = Indirect.SwizzleX 1257 */ 1258 if (reg->Register.Indirect) { 1259 union tgsi_exec_channel index2; 1260 union tgsi_exec_channel indir_index; 1261 const uint execmask = mach->ExecMask; 1262 uint i; 1263 1264 /* which address register (always zero now) */ 1265 index2.i[0] = 1266 index2.i[1] = 1267 index2.i[2] = 1268 index2.i[3] = reg->Indirect.Index; 1269 1270 /* get current value of address register[swizzle] */ 1271 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1272 fetch_src_file_channel( 1273 mach, 1274 reg->Indirect.File, 1275 swizzle, 1276 &index2, 1277 &indir_index ); 1278 1279 /* add value of address register to the offset */ 1280 index.i[0] += (int) indir_index.f[0]; 1281 index.i[1] += (int) indir_index.f[1]; 1282 index.i[2] += (int) indir_index.f[2]; 1283 index.i[3] += (int) indir_index.f[3]; 1284 1285 /* for disabled execution channels, zero-out the index to 1286 * avoid using a potential garbage value. 1287 */ 1288 for (i = 0; i < QUAD_SIZE; i++) { 1289 if ((execmask & (1 << i)) == 0) 1290 index.i[i] = 0; 1291 } 1292 } 1293 1294 /* There is an extra source register that is a second 1295 * subscript to a register file. Effectively it means that 1296 * the register file is actually a 2D array of registers. 1297 * 1298 * file[1][3] == file[1*sizeof(file[1])+3], 1299 * where: 1300 * [3] = Dimension.Index 1301 */ 1302 if (reg->Register.Dimension) { 1303 /* The size of the first-order array depends on the register file type. 1304 * We need to multiply the index to the first array to get an effective, 1305 * "flat" index that points to the beginning of the second-order array. 1306 */ 1307 switch (reg->Register.File) { 1308 case TGSI_FILE_INPUT: 1309 case TGSI_FILE_SYSTEM_VALUE: 1310 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1311 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1312 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1313 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1314 break; 1315 case TGSI_FILE_CONSTANT: 1316 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1317 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1318 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1319 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1320 break; 1321 default: 1322 assert( 0 ); 1323 } 1324 1325 index.i[0] += reg->Dimension.Index; 1326 index.i[1] += reg->Dimension.Index; 1327 index.i[2] += reg->Dimension.Index; 1328 index.i[3] += reg->Dimension.Index; 1329 1330 /* Again, the second subscript index can be addressed indirectly 1331 * identically to the first one. 1332 * Nothing stops us from indirectly addressing the indirect register, 1333 * but there is no need for that, so we won't exercise it. 1334 * 1335 * file[1][ind[4].y+3], 1336 * where: 1337 * ind = DimIndirect.File 1338 * [4] = DimIndirect.Index 1339 * .y = DimIndirect.SwizzleX 1340 */ 1341 if (reg->Dimension.Indirect) { 1342 union tgsi_exec_channel index2; 1343 union tgsi_exec_channel indir_index; 1344 const uint execmask = mach->ExecMask; 1345 uint i; 1346 1347 index2.i[0] = 1348 index2.i[1] = 1349 index2.i[2] = 1350 index2.i[3] = reg->DimIndirect.Index; 1351 1352 swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); 1353 fetch_src_file_channel( 1354 mach, 1355 reg->DimIndirect.File, 1356 swizzle, 1357 &index2, 1358 &indir_index ); 1359 1360 index.i[0] += (int) indir_index.f[0]; 1361 index.i[1] += (int) indir_index.f[1]; 1362 index.i[2] += (int) indir_index.f[2]; 1363 index.i[3] += (int) indir_index.f[3]; 1364 1365 /* for disabled execution channels, zero-out the index to 1366 * avoid using a potential garbage value. 1367 */ 1368 for (i = 0; i < QUAD_SIZE; i++) { 1369 if ((execmask & (1 << i)) == 0) 1370 index.i[i] = 0; 1371 } 1372 } 1373 1374 /* If by any chance there was a need for a 3D array of register 1375 * files, we would have to check whether Dimension is followed 1376 * by a dimension register and continue the saga. 1377 */ 1378 } 1379 1380 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1381 fetch_src_file_channel( 1382 mach, 1383 reg->Register.File, 1384 swizzle, 1385 &index, 1386 chan ); 1387 1388 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1389 case TGSI_UTIL_SIGN_CLEAR: 1390 micro_abs( chan, chan ); 1391 break; 1392 1393 case TGSI_UTIL_SIGN_SET: 1394 micro_abs( chan, chan ); 1395 micro_neg( chan, chan ); 1396 break; 1397 1398 case TGSI_UTIL_SIGN_TOGGLE: 1399 micro_neg( chan, chan ); 1400 break; 1401 1402 case TGSI_UTIL_SIGN_KEEP: 1403 break; 1404 } 1405} 1406 1407static void 1408store_dest( 1409 struct tgsi_exec_machine *mach, 1410 const union tgsi_exec_channel *chan, 1411 const struct tgsi_full_dst_register *reg, 1412 const struct tgsi_full_instruction *inst, 1413 uint chan_index ) 1414{ 1415 uint i; 1416 union tgsi_exec_channel null; 1417 union tgsi_exec_channel *dst; 1418 uint execmask = mach->ExecMask; 1419 int offset = 0; /* indirection offset */ 1420 int index; 1421 1422#ifdef DEBUG 1423 check_inf_or_nan(chan); 1424#endif 1425 1426 /* There is an extra source register that indirectly subscripts 1427 * a register file. The direct index now becomes an offset 1428 * that is being added to the indirect register. 1429 * 1430 * file[ind[2].x+1], 1431 * where: 1432 * ind = Indirect.File 1433 * [2] = Indirect.Index 1434 * .x = Indirect.SwizzleX 1435 */ 1436 if (reg->Register.Indirect) { 1437 union tgsi_exec_channel index; 1438 union tgsi_exec_channel indir_index; 1439 uint swizzle; 1440 1441 /* which address register (always zero for now) */ 1442 index.i[0] = 1443 index.i[1] = 1444 index.i[2] = 1445 index.i[3] = reg->Indirect.Index; 1446 1447 /* get current value of address register[swizzle] */ 1448 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1449 1450 /* fetch values from the address/indirection register */ 1451 fetch_src_file_channel( 1452 mach, 1453 reg->Indirect.File, 1454 swizzle, 1455 &index, 1456 &indir_index ); 1457 1458 /* save indirection offset */ 1459 offset = (int) indir_index.f[0]; 1460 } 1461 1462 switch (reg->Register.File) { 1463 case TGSI_FILE_NULL: 1464 dst = &null; 1465 break; 1466 1467 case TGSI_FILE_OUTPUT: 1468 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1469 + reg->Register.Index; 1470 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1471 break; 1472 1473 case TGSI_FILE_TEMPORARY: 1474 index = reg->Register.Index; 1475 assert( index < TGSI_EXEC_NUM_TEMPS ); 1476 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1477 break; 1478 1479 case TGSI_FILE_ADDRESS: 1480 index = reg->Register.Index; 1481 dst = &mach->Addrs[index].xyzw[chan_index]; 1482 break; 1483 1484 case TGSI_FILE_LOOP: 1485 assert(reg->Register.Index == 0); 1486 assert(mach->LoopCounterStackTop > 0); 1487 assert(chan_index == CHAN_X); 1488 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; 1489 break; 1490 1491 case TGSI_FILE_PREDICATE: 1492 index = reg->Register.Index; 1493 assert(index < TGSI_EXEC_NUM_PREDS); 1494 dst = &mach->Predicates[index].xyzw[chan_index]; 1495 break; 1496 1497 default: 1498 assert( 0 ); 1499 return; 1500 } 1501 1502 if (inst->Instruction.Predicate) { 1503 uint swizzle; 1504 union tgsi_exec_channel *pred; 1505 1506 switch (chan_index) { 1507 case CHAN_X: 1508 swizzle = inst->Predicate.SwizzleX; 1509 break; 1510 case CHAN_Y: 1511 swizzle = inst->Predicate.SwizzleY; 1512 break; 1513 case CHAN_Z: 1514 swizzle = inst->Predicate.SwizzleZ; 1515 break; 1516 case CHAN_W: 1517 swizzle = inst->Predicate.SwizzleW; 1518 break; 1519 default: 1520 assert(0); 1521 return; 1522 } 1523 1524 assert(inst->Predicate.Index == 0); 1525 1526 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; 1527 1528 if (inst->Predicate.Negate) { 1529 for (i = 0; i < QUAD_SIZE; i++) { 1530 if (pred->u[i]) { 1531 execmask &= ~(1 << i); 1532 } 1533 } 1534 } else { 1535 for (i = 0; i < QUAD_SIZE; i++) { 1536 if (!pred->u[i]) { 1537 execmask &= ~(1 << i); 1538 } 1539 } 1540 } 1541 } 1542 1543 switch (inst->Instruction.Saturate) { 1544 case TGSI_SAT_NONE: 1545 for (i = 0; i < QUAD_SIZE; i++) 1546 if (execmask & (1 << i)) 1547 dst->i[i] = chan->i[i]; 1548 break; 1549 1550 case TGSI_SAT_ZERO_ONE: 1551 for (i = 0; i < QUAD_SIZE; i++) 1552 if (execmask & (1 << i)) { 1553 if (chan->f[i] < 0.0f) 1554 dst->f[i] = 0.0f; 1555 else if (chan->f[i] > 1.0f) 1556 dst->f[i] = 1.0f; 1557 else 1558 dst->i[i] = chan->i[i]; 1559 } 1560 break; 1561 1562 case TGSI_SAT_MINUS_PLUS_ONE: 1563 for (i = 0; i < QUAD_SIZE; i++) 1564 if (execmask & (1 << i)) { 1565 if (chan->f[i] < -1.0f) 1566 dst->f[i] = -1.0f; 1567 else if (chan->f[i] > 1.0f) 1568 dst->f[i] = 1.0f; 1569 else 1570 dst->i[i] = chan->i[i]; 1571 } 1572 break; 1573 1574 default: 1575 assert( 0 ); 1576 } 1577} 1578 1579#define FETCH(VAL,INDEX,CHAN)\ 1580 fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) 1581 1582#define STORE(VAL,INDEX,CHAN)\ 1583 store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) 1584 1585 1586/** 1587 * Execute ARB-style KIL which is predicated by a src register. 1588 * Kill fragment if any of the four values is less than zero. 1589 */ 1590static void 1591exec_kil(struct tgsi_exec_machine *mach, 1592 const struct tgsi_full_instruction *inst) 1593{ 1594 uint uniquemask; 1595 uint chan_index; 1596 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1597 union tgsi_exec_channel r[1]; 1598 1599 /* This mask stores component bits that were already tested. */ 1600 uniquemask = 0; 1601 1602 for (chan_index = 0; chan_index < 4; chan_index++) 1603 { 1604 uint swizzle; 1605 uint i; 1606 1607 /* unswizzle channel */ 1608 swizzle = tgsi_util_get_full_src_register_swizzle ( 1609 &inst->Src[0], 1610 chan_index); 1611 1612 /* check if the component has not been already tested */ 1613 if (uniquemask & (1 << swizzle)) 1614 continue; 1615 uniquemask |= 1 << swizzle; 1616 1617 FETCH(&r[0], 0, chan_index); 1618 for (i = 0; i < 4; i++) 1619 if (r[0].f[i] < 0.0f) 1620 kilmask |= 1 << i; 1621 } 1622 1623 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1624} 1625 1626/** 1627 * Execute NVIDIA-style KIL which is predicated by a condition code. 1628 * Kill fragment if the condition code is TRUE. 1629 */ 1630static void 1631exec_kilp(struct tgsi_exec_machine *mach, 1632 const struct tgsi_full_instruction *inst) 1633{ 1634 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1635 1636 /* "unconditional" kil */ 1637 kilmask = mach->ExecMask; 1638 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1639} 1640 1641 1642/* 1643 * Fetch a four texture samples using STR texture coordinates. 1644 */ 1645static void 1646fetch_texel( struct tgsi_sampler *sampler, 1647 const union tgsi_exec_channel *s, 1648 const union tgsi_exec_channel *t, 1649 const union tgsi_exec_channel *p, 1650 float lodbias, /* XXX should be float[4] */ 1651 union tgsi_exec_channel *r, 1652 union tgsi_exec_channel *g, 1653 union tgsi_exec_channel *b, 1654 union tgsi_exec_channel *a ) 1655{ 1656 uint j; 1657 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1658 1659 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1660 1661 for (j = 0; j < 4; j++) { 1662 r->f[j] = rgba[0][j]; 1663 g->f[j] = rgba[1][j]; 1664 b->f[j] = rgba[2][j]; 1665 a->f[j] = rgba[3][j]; 1666 } 1667} 1668 1669 1670static void 1671exec_tex(struct tgsi_exec_machine *mach, 1672 const struct tgsi_full_instruction *inst, 1673 boolean biasLod, 1674 boolean projected) 1675{ 1676 const uint unit = inst->Src[1].Register.Index; 1677 union tgsi_exec_channel r[4]; 1678 uint chan_index; 1679 float lodBias; 1680 1681 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1682 1683 switch (inst->Texture.Texture) { 1684 case TGSI_TEXTURE_1D: 1685 case TGSI_TEXTURE_SHADOW1D: 1686 1687 FETCH(&r[0], 0, CHAN_X); 1688 1689 if (projected) { 1690 FETCH(&r[1], 0, CHAN_W); 1691 micro_div( &r[0], &r[0], &r[1] ); 1692 } 1693 1694 if (biasLod) { 1695 FETCH(&r[1], 0, CHAN_W); 1696 lodBias = r[2].f[0]; 1697 } 1698 else 1699 lodBias = 0.0; 1700 1701 fetch_texel(mach->Samplers[unit], 1702 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1703 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1704 break; 1705 1706 case TGSI_TEXTURE_2D: 1707 case TGSI_TEXTURE_RECT: 1708 case TGSI_TEXTURE_SHADOW2D: 1709 case TGSI_TEXTURE_SHADOWRECT: 1710 1711 FETCH(&r[0], 0, CHAN_X); 1712 FETCH(&r[1], 0, CHAN_Y); 1713 FETCH(&r[2], 0, CHAN_Z); 1714 1715 if (projected) { 1716 FETCH(&r[3], 0, CHAN_W); 1717 micro_div( &r[0], &r[0], &r[3] ); 1718 micro_div( &r[1], &r[1], &r[3] ); 1719 micro_div( &r[2], &r[2], &r[3] ); 1720 } 1721 1722 if (biasLod) { 1723 FETCH(&r[3], 0, CHAN_W); 1724 lodBias = r[3].f[0]; 1725 } 1726 else 1727 lodBias = 0.0; 1728 1729 fetch_texel(mach->Samplers[unit], 1730 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1731 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1732 break; 1733 1734 case TGSI_TEXTURE_3D: 1735 case TGSI_TEXTURE_CUBE: 1736 1737 FETCH(&r[0], 0, CHAN_X); 1738 FETCH(&r[1], 0, CHAN_Y); 1739 FETCH(&r[2], 0, CHAN_Z); 1740 1741 if (projected) { 1742 FETCH(&r[3], 0, CHAN_W); 1743 micro_div( &r[0], &r[0], &r[3] ); 1744 micro_div( &r[1], &r[1], &r[3] ); 1745 micro_div( &r[2], &r[2], &r[3] ); 1746 } 1747 1748 if (biasLod) { 1749 FETCH(&r[3], 0, CHAN_W); 1750 lodBias = r[3].f[0]; 1751 } 1752 else 1753 lodBias = 0.0; 1754 1755 fetch_texel(mach->Samplers[unit], 1756 &r[0], &r[1], &r[2], lodBias, 1757 &r[0], &r[1], &r[2], &r[3]); 1758 break; 1759 1760 default: 1761 assert (0); 1762 } 1763 1764 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1765 STORE( &r[chan_index], 0, chan_index ); 1766 } 1767} 1768 1769static void 1770exec_txd(struct tgsi_exec_machine *mach, 1771 const struct tgsi_full_instruction *inst) 1772{ 1773 const uint unit = inst->Src[3].Register.Index; 1774 union tgsi_exec_channel r[4]; 1775 uint chan_index; 1776 1777 /* 1778 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 1779 */ 1780 1781 switch (inst->Texture.Texture) { 1782 case TGSI_TEXTURE_1D: 1783 case TGSI_TEXTURE_SHADOW1D: 1784 1785 FETCH(&r[0], 0, CHAN_X); 1786 1787 fetch_texel(mach->Samplers[unit], 1788 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ 1789 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1790 break; 1791 1792 case TGSI_TEXTURE_2D: 1793 case TGSI_TEXTURE_RECT: 1794 case TGSI_TEXTURE_SHADOW2D: 1795 case TGSI_TEXTURE_SHADOWRECT: 1796 1797 FETCH(&r[0], 0, CHAN_X); 1798 FETCH(&r[1], 0, CHAN_Y); 1799 FETCH(&r[2], 0, CHAN_Z); 1800 1801 fetch_texel(mach->Samplers[unit], 1802 &r[0], &r[1], &r[2], 0.0f, /* inputs */ 1803 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1804 break; 1805 1806 case TGSI_TEXTURE_3D: 1807 case TGSI_TEXTURE_CUBE: 1808 1809 FETCH(&r[0], 0, CHAN_X); 1810 FETCH(&r[1], 0, CHAN_Y); 1811 FETCH(&r[2], 0, CHAN_Z); 1812 1813 fetch_texel(mach->Samplers[unit], 1814 &r[0], &r[1], &r[2], 0.0f, 1815 &r[0], &r[1], &r[2], &r[3]); 1816 break; 1817 1818 default: 1819 assert(0); 1820 } 1821 1822 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1823 STORE(&r[chan_index], 0, chan_index); 1824 } 1825} 1826 1827 1828/** 1829 * Evaluate a constant-valued coefficient at the position of the 1830 * current quad. 1831 */ 1832static void 1833eval_constant_coef( 1834 struct tgsi_exec_machine *mach, 1835 unsigned attrib, 1836 unsigned chan ) 1837{ 1838 unsigned i; 1839 1840 for( i = 0; i < QUAD_SIZE; i++ ) { 1841 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1842 } 1843} 1844 1845/** 1846 * Evaluate a linear-valued coefficient at the position of the 1847 * current quad. 1848 */ 1849static void 1850eval_linear_coef( 1851 struct tgsi_exec_machine *mach, 1852 unsigned attrib, 1853 unsigned chan ) 1854{ 1855 const float x = mach->QuadPos.xyzw[0].f[0]; 1856 const float y = mach->QuadPos.xyzw[1].f[0]; 1857 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1858 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1859 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1860 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1861 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1862 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1863 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1864} 1865 1866/** 1867 * Evaluate a perspective-valued coefficient at the position of the 1868 * current quad. 1869 */ 1870static void 1871eval_perspective_coef( 1872 struct tgsi_exec_machine *mach, 1873 unsigned attrib, 1874 unsigned chan ) 1875{ 1876 const float x = mach->QuadPos.xyzw[0].f[0]; 1877 const float y = mach->QuadPos.xyzw[1].f[0]; 1878 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1879 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1880 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1881 const float *w = mach->QuadPos.xyzw[3].f; 1882 /* divide by W here */ 1883 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1884 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1885 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1886 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1887} 1888 1889 1890typedef void (* eval_coef_func)( 1891 struct tgsi_exec_machine *mach, 1892 unsigned attrib, 1893 unsigned chan ); 1894 1895static void 1896exec_declaration(struct tgsi_exec_machine *mach, 1897 const struct tgsi_full_declaration *decl) 1898{ 1899 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 1900 if (decl->Declaration.File == TGSI_FILE_INPUT || 1901 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1902 uint first, last, mask; 1903 1904 first = decl->Range.First; 1905 last = decl->Range.Last; 1906 mask = decl->Declaration.UsageMask; 1907 1908 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { 1909 assert(decl->Semantic.Index == 0); 1910 assert(first == last); 1911 assert(mask = TGSI_WRITEMASK_XYZW); 1912 1913 mach->Inputs[first] = mach->QuadPos; 1914 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 1915 uint i; 1916 1917 assert(decl->Semantic.Index == 0); 1918 assert(first == last); 1919 1920 for (i = 0; i < QUAD_SIZE; i++) { 1921 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 1922 } 1923 } else { 1924 eval_coef_func eval; 1925 uint i, j; 1926 1927 switch (decl->Declaration.Interpolate) { 1928 case TGSI_INTERPOLATE_CONSTANT: 1929 eval = eval_constant_coef; 1930 break; 1931 1932 case TGSI_INTERPOLATE_LINEAR: 1933 eval = eval_linear_coef; 1934 break; 1935 1936 case TGSI_INTERPOLATE_PERSPECTIVE: 1937 eval = eval_perspective_coef; 1938 break; 1939 1940 default: 1941 assert(0); 1942 return; 1943 } 1944 1945 for (j = 0; j < NUM_CHANNELS; j++) { 1946 if (mask & (1 << j)) { 1947 for (i = first; i <= last; i++) { 1948 eval(mach, i, j); 1949 } 1950 } 1951 } 1952 } 1953 } 1954 } 1955} 1956 1957static void 1958exec_instruction( 1959 struct tgsi_exec_machine *mach, 1960 const struct tgsi_full_instruction *inst, 1961 int *pc ) 1962{ 1963 uint chan_index; 1964 union tgsi_exec_channel r[10]; 1965 union tgsi_exec_channel d[8]; 1966 1967 (*pc)++; 1968 1969 switch (inst->Instruction.Opcode) { 1970 case TGSI_OPCODE_ARL: 1971 case TGSI_OPCODE_FLR: 1972 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1973 FETCH( &r[0], 0, chan_index ); 1974 micro_flr(&d[chan_index], &r[0]); 1975 } 1976 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1977 STORE(&d[chan_index], 0, chan_index); 1978 } 1979 break; 1980 1981 case TGSI_OPCODE_MOV: 1982 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1983 FETCH(&d[chan_index], 0, chan_index); 1984 } 1985 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1986 STORE(&d[chan_index], 0, chan_index); 1987 } 1988 break; 1989 1990 case TGSI_OPCODE_LIT: 1991 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1992 FETCH( &r[0], 0, CHAN_X ); 1993 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1994 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 1995 } 1996 1997 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1998 FETCH( &r[1], 0, CHAN_Y ); 1999 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2000 2001 FETCH( &r[2], 0, CHAN_W ); 2002 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2003 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2004 micro_pow( &r[1], &r[1], &r[2] ); 2005 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2006 } 2007 2008 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2009 STORE(&d[CHAN_Y], 0, CHAN_Y); 2010 } 2011 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2012 STORE(&d[CHAN_Z], 0, CHAN_Z); 2013 } 2014 } 2015 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2016 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2017 } 2018 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2019 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2020 } 2021 break; 2022 2023 case TGSI_OPCODE_RCP: 2024 /* TGSI_OPCODE_RECIP */ 2025 FETCH( &r[0], 0, CHAN_X ); 2026 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2027 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2028 STORE( &r[0], 0, chan_index ); 2029 } 2030 break; 2031 2032 case TGSI_OPCODE_RSQ: 2033 /* TGSI_OPCODE_RECIPSQRT */ 2034 FETCH( &r[0], 0, CHAN_X ); 2035 micro_abs( &r[0], &r[0] ); 2036 micro_sqrt( &r[0], &r[0] ); 2037 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2038 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2039 STORE( &r[0], 0, chan_index ); 2040 } 2041 break; 2042 2043 case TGSI_OPCODE_EXP: 2044 FETCH( &r[0], 0, CHAN_X ); 2045 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2046 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2047 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2048 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2049 } 2050 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2051 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2052 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2053 } 2054 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2055 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2056 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2057 } 2058 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2059 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2060 } 2061 break; 2062 2063 case TGSI_OPCODE_LOG: 2064 FETCH( &r[0], 0, CHAN_X ); 2065 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2066 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2067 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2068 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2069 STORE( &r[0], 0, CHAN_X ); 2070 } 2071 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2072 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2073 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2074 STORE( &r[0], 0, CHAN_Y ); 2075 } 2076 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2077 STORE( &r[1], 0, CHAN_Z ); 2078 } 2079 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2080 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2081 } 2082 break; 2083 2084 case TGSI_OPCODE_MUL: 2085 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2086 FETCH(&r[0], 0, chan_index); 2087 FETCH(&r[1], 1, chan_index); 2088 micro_mul(&d[chan_index], &r[0], &r[1]); 2089 } 2090 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2091 STORE(&d[chan_index], 0, chan_index); 2092 } 2093 break; 2094 2095 case TGSI_OPCODE_ADD: 2096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2097 FETCH( &r[0], 0, chan_index ); 2098 FETCH( &r[1], 1, chan_index ); 2099 micro_add(&d[chan_index], &r[0], &r[1]); 2100 } 2101 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2102 STORE(&d[chan_index], 0, chan_index); 2103 } 2104 break; 2105 2106 case TGSI_OPCODE_DP3: 2107 /* TGSI_OPCODE_DOT3 */ 2108 FETCH( &r[0], 0, CHAN_X ); 2109 FETCH( &r[1], 1, CHAN_X ); 2110 micro_mul( &r[0], &r[0], &r[1] ); 2111 2112 FETCH( &r[1], 0, CHAN_Y ); 2113 FETCH( &r[2], 1, CHAN_Y ); 2114 micro_mul( &r[1], &r[1], &r[2] ); 2115 micro_add( &r[0], &r[0], &r[1] ); 2116 2117 FETCH( &r[1], 0, CHAN_Z ); 2118 FETCH( &r[2], 1, CHAN_Z ); 2119 micro_mul( &r[1], &r[1], &r[2] ); 2120 micro_add( &r[0], &r[0], &r[1] ); 2121 2122 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2123 STORE( &r[0], 0, chan_index ); 2124 } 2125 break; 2126 2127 case TGSI_OPCODE_DP4: 2128 /* TGSI_OPCODE_DOT4 */ 2129 FETCH(&r[0], 0, CHAN_X); 2130 FETCH(&r[1], 1, CHAN_X); 2131 2132 micro_mul( &r[0], &r[0], &r[1] ); 2133 2134 FETCH(&r[1], 0, CHAN_Y); 2135 FETCH(&r[2], 1, CHAN_Y); 2136 2137 micro_mul( &r[1], &r[1], &r[2] ); 2138 micro_add( &r[0], &r[0], &r[1] ); 2139 2140 FETCH(&r[1], 0, CHAN_Z); 2141 FETCH(&r[2], 1, CHAN_Z); 2142 2143 micro_mul( &r[1], &r[1], &r[2] ); 2144 micro_add( &r[0], &r[0], &r[1] ); 2145 2146 FETCH(&r[1], 0, CHAN_W); 2147 FETCH(&r[2], 1, CHAN_W); 2148 2149 micro_mul( &r[1], &r[1], &r[2] ); 2150 micro_add( &r[0], &r[0], &r[1] ); 2151 2152 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2153 STORE( &r[0], 0, chan_index ); 2154 } 2155 break; 2156 2157 case TGSI_OPCODE_DST: 2158 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2159 FETCH( &r[0], 0, CHAN_Y ); 2160 FETCH( &r[1], 1, CHAN_Y); 2161 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2162 } 2163 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2164 FETCH(&d[CHAN_Z], 0, CHAN_Z); 2165 } 2166 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2167 FETCH(&d[CHAN_W], 1, CHAN_W); 2168 } 2169 2170 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2171 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); 2172 } 2173 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2174 STORE(&d[CHAN_Y], 0, CHAN_Y); 2175 } 2176 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2177 STORE(&d[CHAN_Z], 0, CHAN_Z); 2178 } 2179 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2180 STORE(&d[CHAN_W], 0, CHAN_W); 2181 } 2182 break; 2183 2184 case TGSI_OPCODE_MIN: 2185 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2186 FETCH(&r[0], 0, chan_index); 2187 FETCH(&r[1], 1, chan_index); 2188 2189 /* XXX use micro_min()?? */ 2190 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); 2191 } 2192 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2193 STORE(&d[chan_index], 0, chan_index); 2194 } 2195 break; 2196 2197 case TGSI_OPCODE_MAX: 2198 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2199 FETCH(&r[0], 0, chan_index); 2200 FETCH(&r[1], 1, chan_index); 2201 2202 /* XXX use micro_max()?? */ 2203 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); 2204 } 2205 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2206 STORE(&d[chan_index], 0, chan_index); 2207 } 2208 break; 2209 2210 case TGSI_OPCODE_SLT: 2211 /* TGSI_OPCODE_SETLT */ 2212 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2213 FETCH( &r[0], 0, chan_index ); 2214 FETCH( &r[1], 1, chan_index ); 2215 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2216 } 2217 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2218 STORE(&d[chan_index], 0, chan_index); 2219 } 2220 break; 2221 2222 case TGSI_OPCODE_SGE: 2223 /* TGSI_OPCODE_SETGE */ 2224 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2225 FETCH( &r[0], 0, chan_index ); 2226 FETCH( &r[1], 1, chan_index ); 2227 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2228 } 2229 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2230 STORE(&d[chan_index], 0, chan_index); 2231 } 2232 break; 2233 2234 case TGSI_OPCODE_MAD: 2235 /* TGSI_OPCODE_MADD */ 2236 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2237 FETCH( &r[0], 0, chan_index ); 2238 FETCH( &r[1], 1, chan_index ); 2239 micro_mul( &r[0], &r[0], &r[1] ); 2240 FETCH( &r[1], 2, chan_index ); 2241 micro_add(&d[chan_index], &r[0], &r[1]); 2242 } 2243 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2244 STORE(&d[chan_index], 0, chan_index); 2245 } 2246 break; 2247 2248 case TGSI_OPCODE_SUB: 2249 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2250 FETCH(&r[0], 0, chan_index); 2251 FETCH(&r[1], 1, chan_index); 2252 micro_sub(&d[chan_index], &r[0], &r[1]); 2253 } 2254 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2255 STORE(&d[chan_index], 0, chan_index); 2256 } 2257 break; 2258 2259 case TGSI_OPCODE_LRP: 2260 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2261 FETCH(&r[0], 0, chan_index); 2262 FETCH(&r[1], 1, chan_index); 2263 FETCH(&r[2], 2, chan_index); 2264 micro_sub( &r[1], &r[1], &r[2] ); 2265 micro_mul( &r[0], &r[0], &r[1] ); 2266 micro_add(&d[chan_index], &r[0], &r[2]); 2267 } 2268 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2269 STORE(&d[chan_index], 0, chan_index); 2270 } 2271 break; 2272 2273 case TGSI_OPCODE_CND: 2274 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2275 FETCH(&r[0], 0, chan_index); 2276 FETCH(&r[1], 1, chan_index); 2277 FETCH(&r[2], 2, chan_index); 2278 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2279 } 2280 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2281 STORE(&d[chan_index], 0, chan_index); 2282 } 2283 break; 2284 2285 case TGSI_OPCODE_DP2A: 2286 FETCH( &r[0], 0, CHAN_X ); 2287 FETCH( &r[1], 1, CHAN_X ); 2288 micro_mul( &r[0], &r[0], &r[1] ); 2289 2290 FETCH( &r[1], 0, CHAN_Y ); 2291 FETCH( &r[2], 1, CHAN_Y ); 2292 micro_mul( &r[1], &r[1], &r[2] ); 2293 micro_add( &r[0], &r[0], &r[1] ); 2294 2295 FETCH( &r[2], 2, CHAN_X ); 2296 micro_add( &r[0], &r[0], &r[2] ); 2297 2298 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2299 STORE( &r[0], 0, chan_index ); 2300 } 2301 break; 2302 2303 case TGSI_OPCODE_FRC: 2304 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2305 FETCH( &r[0], 0, chan_index ); 2306 micro_frc(&d[chan_index], &r[0]); 2307 } 2308 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2309 STORE(&d[chan_index], 0, chan_index); 2310 } 2311 break; 2312 2313 case TGSI_OPCODE_CLAMP: 2314 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2315 FETCH(&r[0], 0, chan_index); 2316 FETCH(&r[1], 1, chan_index); 2317 micro_max(&r[0], &r[0], &r[1]); 2318 FETCH(&r[1], 2, chan_index); 2319 micro_min(&d[chan_index], &r[0], &r[1]); 2320 } 2321 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2322 STORE(&d[chan_index], 0, chan_index); 2323 } 2324 break; 2325 2326 case TGSI_OPCODE_ROUND: 2327 case TGSI_OPCODE_ARR: 2328 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2329 FETCH( &r[0], 0, chan_index ); 2330 micro_rnd(&d[chan_index], &r[0]); 2331 } 2332 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2333 STORE(&d[chan_index], 0, chan_index); 2334 } 2335 break; 2336 2337 case TGSI_OPCODE_EX2: 2338 FETCH(&r[0], 0, CHAN_X); 2339 2340 micro_exp2( &r[0], &r[0] ); 2341 2342 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2343 STORE( &r[0], 0, chan_index ); 2344 } 2345 break; 2346 2347 case TGSI_OPCODE_LG2: 2348 FETCH( &r[0], 0, CHAN_X ); 2349 micro_lg2( &r[0], &r[0] ); 2350 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2351 STORE( &r[0], 0, chan_index ); 2352 } 2353 break; 2354 2355 case TGSI_OPCODE_POW: 2356 FETCH(&r[0], 0, CHAN_X); 2357 FETCH(&r[1], 1, CHAN_X); 2358 2359 micro_pow( &r[0], &r[0], &r[1] ); 2360 2361 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2362 STORE( &r[0], 0, chan_index ); 2363 } 2364 break; 2365 2366 case TGSI_OPCODE_XPD: 2367 FETCH(&r[0], 0, CHAN_Y); 2368 FETCH(&r[1], 1, CHAN_Z); 2369 2370 micro_mul( &r[2], &r[0], &r[1] ); 2371 2372 FETCH(&r[3], 0, CHAN_Z); 2373 FETCH(&r[4], 1, CHAN_Y); 2374 2375 micro_mul( &r[5], &r[3], &r[4] ); 2376 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2377 2378 FETCH(&r[2], 1, CHAN_X); 2379 2380 micro_mul( &r[3], &r[3], &r[2] ); 2381 2382 FETCH(&r[5], 0, CHAN_X); 2383 2384 micro_mul( &r[1], &r[1], &r[5] ); 2385 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2386 2387 micro_mul( &r[5], &r[5], &r[4] ); 2388 micro_mul( &r[0], &r[0], &r[2] ); 2389 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2390 2391 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2392 STORE(&d[CHAN_X], 0, CHAN_X); 2393 } 2394 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2395 STORE(&d[CHAN_Y], 0, CHAN_Y); 2396 } 2397 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2398 STORE(&d[CHAN_Z], 0, CHAN_Z); 2399 } 2400 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2401 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2402 } 2403 break; 2404 2405 case TGSI_OPCODE_ABS: 2406 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2407 FETCH(&r[0], 0, chan_index); 2408 micro_abs(&d[chan_index], &r[0]); 2409 } 2410 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2411 STORE(&d[chan_index], 0, chan_index); 2412 } 2413 break; 2414 2415 case TGSI_OPCODE_RCC: 2416 FETCH(&r[0], 0, CHAN_X); 2417 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2418 micro_float_clamp(&r[0], &r[0]); 2419 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2420 STORE(&r[0], 0, chan_index); 2421 } 2422 break; 2423 2424 case TGSI_OPCODE_DPH: 2425 FETCH(&r[0], 0, CHAN_X); 2426 FETCH(&r[1], 1, CHAN_X); 2427 2428 micro_mul( &r[0], &r[0], &r[1] ); 2429 2430 FETCH(&r[1], 0, CHAN_Y); 2431 FETCH(&r[2], 1, CHAN_Y); 2432 2433 micro_mul( &r[1], &r[1], &r[2] ); 2434 micro_add( &r[0], &r[0], &r[1] ); 2435 2436 FETCH(&r[1], 0, CHAN_Z); 2437 FETCH(&r[2], 1, CHAN_Z); 2438 2439 micro_mul( &r[1], &r[1], &r[2] ); 2440 micro_add( &r[0], &r[0], &r[1] ); 2441 2442 FETCH(&r[1], 1, CHAN_W); 2443 2444 micro_add( &r[0], &r[0], &r[1] ); 2445 2446 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2447 STORE( &r[0], 0, chan_index ); 2448 } 2449 break; 2450 2451 case TGSI_OPCODE_COS: 2452 FETCH(&r[0], 0, CHAN_X); 2453 2454 micro_cos( &r[0], &r[0] ); 2455 2456 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2457 STORE( &r[0], 0, chan_index ); 2458 } 2459 break; 2460 2461 case TGSI_OPCODE_DDX: 2462 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2463 FETCH( &r[0], 0, chan_index ); 2464 micro_ddx(&d[chan_index], &r[0]); 2465 } 2466 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2467 STORE(&d[chan_index], 0, chan_index); 2468 } 2469 break; 2470 2471 case TGSI_OPCODE_DDY: 2472 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2473 FETCH( &r[0], 0, chan_index ); 2474 micro_ddy(&d[chan_index], &r[0]); 2475 } 2476 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2477 STORE(&d[chan_index], 0, chan_index); 2478 } 2479 break; 2480 2481 case TGSI_OPCODE_KILP: 2482 exec_kilp (mach, inst); 2483 break; 2484 2485 case TGSI_OPCODE_KIL: 2486 exec_kil (mach, inst); 2487 break; 2488 2489 case TGSI_OPCODE_PK2H: 2490 assert (0); 2491 break; 2492 2493 case TGSI_OPCODE_PK2US: 2494 assert (0); 2495 break; 2496 2497 case TGSI_OPCODE_PK4B: 2498 assert (0); 2499 break; 2500 2501 case TGSI_OPCODE_PK4UB: 2502 assert (0); 2503 break; 2504 2505 case TGSI_OPCODE_RFL: 2506 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2507 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2508 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2509 /* r0 = dp3(src0, src0) */ 2510 FETCH(&r[2], 0, CHAN_X); 2511 micro_mul(&r[0], &r[2], &r[2]); 2512 FETCH(&r[4], 0, CHAN_Y); 2513 micro_mul(&r[8], &r[4], &r[4]); 2514 micro_add(&r[0], &r[0], &r[8]); 2515 FETCH(&r[6], 0, CHAN_Z); 2516 micro_mul(&r[8], &r[6], &r[6]); 2517 micro_add(&r[0], &r[0], &r[8]); 2518 2519 /* r1 = dp3(src0, src1) */ 2520 FETCH(&r[3], 1, CHAN_X); 2521 micro_mul(&r[1], &r[2], &r[3]); 2522 FETCH(&r[5], 1, CHAN_Y); 2523 micro_mul(&r[8], &r[4], &r[5]); 2524 micro_add(&r[1], &r[1], &r[8]); 2525 FETCH(&r[7], 1, CHAN_Z); 2526 micro_mul(&r[8], &r[6], &r[7]); 2527 micro_add(&r[1], &r[1], &r[8]); 2528 2529 /* r1 = 2 * r1 / r0 */ 2530 micro_add(&r[1], &r[1], &r[1]); 2531 micro_div(&r[1], &r[1], &r[0]); 2532 2533 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2534 micro_mul(&r[2], &r[2], &r[1]); 2535 micro_sub(&r[2], &r[2], &r[3]); 2536 STORE(&r[2], 0, CHAN_X); 2537 } 2538 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2539 micro_mul(&r[4], &r[4], &r[1]); 2540 micro_sub(&r[4], &r[4], &r[5]); 2541 STORE(&r[4], 0, CHAN_Y); 2542 } 2543 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2544 micro_mul(&r[6], &r[6], &r[1]); 2545 micro_sub(&r[6], &r[6], &r[7]); 2546 STORE(&r[6], 0, CHAN_Z); 2547 } 2548 } 2549 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2550 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2551 } 2552 break; 2553 2554 case TGSI_OPCODE_SEQ: 2555 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2556 FETCH( &r[0], 0, chan_index ); 2557 FETCH( &r[1], 1, chan_index ); 2558 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2559 } 2560 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2561 STORE(&d[chan_index], 0, chan_index); 2562 } 2563 break; 2564 2565 case TGSI_OPCODE_SFL: 2566 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2567 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2568 } 2569 break; 2570 2571 case TGSI_OPCODE_SGT: 2572 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2573 FETCH( &r[0], 0, chan_index ); 2574 FETCH( &r[1], 1, chan_index ); 2575 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2576 } 2577 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2578 STORE(&d[chan_index], 0, chan_index); 2579 } 2580 break; 2581 2582 case TGSI_OPCODE_SIN: 2583 FETCH( &r[0], 0, CHAN_X ); 2584 micro_sin( &r[0], &r[0] ); 2585 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2586 STORE( &r[0], 0, chan_index ); 2587 } 2588 break; 2589 2590 case TGSI_OPCODE_SLE: 2591 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2592 FETCH( &r[0], 0, chan_index ); 2593 FETCH( &r[1], 1, chan_index ); 2594 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2595 } 2596 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2597 STORE(&d[chan_index], 0, chan_index); 2598 } 2599 break; 2600 2601 case TGSI_OPCODE_SNE: 2602 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2603 FETCH( &r[0], 0, chan_index ); 2604 FETCH( &r[1], 1, chan_index ); 2605 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2606 } 2607 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2608 STORE(&d[chan_index], 0, chan_index); 2609 } 2610 break; 2611 2612 case TGSI_OPCODE_STR: 2613 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2614 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2615 } 2616 break; 2617 2618 case TGSI_OPCODE_TEX: 2619 /* simple texture lookup */ 2620 /* src[0] = texcoord */ 2621 /* src[1] = sampler unit */ 2622 exec_tex(mach, inst, FALSE, FALSE); 2623 break; 2624 2625 case TGSI_OPCODE_TXB: 2626 /* Texture lookup with lod bias */ 2627 /* src[0] = texcoord (src[0].w = LOD bias) */ 2628 /* src[1] = sampler unit */ 2629 exec_tex(mach, inst, TRUE, FALSE); 2630 break; 2631 2632 case TGSI_OPCODE_TXD: 2633 /* Texture lookup with explict partial derivatives */ 2634 /* src[0] = texcoord */ 2635 /* src[1] = d[strq]/dx */ 2636 /* src[2] = d[strq]/dy */ 2637 /* src[3] = sampler unit */ 2638 exec_txd(mach, inst); 2639 break; 2640 2641 case TGSI_OPCODE_TXL: 2642 /* Texture lookup with explit LOD */ 2643 /* src[0] = texcoord (src[0].w = LOD) */ 2644 /* src[1] = sampler unit */ 2645 exec_tex(mach, inst, TRUE, FALSE); 2646 break; 2647 2648 case TGSI_OPCODE_TXP: 2649 /* Texture lookup with projection */ 2650 /* src[0] = texcoord (src[0].w = projection) */ 2651 /* src[1] = sampler unit */ 2652 exec_tex(mach, inst, FALSE, TRUE); 2653 break; 2654 2655 case TGSI_OPCODE_UP2H: 2656 assert (0); 2657 break; 2658 2659 case TGSI_OPCODE_UP2US: 2660 assert (0); 2661 break; 2662 2663 case TGSI_OPCODE_UP4B: 2664 assert (0); 2665 break; 2666 2667 case TGSI_OPCODE_UP4UB: 2668 assert (0); 2669 break; 2670 2671 case TGSI_OPCODE_X2D: 2672 FETCH(&r[0], 1, CHAN_X); 2673 FETCH(&r[1], 1, CHAN_Y); 2674 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2675 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2676 FETCH(&r[2], 2, CHAN_X); 2677 micro_mul(&r[2], &r[2], &r[0]); 2678 FETCH(&r[3], 2, CHAN_Y); 2679 micro_mul(&r[3], &r[3], &r[1]); 2680 micro_add(&r[2], &r[2], &r[3]); 2681 FETCH(&r[3], 0, CHAN_X); 2682 micro_add(&d[CHAN_X], &r[2], &r[3]); 2683 2684 } 2685 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2686 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2687 FETCH(&r[2], 2, CHAN_Z); 2688 micro_mul(&r[2], &r[2], &r[0]); 2689 FETCH(&r[3], 2, CHAN_W); 2690 micro_mul(&r[3], &r[3], &r[1]); 2691 micro_add(&r[2], &r[2], &r[3]); 2692 FETCH(&r[3], 0, CHAN_Y); 2693 micro_add(&d[CHAN_Y], &r[2], &r[3]); 2694 2695 } 2696 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2697 STORE(&d[CHAN_X], 0, CHAN_X); 2698 } 2699 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2700 STORE(&d[CHAN_Y], 0, CHAN_Y); 2701 } 2702 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2703 STORE(&d[CHAN_X], 0, CHAN_Z); 2704 } 2705 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2706 STORE(&d[CHAN_Y], 0, CHAN_W); 2707 } 2708 break; 2709 2710 case TGSI_OPCODE_ARA: 2711 assert (0); 2712 break; 2713 2714 case TGSI_OPCODE_BRA: 2715 assert (0); 2716 break; 2717 2718 case TGSI_OPCODE_CAL: 2719 /* skip the call if no execution channels are enabled */ 2720 if (mach->ExecMask) { 2721 /* do the call */ 2722 2723 /* First, record the depths of the execution stacks. 2724 * This is important for deeply nested/looped return statements. 2725 * We have to unwind the stacks by the correct amount. For a 2726 * real code generator, we could determine the number of entries 2727 * to pop off each stack with simple static analysis and avoid 2728 * implementing this data structure at run time. 2729 */ 2730 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 2731 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 2732 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 2733 /* note that PC was already incremented above */ 2734 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 2735 2736 mach->CallStackTop++; 2737 2738 /* Second, push the Cond, Loop, Cont, Func stacks */ 2739 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2740 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2741 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2742 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2743 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2744 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2745 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2746 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2747 2748 /* Finally, jump to the subroutine */ 2749 *pc = inst->Label.Label; 2750 } 2751 break; 2752 2753 case TGSI_OPCODE_RET: 2754 mach->FuncMask &= ~mach->ExecMask; 2755 UPDATE_EXEC_MASK(mach); 2756 2757 if (mach->FuncMask == 0x0) { 2758 /* really return now (otherwise, keep executing */ 2759 2760 if (mach->CallStackTop == 0) { 2761 /* returning from main() */ 2762 *pc = -1; 2763 return; 2764 } 2765 2766 assert(mach->CallStackTop > 0); 2767 mach->CallStackTop--; 2768 2769 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 2770 mach->CondMask = mach->CondStack[mach->CondStackTop]; 2771 2772 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 2773 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 2774 2775 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 2776 mach->ContMask = mach->ContStack[mach->ContStackTop]; 2777 2778 assert(mach->FuncStackTop > 0); 2779 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2780 2781 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 2782 2783 UPDATE_EXEC_MASK(mach); 2784 } 2785 break; 2786 2787 case TGSI_OPCODE_SSG: 2788 /* TGSI_OPCODE_SGN */ 2789 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2790 FETCH( &r[0], 0, chan_index ); 2791 micro_sgn(&d[chan_index], &r[0]); 2792 } 2793 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2794 STORE(&d[chan_index], 0, chan_index); 2795 } 2796 break; 2797 2798 case TGSI_OPCODE_CMP: 2799 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2800 FETCH(&r[0], 0, chan_index); 2801 FETCH(&r[1], 1, chan_index); 2802 FETCH(&r[2], 2, chan_index); 2803 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); 2804 } 2805 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2806 STORE(&d[chan_index], 0, chan_index); 2807 } 2808 break; 2809 2810 case TGSI_OPCODE_SCS: 2811 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2812 FETCH( &r[0], 0, CHAN_X ); 2813 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2814 micro_cos(&r[1], &r[0]); 2815 STORE(&r[1], 0, CHAN_X); 2816 } 2817 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2818 micro_sin(&r[1], &r[0]); 2819 STORE(&r[1], 0, CHAN_Y); 2820 } 2821 } 2822 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2823 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2824 } 2825 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2826 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2827 } 2828 break; 2829 2830 case TGSI_OPCODE_NRM: 2831 /* 3-component vector normalize */ 2832 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2833 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2834 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2835 /* r3 = sqrt(dp3(src0, src0)) */ 2836 FETCH(&r[0], 0, CHAN_X); 2837 micro_mul(&r[3], &r[0], &r[0]); 2838 FETCH(&r[1], 0, CHAN_Y); 2839 micro_mul(&r[4], &r[1], &r[1]); 2840 micro_add(&r[3], &r[3], &r[4]); 2841 FETCH(&r[2], 0, CHAN_Z); 2842 micro_mul(&r[4], &r[2], &r[2]); 2843 micro_add(&r[3], &r[3], &r[4]); 2844 micro_sqrt(&r[3], &r[3]); 2845 2846 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2847 micro_div(&r[0], &r[0], &r[3]); 2848 STORE(&r[0], 0, CHAN_X); 2849 } 2850 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2851 micro_div(&r[1], &r[1], &r[3]); 2852 STORE(&r[1], 0, CHAN_Y); 2853 } 2854 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2855 micro_div(&r[2], &r[2], &r[3]); 2856 STORE(&r[2], 0, CHAN_Z); 2857 } 2858 } 2859 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2860 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2861 } 2862 break; 2863 2864 case TGSI_OPCODE_NRM4: 2865 /* 4-component vector normalize */ 2866 { 2867 union tgsi_exec_channel tmp, dot; 2868 2869 /* tmp = dp4(src0, src0): */ 2870 FETCH( &r[0], 0, CHAN_X ); 2871 micro_mul( &tmp, &r[0], &r[0] ); 2872 2873 FETCH( &r[1], 0, CHAN_Y ); 2874 micro_mul( &dot, &r[1], &r[1] ); 2875 micro_add( &tmp, &tmp, &dot ); 2876 2877 FETCH( &r[2], 0, CHAN_Z ); 2878 micro_mul( &dot, &r[2], &r[2] ); 2879 micro_add( &tmp, &tmp, &dot ); 2880 2881 FETCH( &r[3], 0, CHAN_W ); 2882 micro_mul( &dot, &r[3], &r[3] ); 2883 micro_add( &tmp, &tmp, &dot ); 2884 2885 /* tmp = 1 / sqrt(tmp) */ 2886 micro_sqrt( &tmp, &tmp ); 2887 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2888 2889 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2890 /* chan = chan * tmp */ 2891 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2892 STORE( &r[chan_index], 0, chan_index ); 2893 } 2894 } 2895 break; 2896 2897 case TGSI_OPCODE_DIV: 2898 assert( 0 ); 2899 break; 2900 2901 case TGSI_OPCODE_DP2: 2902 FETCH( &r[0], 0, CHAN_X ); 2903 FETCH( &r[1], 1, CHAN_X ); 2904 micro_mul( &r[0], &r[0], &r[1] ); 2905 2906 FETCH( &r[1], 0, CHAN_Y ); 2907 FETCH( &r[2], 1, CHAN_Y ); 2908 micro_mul( &r[1], &r[1], &r[2] ); 2909 micro_add( &r[0], &r[0], &r[1] ); 2910 2911 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2912 STORE( &r[0], 0, chan_index ); 2913 } 2914 break; 2915 2916 case TGSI_OPCODE_IF: 2917 /* push CondMask */ 2918 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2919 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2920 FETCH( &r[0], 0, CHAN_X ); 2921 /* update CondMask */ 2922 if( ! r[0].u[0] ) { 2923 mach->CondMask &= ~0x1; 2924 } 2925 if( ! r[0].u[1] ) { 2926 mach->CondMask &= ~0x2; 2927 } 2928 if( ! r[0].u[2] ) { 2929 mach->CondMask &= ~0x4; 2930 } 2931 if( ! r[0].u[3] ) { 2932 mach->CondMask &= ~0x8; 2933 } 2934 UPDATE_EXEC_MASK(mach); 2935 /* Todo: If CondMask==0, jump to ELSE */ 2936 break; 2937 2938 case TGSI_OPCODE_ELSE: 2939 /* invert CondMask wrt previous mask */ 2940 { 2941 uint prevMask; 2942 assert(mach->CondStackTop > 0); 2943 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2944 mach->CondMask = ~mach->CondMask & prevMask; 2945 UPDATE_EXEC_MASK(mach); 2946 /* Todo: If CondMask==0, jump to ENDIF */ 2947 } 2948 break; 2949 2950 case TGSI_OPCODE_ENDIF: 2951 /* pop CondMask */ 2952 assert(mach->CondStackTop > 0); 2953 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2954 UPDATE_EXEC_MASK(mach); 2955 break; 2956 2957 case TGSI_OPCODE_END: 2958 /* halt execution */ 2959 *pc = -1; 2960 break; 2961 2962 case TGSI_OPCODE_REP: 2963 assert (0); 2964 break; 2965 2966 case TGSI_OPCODE_ENDREP: 2967 assert (0); 2968 break; 2969 2970 case TGSI_OPCODE_PUSHA: 2971 assert (0); 2972 break; 2973 2974 case TGSI_OPCODE_POPA: 2975 assert (0); 2976 break; 2977 2978 case TGSI_OPCODE_CEIL: 2979 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2980 FETCH( &r[0], 0, chan_index ); 2981 micro_ceil(&d[chan_index], &r[0]); 2982 } 2983 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2984 STORE(&d[chan_index], 0, chan_index); 2985 } 2986 break; 2987 2988 case TGSI_OPCODE_I2F: 2989 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2990 FETCH( &r[0], 0, chan_index ); 2991 micro_i2f(&d[chan_index], &r[0]); 2992 } 2993 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2994 STORE(&d[chan_index], 0, chan_index); 2995 } 2996 break; 2997 2998 case TGSI_OPCODE_NOT: 2999 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3000 FETCH( &r[0], 0, chan_index ); 3001 micro_not(&d[chan_index], &r[0]); 3002 } 3003 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3004 STORE(&d[chan_index], 0, chan_index); 3005 } 3006 break; 3007 3008 case TGSI_OPCODE_TRUNC: 3009 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3010 FETCH( &r[0], 0, chan_index ); 3011 micro_trunc(&d[chan_index], &r[0]); 3012 } 3013 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3014 STORE(&d[chan_index], 0, chan_index); 3015 } 3016 break; 3017 3018 case TGSI_OPCODE_SHL: 3019 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3020 FETCH( &r[0], 0, chan_index ); 3021 FETCH( &r[1], 1, chan_index ); 3022 micro_shl(&d[chan_index], &r[0], &r[1]); 3023 } 3024 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3025 STORE(&d[chan_index], 0, chan_index); 3026 } 3027 break; 3028 3029 case TGSI_OPCODE_SHR: 3030 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3031 FETCH( &r[0], 0, chan_index ); 3032 FETCH( &r[1], 1, chan_index ); 3033 micro_ishr(&d[chan_index], &r[0], &r[1]); 3034 } 3035 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3036 STORE(&d[chan_index], 0, chan_index); 3037 } 3038 break; 3039 3040 case TGSI_OPCODE_AND: 3041 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3042 FETCH( &r[0], 0, chan_index ); 3043 FETCH( &r[1], 1, chan_index ); 3044 micro_and(&d[chan_index], &r[0], &r[1]); 3045 } 3046 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3047 STORE(&d[chan_index], 0, chan_index); 3048 } 3049 break; 3050 3051 case TGSI_OPCODE_OR: 3052 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3053 FETCH( &r[0], 0, chan_index ); 3054 FETCH( &r[1], 1, chan_index ); 3055 micro_or(&d[chan_index], &r[0], &r[1]); 3056 } 3057 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3058 STORE(&d[chan_index], 0, chan_index); 3059 } 3060 break; 3061 3062 case TGSI_OPCODE_MOD: 3063 assert (0); 3064 break; 3065 3066 case TGSI_OPCODE_XOR: 3067 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3068 FETCH( &r[0], 0, chan_index ); 3069 FETCH( &r[1], 1, chan_index ); 3070 micro_xor(&d[chan_index], &r[0], &r[1]); 3071 } 3072 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3073 STORE(&d[chan_index], 0, chan_index); 3074 } 3075 break; 3076 3077 case TGSI_OPCODE_SAD: 3078 assert (0); 3079 break; 3080 3081 case TGSI_OPCODE_TXF: 3082 assert (0); 3083 break; 3084 3085 case TGSI_OPCODE_TXQ: 3086 assert (0); 3087 break; 3088 3089 case TGSI_OPCODE_EMIT: 3090 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 3091 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 3092 break; 3093 3094 case TGSI_OPCODE_ENDPRIM: 3095 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 3096 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 3097 break; 3098 3099 case TGSI_OPCODE_BGNFOR: 3100 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3101 for (chan_index = 0; chan_index < 3; chan_index++) { 3102 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3103 } 3104 ++mach->LoopCounterStackTop; 3105 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); 3106 /* update LoopMask */ 3107 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3108 mach->LoopMask &= ~0x1; 3109 } 3110 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3111 mach->LoopMask &= ~0x2; 3112 } 3113 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3114 mach->LoopMask &= ~0x4; 3115 } 3116 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3117 mach->LoopMask &= ~0x8; 3118 } 3119 /* TODO: if mach->LoopMask == 0, jump to end of loop */ 3120 UPDATE_EXEC_MASK(mach); 3121 /* fall-through (for now) */ 3122 case TGSI_OPCODE_BGNLOOP: 3123 /* push LoopMask and ContMasks */ 3124 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3125 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3126 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3127 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3128 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3129 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3130 break; 3131 3132 case TGSI_OPCODE_ENDFOR: 3133 assert(mach->LoopCounterStackTop > 0); 3134 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3135 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3136 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 3137 /* update LoopMask */ 3138 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3139 mach->LoopMask &= ~0x1; 3140 } 3141 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3142 mach->LoopMask &= ~0x2; 3143 } 3144 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3145 mach->LoopMask &= ~0x4; 3146 } 3147 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3148 mach->LoopMask &= ~0x8; 3149 } 3150 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3151 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3152 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3153 assert(mach->LoopLabelStackTop > 0); 3154 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3155 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); 3156 /* Restore ContMask, but don't pop */ 3157 assert(mach->ContStackTop > 0); 3158 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3159 UPDATE_EXEC_MASK(mach); 3160 if (mach->ExecMask) { 3161 /* repeat loop: jump to instruction just past BGNLOOP */ 3162 assert(mach->LoopLabelStackTop > 0); 3163 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3164 } 3165 else { 3166 /* exit loop: pop LoopMask */ 3167 assert(mach->LoopStackTop > 0); 3168 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3169 /* pop ContMask */ 3170 assert(mach->ContStackTop > 0); 3171 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3172 assert(mach->LoopLabelStackTop > 0); 3173 --mach->LoopLabelStackTop; 3174 assert(mach->LoopCounterStackTop > 0); 3175 --mach->LoopCounterStackTop; 3176 } 3177 UPDATE_EXEC_MASK(mach); 3178 break; 3179 3180 case TGSI_OPCODE_ENDLOOP: 3181 /* Restore ContMask, but don't pop */ 3182 assert(mach->ContStackTop > 0); 3183 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3184 UPDATE_EXEC_MASK(mach); 3185 if (mach->ExecMask) { 3186 /* repeat loop: jump to instruction just past BGNLOOP */ 3187 assert(mach->LoopLabelStackTop > 0); 3188 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3189 } 3190 else { 3191 /* exit loop: pop LoopMask */ 3192 assert(mach->LoopStackTop > 0); 3193 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3194 /* pop ContMask */ 3195 assert(mach->ContStackTop > 0); 3196 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3197 assert(mach->LoopLabelStackTop > 0); 3198 --mach->LoopLabelStackTop; 3199 } 3200 UPDATE_EXEC_MASK(mach); 3201 break; 3202 3203 case TGSI_OPCODE_BRK: 3204 /* turn off loop channels for each enabled exec channel */ 3205 mach->LoopMask &= ~mach->ExecMask; 3206 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3207 UPDATE_EXEC_MASK(mach); 3208 break; 3209 3210 case TGSI_OPCODE_CONT: 3211 /* turn off cont channels for each enabled exec channel */ 3212 mach->ContMask &= ~mach->ExecMask; 3213 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3214 UPDATE_EXEC_MASK(mach); 3215 break; 3216 3217 case TGSI_OPCODE_BGNSUB: 3218 /* no-op */ 3219 break; 3220 3221 case TGSI_OPCODE_ENDSUB: 3222 /* 3223 * XXX: This really should be a no-op. We should never reach this opcode. 3224 */ 3225 3226 assert(mach->CallStackTop > 0); 3227 mach->CallStackTop--; 3228 3229 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3230 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3231 3232 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3233 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3234 3235 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3236 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3237 3238 assert(mach->FuncStackTop > 0); 3239 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3240 3241 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3242 3243 UPDATE_EXEC_MASK(mach); 3244 break; 3245 3246 case TGSI_OPCODE_NOP: 3247 break; 3248 3249 default: 3250 assert( 0 ); 3251 } 3252} 3253 3254#define DEBUG_EXECUTION 0 3255 3256 3257/** 3258 * Run TGSI interpreter. 3259 * \return bitmask of "alive" quad components 3260 */ 3261uint 3262tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3263{ 3264 uint i; 3265 int pc = 0; 3266 3267 mach->CondMask = 0xf; 3268 mach->LoopMask = 0xf; 3269 mach->ContMask = 0xf; 3270 mach->FuncMask = 0xf; 3271 mach->ExecMask = 0xf; 3272 3273 assert(mach->CondStackTop == 0); 3274 assert(mach->LoopStackTop == 0); 3275 assert(mach->ContStackTop == 0); 3276 assert(mach->CallStackTop == 0); 3277 3278 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3279 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3280 3281 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3282 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3283 mach->Primitives[0] = 0; 3284 } 3285 3286 for (i = 0; i < QUAD_SIZE; i++) { 3287 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3288 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3289 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3290 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3291 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3292 } 3293 3294 /* execute declarations (interpolants) */ 3295 for (i = 0; i < mach->NumDeclarations; i++) { 3296 exec_declaration( mach, mach->Declarations+i ); 3297 } 3298 3299 { 3300#if DEBUG_EXECUTION 3301 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 3302 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 3303 uint inst = 1; 3304 3305 memcpy(temps, mach->Temps, sizeof(temps)); 3306 memcpy(outputs, mach->Outputs, sizeof(outputs)); 3307#endif 3308 3309 /* execute instructions, until pc is set to -1 */ 3310 while (pc != -1) { 3311 3312#if DEBUG_EXECUTION 3313 uint i; 3314 3315 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 3316#endif 3317 3318 assert(pc < (int) mach->NumInstructions); 3319 exec_instruction(mach, mach->Instructions + pc, &pc); 3320 3321#if DEBUG_EXECUTION 3322 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 3323 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 3324 uint j; 3325 3326 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 3327 debug_printf("TEMP[%2u] = ", i); 3328 for (j = 0; j < 4; j++) { 3329 if (j > 0) { 3330 debug_printf(" "); 3331 } 3332 debug_printf("(%6f, %6f, %6f, %6f)\n", 3333 temps[i].xyzw[0].f[j], 3334 temps[i].xyzw[1].f[j], 3335 temps[i].xyzw[2].f[j], 3336 temps[i].xyzw[3].f[j]); 3337 } 3338 } 3339 } 3340 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 3341 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 3342 uint j; 3343 3344 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 3345 debug_printf("OUT[%2u] = ", i); 3346 for (j = 0; j < 4; j++) { 3347 if (j > 0) { 3348 debug_printf(" "); 3349 } 3350 debug_printf("{%6f, %6f, %6f, %6f}\n", 3351 outputs[i].xyzw[0].f[j], 3352 outputs[i].xyzw[1].f[j], 3353 outputs[i].xyzw[2].f[j], 3354 outputs[i].xyzw[3].f[j]); 3355 } 3356 } 3357 } 3358#endif 3359 } 3360 } 3361 3362#if 0 3363 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3364 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3365 /* 3366 * Scale back depth component. 3367 */ 3368 for (i = 0; i < 4; i++) 3369 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3370 } 3371#endif 3372 3373 assert(mach->CondStackTop == 0); 3374 assert(mach->LoopStackTop == 0); 3375 assert(mach->ContStackTop == 0); 3376 assert(mach->CallStackTop == 0); 3377 3378 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3379} 3380