tgsi_exec.c revision 884007546c98b1779bf266ec5111b1e7e2b68b2e
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65#define TILE_TOP_LEFT 0 66#define TILE_TOP_RIGHT 1 67#define TILE_BOTTOM_LEFT 2 68#define TILE_BOTTOM_RIGHT 3 69 70#define CHAN_X 0 71#define CHAN_Y 1 72#define CHAN_Z 2 73#define CHAN_W 3 74 75/* 76 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 77 */ 78#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 79#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 80#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 81#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 82#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 83#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 84#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 85#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 86#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 87#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 88#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 89#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 90#define TEMP_128_I TGSI_EXEC_TEMP_128_I 91#define TEMP_128_C TGSI_EXEC_TEMP_128_C 92#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 93#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 94#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 95#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 96#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 97#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 98#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 99#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 100#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 101#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 102#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 103#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 104#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 105#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 106#define TEMP_R0 TGSI_EXEC_TEMP_R0 107#define TEMP_P0 TGSI_EXEC_TEMP_P0 108 109#define IS_CHANNEL_ENABLED(INST, CHAN)\ 110 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 111 112#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 113 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 114 115#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 116 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 117 if (IS_CHANNEL_ENABLED( INST, CHAN )) 118 119#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 120 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 121 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 122 123 124/** The execution mask depends on the conditional mask and the loop mask */ 125#define UPDATE_EXEC_MASK(MACH) \ 126 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 127 128 129static const union tgsi_exec_channel ZeroVec = 130 { { 0.0, 0.0, 0.0, 0.0 } }; 131 132 133#ifdef DEBUG 134static void 135check_inf_or_nan(const union tgsi_exec_channel *chan) 136{ 137 assert(!util_is_inf_or_nan(chan->f[0])); 138 assert(!util_is_inf_or_nan(chan->f[1])); 139 assert(!util_is_inf_or_nan(chan->f[2])); 140 assert(!util_is_inf_or_nan(chan->f[3])); 141} 142#endif 143 144 145#ifdef DEBUG 146static void 147print_chan(const char *msg, const union tgsi_exec_channel *chan) 148{ 149 debug_printf("%s = {%f, %f, %f, %f}\n", 150 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 151} 152#endif 153 154 155#ifdef DEBUG 156static void 157print_temp(const struct tgsi_exec_machine *mach, uint index) 158{ 159 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 160 int i; 161 debug_printf("Temp[%u] =\n", index); 162 for (i = 0; i < 4; i++) { 163 debug_printf(" %c: { %f, %f, %f, %f }\n", 164 "XYZW"[i], 165 tmp->xyzw[i].f[0], 166 tmp->xyzw[i].f[1], 167 tmp->xyzw[i].f[2], 168 tmp->xyzw[i].f[3]); 169 } 170} 171#endif 172 173 174/** 175 * Check if there's a potential src/dst register data dependency when 176 * using SOA execution. 177 * Example: 178 * MOV T, T.yxwz; 179 * This would expand into: 180 * MOV t0, t1; 181 * MOV t1, t0; 182 * MOV t2, t3; 183 * MOV t3, t2; 184 * The second instruction will have the wrong value for t0 if executed as-is. 185 */ 186boolean 187tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 188{ 189 uint i, chan; 190 191 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 192 if (writemask == TGSI_WRITEMASK_X || 193 writemask == TGSI_WRITEMASK_Y || 194 writemask == TGSI_WRITEMASK_Z || 195 writemask == TGSI_WRITEMASK_W || 196 writemask == TGSI_WRITEMASK_NONE) { 197 /* no chance of data dependency */ 198 return FALSE; 199 } 200 201 /* loop over src regs */ 202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 203 if ((inst->FullSrcRegisters[i].SrcRegister.File == 204 inst->FullDstRegisters[0].DstRegister.File) && 205 (inst->FullSrcRegisters[i].SrcRegister.Index == 206 inst->FullDstRegisters[0].DstRegister.Index)) { 207 /* loop over dest channels */ 208 uint channelsWritten = 0x0; 209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 210 /* check if we're reading a channel that's been written */ 211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); 212 if (channelsWritten & (1 << swizzle)) { 213 return TRUE; 214 } 215 216 channelsWritten |= (1 << chan); 217 } 218 } 219 } 220 return FALSE; 221} 222 223 224/** 225 * Initialize machine state by expanding tokens to full instructions, 226 * allocating temporary storage, setting up constants, etc. 227 * After this, we can call tgsi_exec_machine_run() many times. 228 */ 229void 230tgsi_exec_machine_bind_shader( 231 struct tgsi_exec_machine *mach, 232 const struct tgsi_token *tokens, 233 uint numSamplers, 234 struct tgsi_sampler **samplers) 235{ 236 uint k; 237 struct tgsi_parse_context parse; 238 struct tgsi_exec_labels *labels = &mach->Labels; 239 struct tgsi_full_instruction *instructions; 240 struct tgsi_full_declaration *declarations; 241 uint maxInstructions = 10, numInstructions = 0; 242 uint maxDeclarations = 10, numDeclarations = 0; 243 uint instno = 0; 244 245#if 0 246 tgsi_dump(tokens, 0); 247#endif 248 249 util_init_math(); 250 251 mach->Tokens = tokens; 252 mach->Samplers = samplers; 253 254 k = tgsi_parse_init (&parse, mach->Tokens); 255 if (k != TGSI_PARSE_OK) { 256 debug_printf( "Problem parsing!\n" ); 257 return; 258 } 259 260 mach->Processor = parse.FullHeader.Processor.Processor; 261 mach->ImmLimit = 0; 262 labels->count = 0; 263 264 declarations = (struct tgsi_full_declaration *) 265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 266 267 if (!declarations) { 268 return; 269 } 270 271 instructions = (struct tgsi_full_instruction *) 272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 273 274 if (!instructions) { 275 FREE( declarations ); 276 return; 277 } 278 279 while( !tgsi_parse_end_of_tokens( &parse ) ) { 280 uint pointer = parse.Position; 281 uint i; 282 283 tgsi_parse_token( &parse ); 284 switch( parse.FullToken.Token.Type ) { 285 case TGSI_TOKEN_TYPE_DECLARATION: 286 /* save expanded declaration */ 287 if (numDeclarations == maxDeclarations) { 288 declarations = REALLOC(declarations, 289 maxDeclarations 290 * sizeof(struct tgsi_full_declaration), 291 (maxDeclarations + 10) 292 * sizeof(struct tgsi_full_declaration)); 293 maxDeclarations += 10; 294 } 295 memcpy(declarations + numDeclarations, 296 &parse.FullToken.FullDeclaration, 297 sizeof(declarations[0])); 298 numDeclarations++; 299 break; 300 301 case TGSI_TOKEN_TYPE_IMMEDIATE: 302 { 303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 304 assert( size <= 4 ); 305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 306 307 for( i = 0; i < size; i++ ) { 308 mach->Imms[mach->ImmLimit][i] = 309 parse.FullToken.FullImmediate.u[i].Float; 310 } 311 mach->ImmLimit += 1; 312 } 313 break; 314 315 case TGSI_TOKEN_TYPE_INSTRUCTION: 316 assert( labels->count < MAX_LABELS ); 317 318 labels->labels[labels->count][0] = instno; 319 labels->labels[labels->count][1] = pointer; 320 labels->count++; 321 322 /* save expanded instruction */ 323 if (numInstructions == maxInstructions) { 324 instructions = REALLOC(instructions, 325 maxInstructions 326 * sizeof(struct tgsi_full_instruction), 327 (maxInstructions + 10) 328 * sizeof(struct tgsi_full_instruction)); 329 maxInstructions += 10; 330 } 331 332 memcpy(instructions + numInstructions, 333 &parse.FullToken.FullInstruction, 334 sizeof(instructions[0])); 335 336 numInstructions++; 337 break; 338 339 default: 340 assert( 0 ); 341 } 342 } 343 tgsi_parse_free (&parse); 344 345 if (mach->Declarations) { 346 FREE( mach->Declarations ); 347 } 348 mach->Declarations = declarations; 349 mach->NumDeclarations = numDeclarations; 350 351 if (mach->Instructions) { 352 FREE( mach->Instructions ); 353 } 354 mach->Instructions = instructions; 355 mach->NumInstructions = numInstructions; 356} 357 358 359struct tgsi_exec_machine * 360tgsi_exec_machine_create( void ) 361{ 362 struct tgsi_exec_machine *mach; 363 uint i; 364 365 mach = align_malloc( sizeof *mach, 16 ); 366 if (!mach) 367 goto fail; 368 369 memset(mach, 0, sizeof(*mach)); 370 371 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 372 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 373 374 /* Setup constants. */ 375 for( i = 0; i < 4; i++ ) { 376 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 377 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 378 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 379 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 380 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 381 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 382 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 383 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 384 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 385 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 386 } 387 388#ifdef DEBUG 389 /* silence warnings */ 390 (void) print_chan; 391 (void) print_temp; 392#endif 393 394 return mach; 395 396fail: 397 align_free(mach); 398 return NULL; 399} 400 401 402void 403tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 404{ 405 if (mach) { 406 FREE(mach->Instructions); 407 FREE(mach->Declarations); 408 } 409 410 align_free(mach); 411} 412 413 414static void 415micro_abs( 416 union tgsi_exec_channel *dst, 417 const union tgsi_exec_channel *src ) 418{ 419 dst->f[0] = fabsf( src->f[0] ); 420 dst->f[1] = fabsf( src->f[1] ); 421 dst->f[2] = fabsf( src->f[2] ); 422 dst->f[3] = fabsf( src->f[3] ); 423} 424 425static void 426micro_add( 427 union tgsi_exec_channel *dst, 428 const union tgsi_exec_channel *src0, 429 const union tgsi_exec_channel *src1 ) 430{ 431 dst->f[0] = src0->f[0] + src1->f[0]; 432 dst->f[1] = src0->f[1] + src1->f[1]; 433 dst->f[2] = src0->f[2] + src1->f[2]; 434 dst->f[3] = src0->f[3] + src1->f[3]; 435} 436 437#if 0 438static void 439micro_iadd( 440 union tgsi_exec_channel *dst, 441 const union tgsi_exec_channel *src0, 442 const union tgsi_exec_channel *src1 ) 443{ 444 dst->i[0] = src0->i[0] + src1->i[0]; 445 dst->i[1] = src0->i[1] + src1->i[1]; 446 dst->i[2] = src0->i[2] + src1->i[2]; 447 dst->i[3] = src0->i[3] + src1->i[3]; 448} 449#endif 450 451static void 452micro_and( 453 union tgsi_exec_channel *dst, 454 const union tgsi_exec_channel *src0, 455 const union tgsi_exec_channel *src1 ) 456{ 457 dst->u[0] = src0->u[0] & src1->u[0]; 458 dst->u[1] = src0->u[1] & src1->u[1]; 459 dst->u[2] = src0->u[2] & src1->u[2]; 460 dst->u[3] = src0->u[3] & src1->u[3]; 461} 462 463static void 464micro_ceil( 465 union tgsi_exec_channel *dst, 466 const union tgsi_exec_channel *src ) 467{ 468 dst->f[0] = ceilf( src->f[0] ); 469 dst->f[1] = ceilf( src->f[1] ); 470 dst->f[2] = ceilf( src->f[2] ); 471 dst->f[3] = ceilf( src->f[3] ); 472} 473 474static void 475micro_cos( 476 union tgsi_exec_channel *dst, 477 const union tgsi_exec_channel *src ) 478{ 479 dst->f[0] = cosf( src->f[0] ); 480 dst->f[1] = cosf( src->f[1] ); 481 dst->f[2] = cosf( src->f[2] ); 482 dst->f[3] = cosf( src->f[3] ); 483} 484 485static void 486micro_ddx( 487 union tgsi_exec_channel *dst, 488 const union tgsi_exec_channel *src ) 489{ 490 dst->f[0] = 491 dst->f[1] = 492 dst->f[2] = 493 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 494} 495 496static void 497micro_ddy( 498 union tgsi_exec_channel *dst, 499 const union tgsi_exec_channel *src ) 500{ 501 dst->f[0] = 502 dst->f[1] = 503 dst->f[2] = 504 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 505} 506 507static void 508micro_div( 509 union tgsi_exec_channel *dst, 510 const union tgsi_exec_channel *src0, 511 const union tgsi_exec_channel *src1 ) 512{ 513 if (src1->f[0] != 0) { 514 dst->f[0] = src0->f[0] / src1->f[0]; 515 } 516 if (src1->f[1] != 0) { 517 dst->f[1] = src0->f[1] / src1->f[1]; 518 } 519 if (src1->f[2] != 0) { 520 dst->f[2] = src0->f[2] / src1->f[2]; 521 } 522 if (src1->f[3] != 0) { 523 dst->f[3] = src0->f[3] / src1->f[3]; 524 } 525} 526 527#if 0 528static void 529micro_udiv( 530 union tgsi_exec_channel *dst, 531 const union tgsi_exec_channel *src0, 532 const union tgsi_exec_channel *src1 ) 533{ 534 dst->u[0] = src0->u[0] / src1->u[0]; 535 dst->u[1] = src0->u[1] / src1->u[1]; 536 dst->u[2] = src0->u[2] / src1->u[2]; 537 dst->u[3] = src0->u[3] / src1->u[3]; 538} 539#endif 540 541static void 542micro_eq( 543 union tgsi_exec_channel *dst, 544 const union tgsi_exec_channel *src0, 545 const union tgsi_exec_channel *src1, 546 const union tgsi_exec_channel *src2, 547 const union tgsi_exec_channel *src3 ) 548{ 549 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 550 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 551 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 552 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 553} 554 555#if 0 556static void 557micro_ieq( 558 union tgsi_exec_channel *dst, 559 const union tgsi_exec_channel *src0, 560 const union tgsi_exec_channel *src1, 561 const union tgsi_exec_channel *src2, 562 const union tgsi_exec_channel *src3 ) 563{ 564 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 565 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 566 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 567 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 568} 569#endif 570 571static void 572micro_exp2( 573 union tgsi_exec_channel *dst, 574 const union tgsi_exec_channel *src) 575{ 576#if FAST_MATH 577 dst->f[0] = util_fast_exp2( src->f[0] ); 578 dst->f[1] = util_fast_exp2( src->f[1] ); 579 dst->f[2] = util_fast_exp2( src->f[2] ); 580 dst->f[3] = util_fast_exp2( src->f[3] ); 581#else 582 583#if DEBUG 584 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 585 uint i; 586 union tgsi_exec_channel clamped; 587 588 for (i = 0; i < 4; i++) { 589 if (src->f[i] > 127.99999f) { 590 clamped.f[i] = 127.99999f; 591 } else if (src->f[i] < -126.99999f) { 592 clamped.f[i] = -126.99999f; 593 } else { 594 clamped.f[i] = src->f[i]; 595 } 596 } 597 src = &clamped; 598#endif 599 600 dst->f[0] = powf( 2.0f, src->f[0] ); 601 dst->f[1] = powf( 2.0f, src->f[1] ); 602 dst->f[2] = powf( 2.0f, src->f[2] ); 603 dst->f[3] = powf( 2.0f, src->f[3] ); 604#endif 605} 606 607#if 0 608static void 609micro_f2ut( 610 union tgsi_exec_channel *dst, 611 const union tgsi_exec_channel *src ) 612{ 613 dst->u[0] = (uint) src->f[0]; 614 dst->u[1] = (uint) src->f[1]; 615 dst->u[2] = (uint) src->f[2]; 616 dst->u[3] = (uint) src->f[3]; 617} 618#endif 619 620static void 621micro_float_clamp(union tgsi_exec_channel *dst, 622 const union tgsi_exec_channel *src) 623{ 624 uint i; 625 626 for (i = 0; i < 4; i++) { 627 if (src->f[i] > 0.0f) { 628 if (src->f[i] > 1.884467e+019f) 629 dst->f[i] = 1.884467e+019f; 630 else if (src->f[i] < 5.42101e-020f) 631 dst->f[i] = 5.42101e-020f; 632 else 633 dst->f[i] = src->f[i]; 634 } 635 else { 636 if (src->f[i] < -1.884467e+019f) 637 dst->f[i] = -1.884467e+019f; 638 else if (src->f[i] > -5.42101e-020f) 639 dst->f[i] = -5.42101e-020f; 640 else 641 dst->f[i] = src->f[i]; 642 } 643 } 644} 645 646static void 647micro_flr( 648 union tgsi_exec_channel *dst, 649 const union tgsi_exec_channel *src ) 650{ 651 dst->f[0] = floorf( src->f[0] ); 652 dst->f[1] = floorf( src->f[1] ); 653 dst->f[2] = floorf( src->f[2] ); 654 dst->f[3] = floorf( src->f[3] ); 655} 656 657static void 658micro_frc( 659 union tgsi_exec_channel *dst, 660 const union tgsi_exec_channel *src ) 661{ 662 dst->f[0] = src->f[0] - floorf( src->f[0] ); 663 dst->f[1] = src->f[1] - floorf( src->f[1] ); 664 dst->f[2] = src->f[2] - floorf( src->f[2] ); 665 dst->f[3] = src->f[3] - floorf( src->f[3] ); 666} 667 668static void 669micro_i2f( 670 union tgsi_exec_channel *dst, 671 const union tgsi_exec_channel *src ) 672{ 673 dst->f[0] = (float) src->i[0]; 674 dst->f[1] = (float) src->i[1]; 675 dst->f[2] = (float) src->i[2]; 676 dst->f[3] = (float) src->i[3]; 677} 678 679static void 680micro_lg2( 681 union tgsi_exec_channel *dst, 682 const union tgsi_exec_channel *src ) 683{ 684#if FAST_MATH 685 dst->f[0] = util_fast_log2( src->f[0] ); 686 dst->f[1] = util_fast_log2( src->f[1] ); 687 dst->f[2] = util_fast_log2( src->f[2] ); 688 dst->f[3] = util_fast_log2( src->f[3] ); 689#else 690 dst->f[0] = logf( src->f[0] ) * 1.442695f; 691 dst->f[1] = logf( src->f[1] ) * 1.442695f; 692 dst->f[2] = logf( src->f[2] ) * 1.442695f; 693 dst->f[3] = logf( src->f[3] ) * 1.442695f; 694#endif 695} 696 697static void 698micro_le( 699 union tgsi_exec_channel *dst, 700 const union tgsi_exec_channel *src0, 701 const union tgsi_exec_channel *src1, 702 const union tgsi_exec_channel *src2, 703 const union tgsi_exec_channel *src3 ) 704{ 705 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 706 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 707 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 708 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 709} 710 711static void 712micro_lt( 713 union tgsi_exec_channel *dst, 714 const union tgsi_exec_channel *src0, 715 const union tgsi_exec_channel *src1, 716 const union tgsi_exec_channel *src2, 717 const union tgsi_exec_channel *src3 ) 718{ 719 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 720 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 721 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 722 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 723} 724 725#if 0 726static void 727micro_ilt( 728 union tgsi_exec_channel *dst, 729 const union tgsi_exec_channel *src0, 730 const union tgsi_exec_channel *src1, 731 const union tgsi_exec_channel *src2, 732 const union tgsi_exec_channel *src3 ) 733{ 734 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 735 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 736 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 737 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 738} 739#endif 740 741#if 0 742static void 743micro_ult( 744 union tgsi_exec_channel *dst, 745 const union tgsi_exec_channel *src0, 746 const union tgsi_exec_channel *src1, 747 const union tgsi_exec_channel *src2, 748 const union tgsi_exec_channel *src3 ) 749{ 750 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 751 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 752 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 753 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 754} 755#endif 756 757static void 758micro_max( 759 union tgsi_exec_channel *dst, 760 const union tgsi_exec_channel *src0, 761 const union tgsi_exec_channel *src1 ) 762{ 763 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 764 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 765 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 766 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 767} 768 769#if 0 770static void 771micro_imax( 772 union tgsi_exec_channel *dst, 773 const union tgsi_exec_channel *src0, 774 const union tgsi_exec_channel *src1 ) 775{ 776 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 777 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 778 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 779 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 780} 781#endif 782 783#if 0 784static void 785micro_umax( 786 union tgsi_exec_channel *dst, 787 const union tgsi_exec_channel *src0, 788 const union tgsi_exec_channel *src1 ) 789{ 790 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 791 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 792 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 793 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 794} 795#endif 796 797static void 798micro_min( 799 union tgsi_exec_channel *dst, 800 const union tgsi_exec_channel *src0, 801 const union tgsi_exec_channel *src1 ) 802{ 803 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 804 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 805 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 806 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 807} 808 809#if 0 810static void 811micro_imin( 812 union tgsi_exec_channel *dst, 813 const union tgsi_exec_channel *src0, 814 const union tgsi_exec_channel *src1 ) 815{ 816 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 817 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 818 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 819 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 820} 821#endif 822 823#if 0 824static void 825micro_umin( 826 union tgsi_exec_channel *dst, 827 const union tgsi_exec_channel *src0, 828 const union tgsi_exec_channel *src1 ) 829{ 830 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 831 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 832 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 833 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 834} 835#endif 836 837#if 0 838static void 839micro_umod( 840 union tgsi_exec_channel *dst, 841 const union tgsi_exec_channel *src0, 842 const union tgsi_exec_channel *src1 ) 843{ 844 dst->u[0] = src0->u[0] % src1->u[0]; 845 dst->u[1] = src0->u[1] % src1->u[1]; 846 dst->u[2] = src0->u[2] % src1->u[2]; 847 dst->u[3] = src0->u[3] % src1->u[3]; 848} 849#endif 850 851static void 852micro_mul( 853 union tgsi_exec_channel *dst, 854 const union tgsi_exec_channel *src0, 855 const union tgsi_exec_channel *src1 ) 856{ 857 dst->f[0] = src0->f[0] * src1->f[0]; 858 dst->f[1] = src0->f[1] * src1->f[1]; 859 dst->f[2] = src0->f[2] * src1->f[2]; 860 dst->f[3] = src0->f[3] * src1->f[3]; 861} 862 863#if 0 864static void 865micro_imul( 866 union tgsi_exec_channel *dst, 867 const union tgsi_exec_channel *src0, 868 const union tgsi_exec_channel *src1 ) 869{ 870 dst->i[0] = src0->i[0] * src1->i[0]; 871 dst->i[1] = src0->i[1] * src1->i[1]; 872 dst->i[2] = src0->i[2] * src1->i[2]; 873 dst->i[3] = src0->i[3] * src1->i[3]; 874} 875#endif 876 877#if 0 878static void 879micro_imul64( 880 union tgsi_exec_channel *dst0, 881 union tgsi_exec_channel *dst1, 882 const union tgsi_exec_channel *src0, 883 const union tgsi_exec_channel *src1 ) 884{ 885 dst1->i[0] = src0->i[0] * src1->i[0]; 886 dst1->i[1] = src0->i[1] * src1->i[1]; 887 dst1->i[2] = src0->i[2] * src1->i[2]; 888 dst1->i[3] = src0->i[3] * src1->i[3]; 889 dst0->i[0] = 0; 890 dst0->i[1] = 0; 891 dst0->i[2] = 0; 892 dst0->i[3] = 0; 893} 894#endif 895 896#if 0 897static void 898micro_umul64( 899 union tgsi_exec_channel *dst0, 900 union tgsi_exec_channel *dst1, 901 const union tgsi_exec_channel *src0, 902 const union tgsi_exec_channel *src1 ) 903{ 904 dst1->u[0] = src0->u[0] * src1->u[0]; 905 dst1->u[1] = src0->u[1] * src1->u[1]; 906 dst1->u[2] = src0->u[2] * src1->u[2]; 907 dst1->u[3] = src0->u[3] * src1->u[3]; 908 dst0->u[0] = 0; 909 dst0->u[1] = 0; 910 dst0->u[2] = 0; 911 dst0->u[3] = 0; 912} 913#endif 914 915 916#if 0 917static void 918micro_movc( 919 union tgsi_exec_channel *dst, 920 const union tgsi_exec_channel *src0, 921 const union tgsi_exec_channel *src1, 922 const union tgsi_exec_channel *src2 ) 923{ 924 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 925 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 926 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 927 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 928} 929#endif 930 931static void 932micro_neg( 933 union tgsi_exec_channel *dst, 934 const union tgsi_exec_channel *src ) 935{ 936 dst->f[0] = -src->f[0]; 937 dst->f[1] = -src->f[1]; 938 dst->f[2] = -src->f[2]; 939 dst->f[3] = -src->f[3]; 940} 941 942#if 0 943static void 944micro_ineg( 945 union tgsi_exec_channel *dst, 946 const union tgsi_exec_channel *src ) 947{ 948 dst->i[0] = -src->i[0]; 949 dst->i[1] = -src->i[1]; 950 dst->i[2] = -src->i[2]; 951 dst->i[3] = -src->i[3]; 952} 953#endif 954 955static void 956micro_not( 957 union tgsi_exec_channel *dst, 958 const union tgsi_exec_channel *src ) 959{ 960 dst->u[0] = ~src->u[0]; 961 dst->u[1] = ~src->u[1]; 962 dst->u[2] = ~src->u[2]; 963 dst->u[3] = ~src->u[3]; 964} 965 966static void 967micro_or( 968 union tgsi_exec_channel *dst, 969 const union tgsi_exec_channel *src0, 970 const union tgsi_exec_channel *src1 ) 971{ 972 dst->u[0] = src0->u[0] | src1->u[0]; 973 dst->u[1] = src0->u[1] | src1->u[1]; 974 dst->u[2] = src0->u[2] | src1->u[2]; 975 dst->u[3] = src0->u[3] | src1->u[3]; 976} 977 978static void 979micro_pow( 980 union tgsi_exec_channel *dst, 981 const union tgsi_exec_channel *src0, 982 const union tgsi_exec_channel *src1 ) 983{ 984#if FAST_MATH 985 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 986 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 987 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 988 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 989#else 990 dst->f[0] = powf( src0->f[0], src1->f[0] ); 991 dst->f[1] = powf( src0->f[1], src1->f[1] ); 992 dst->f[2] = powf( src0->f[2], src1->f[2] ); 993 dst->f[3] = powf( src0->f[3], src1->f[3] ); 994#endif 995} 996 997static void 998micro_rnd( 999 union tgsi_exec_channel *dst, 1000 const union tgsi_exec_channel *src ) 1001{ 1002 dst->f[0] = floorf( src->f[0] + 0.5f ); 1003 dst->f[1] = floorf( src->f[1] + 0.5f ); 1004 dst->f[2] = floorf( src->f[2] + 0.5f ); 1005 dst->f[3] = floorf( src->f[3] + 0.5f ); 1006} 1007 1008static void 1009micro_sgn( 1010 union tgsi_exec_channel *dst, 1011 const union tgsi_exec_channel *src ) 1012{ 1013 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 1014 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 1015 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 1016 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1017} 1018 1019static void 1020micro_shl( 1021 union tgsi_exec_channel *dst, 1022 const union tgsi_exec_channel *src0, 1023 const union tgsi_exec_channel *src1 ) 1024{ 1025 dst->i[0] = src0->i[0] << src1->i[0]; 1026 dst->i[1] = src0->i[1] << src1->i[1]; 1027 dst->i[2] = src0->i[2] << src1->i[2]; 1028 dst->i[3] = src0->i[3] << src1->i[3]; 1029} 1030 1031static void 1032micro_ishr( 1033 union tgsi_exec_channel *dst, 1034 const union tgsi_exec_channel *src0, 1035 const union tgsi_exec_channel *src1 ) 1036{ 1037 dst->i[0] = src0->i[0] >> src1->i[0]; 1038 dst->i[1] = src0->i[1] >> src1->i[1]; 1039 dst->i[2] = src0->i[2] >> src1->i[2]; 1040 dst->i[3] = src0->i[3] >> src1->i[3]; 1041} 1042 1043static void 1044micro_trunc( 1045 union tgsi_exec_channel *dst, 1046 const union tgsi_exec_channel *src0 ) 1047{ 1048 dst->f[0] = (float) (int) src0->f[0]; 1049 dst->f[1] = (float) (int) src0->f[1]; 1050 dst->f[2] = (float) (int) src0->f[2]; 1051 dst->f[3] = (float) (int) src0->f[3]; 1052} 1053 1054#if 0 1055static void 1056micro_ushr( 1057 union tgsi_exec_channel *dst, 1058 const union tgsi_exec_channel *src0, 1059 const union tgsi_exec_channel *src1 ) 1060{ 1061 dst->u[0] = src0->u[0] >> src1->u[0]; 1062 dst->u[1] = src0->u[1] >> src1->u[1]; 1063 dst->u[2] = src0->u[2] >> src1->u[2]; 1064 dst->u[3] = src0->u[3] >> src1->u[3]; 1065} 1066#endif 1067 1068static void 1069micro_sin( 1070 union tgsi_exec_channel *dst, 1071 const union tgsi_exec_channel *src ) 1072{ 1073 dst->f[0] = sinf( src->f[0] ); 1074 dst->f[1] = sinf( src->f[1] ); 1075 dst->f[2] = sinf( src->f[2] ); 1076 dst->f[3] = sinf( src->f[3] ); 1077} 1078 1079static void 1080micro_sqrt( union tgsi_exec_channel *dst, 1081 const union tgsi_exec_channel *src ) 1082{ 1083 dst->f[0] = sqrtf( src->f[0] ); 1084 dst->f[1] = sqrtf( src->f[1] ); 1085 dst->f[2] = sqrtf( src->f[2] ); 1086 dst->f[3] = sqrtf( src->f[3] ); 1087} 1088 1089static void 1090micro_sub( 1091 union tgsi_exec_channel *dst, 1092 const union tgsi_exec_channel *src0, 1093 const union tgsi_exec_channel *src1 ) 1094{ 1095 dst->f[0] = src0->f[0] - src1->f[0]; 1096 dst->f[1] = src0->f[1] - src1->f[1]; 1097 dst->f[2] = src0->f[2] - src1->f[2]; 1098 dst->f[3] = src0->f[3] - src1->f[3]; 1099} 1100 1101#if 0 1102static void 1103micro_u2f( 1104 union tgsi_exec_channel *dst, 1105 const union tgsi_exec_channel *src ) 1106{ 1107 dst->f[0] = (float) src->u[0]; 1108 dst->f[1] = (float) src->u[1]; 1109 dst->f[2] = (float) src->u[2]; 1110 dst->f[3] = (float) src->u[3]; 1111} 1112#endif 1113 1114static void 1115micro_xor( 1116 union tgsi_exec_channel *dst, 1117 const union tgsi_exec_channel *src0, 1118 const union tgsi_exec_channel *src1 ) 1119{ 1120 dst->u[0] = src0->u[0] ^ src1->u[0]; 1121 dst->u[1] = src0->u[1] ^ src1->u[1]; 1122 dst->u[2] = src0->u[2] ^ src1->u[2]; 1123 dst->u[3] = src0->u[3] ^ src1->u[3]; 1124} 1125 1126static void 1127fetch_src_file_channel( 1128 const struct tgsi_exec_machine *mach, 1129 const uint file, 1130 const uint swizzle, 1131 const union tgsi_exec_channel *index, 1132 union tgsi_exec_channel *chan ) 1133{ 1134 switch( swizzle ) { 1135 case TGSI_SWIZZLE_X: 1136 case TGSI_SWIZZLE_Y: 1137 case TGSI_SWIZZLE_Z: 1138 case TGSI_SWIZZLE_W: 1139 switch( file ) { 1140 case TGSI_FILE_CONSTANT: 1141 assert(mach->Consts); 1142 if (index->i[0] < 0) 1143 chan->f[0] = 0.0f; 1144 else 1145 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1146 if (index->i[1] < 0) 1147 chan->f[1] = 0.0f; 1148 else 1149 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1150 if (index->i[2] < 0) 1151 chan->f[2] = 0.0f; 1152 else 1153 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1154 if (index->i[3] < 0) 1155 chan->f[3] = 0.0f; 1156 else 1157 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1158 break; 1159 1160 case TGSI_FILE_INPUT: 1161 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1162 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1163 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1164 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1165 break; 1166 1167 case TGSI_FILE_TEMPORARY: 1168 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1169 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1170 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1171 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1172 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1173 break; 1174 1175 case TGSI_FILE_IMMEDIATE: 1176 assert( index->i[0] < (int) mach->ImmLimit ); 1177 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1178 assert( index->i[1] < (int) mach->ImmLimit ); 1179 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1180 assert( index->i[2] < (int) mach->ImmLimit ); 1181 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1182 assert( index->i[3] < (int) mach->ImmLimit ); 1183 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1184 break; 1185 1186 case TGSI_FILE_ADDRESS: 1187 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1188 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1189 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1190 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1191 break; 1192 1193 case TGSI_FILE_PREDICATE: 1194 assert(index->i[0] < TGSI_EXEC_NUM_PREDS); 1195 assert(index->i[1] < TGSI_EXEC_NUM_PREDS); 1196 assert(index->i[2] < TGSI_EXEC_NUM_PREDS); 1197 assert(index->i[3] < TGSI_EXEC_NUM_PREDS); 1198 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; 1199 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; 1200 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; 1201 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; 1202 break; 1203 1204 case TGSI_FILE_OUTPUT: 1205 /* vertex/fragment output vars can be read too */ 1206 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1207 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1208 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1209 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1210 break; 1211 1212 default: 1213 assert( 0 ); 1214 } 1215 break; 1216 1217 default: 1218 assert( 0 ); 1219 } 1220} 1221 1222static void 1223fetch_source( 1224 const struct tgsi_exec_machine *mach, 1225 union tgsi_exec_channel *chan, 1226 const struct tgsi_full_src_register *reg, 1227 const uint chan_index ) 1228{ 1229 union tgsi_exec_channel index; 1230 uint swizzle; 1231 1232 /* We start with a direct index into a register file. 1233 * 1234 * file[1], 1235 * where: 1236 * file = SrcRegister.File 1237 * [1] = SrcRegister.Index 1238 */ 1239 index.i[0] = 1240 index.i[1] = 1241 index.i[2] = 1242 index.i[3] = reg->SrcRegister.Index; 1243 1244 /* There is an extra source register that indirectly subscripts 1245 * a register file. The direct index now becomes an offset 1246 * that is being added to the indirect register. 1247 * 1248 * file[ind[2].x+1], 1249 * where: 1250 * ind = SrcRegisterInd.File 1251 * [2] = SrcRegisterInd.Index 1252 * .x = SrcRegisterInd.SwizzleX 1253 */ 1254 if (reg->SrcRegister.Indirect) { 1255 union tgsi_exec_channel index2; 1256 union tgsi_exec_channel indir_index; 1257 const uint execmask = mach->ExecMask; 1258 uint i; 1259 1260 /* which address register (always zero now) */ 1261 index2.i[0] = 1262 index2.i[1] = 1263 index2.i[2] = 1264 index2.i[3] = reg->SrcRegisterInd.Index; 1265 1266 /* get current value of address register[swizzle] */ 1267 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1268 fetch_src_file_channel( 1269 mach, 1270 reg->SrcRegisterInd.File, 1271 swizzle, 1272 &index2, 1273 &indir_index ); 1274 1275 /* add value of address register to the offset */ 1276 index.i[0] += (int) indir_index.f[0]; 1277 index.i[1] += (int) indir_index.f[1]; 1278 index.i[2] += (int) indir_index.f[2]; 1279 index.i[3] += (int) indir_index.f[3]; 1280 1281 /* for disabled execution channels, zero-out the index to 1282 * avoid using a potential garbage value. 1283 */ 1284 for (i = 0; i < QUAD_SIZE; i++) { 1285 if ((execmask & (1 << i)) == 0) 1286 index.i[i] = 0; 1287 } 1288 } 1289 1290 /* There is an extra source register that is a second 1291 * subscript to a register file. Effectively it means that 1292 * the register file is actually a 2D array of registers. 1293 * 1294 * file[1][3] == file[1*sizeof(file[1])+3], 1295 * where: 1296 * [3] = SrcRegisterDim.Index 1297 */ 1298 if (reg->SrcRegister.Dimension) { 1299 /* The size of the first-order array depends on the register file type. 1300 * We need to multiply the index to the first array to get an effective, 1301 * "flat" index that points to the beginning of the second-order array. 1302 */ 1303 switch (reg->SrcRegister.File) { 1304 case TGSI_FILE_INPUT: 1305 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1306 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1307 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1308 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1309 break; 1310 case TGSI_FILE_CONSTANT: 1311 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1312 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1313 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1314 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1315 break; 1316 default: 1317 assert( 0 ); 1318 } 1319 1320 index.i[0] += reg->SrcRegisterDim.Index; 1321 index.i[1] += reg->SrcRegisterDim.Index; 1322 index.i[2] += reg->SrcRegisterDim.Index; 1323 index.i[3] += reg->SrcRegisterDim.Index; 1324 1325 /* Again, the second subscript index can be addressed indirectly 1326 * identically to the first one. 1327 * Nothing stops us from indirectly addressing the indirect register, 1328 * but there is no need for that, so we won't exercise it. 1329 * 1330 * file[1][ind[4].y+3], 1331 * where: 1332 * ind = SrcRegisterDimInd.File 1333 * [4] = SrcRegisterDimInd.Index 1334 * .y = SrcRegisterDimInd.SwizzleX 1335 */ 1336 if (reg->SrcRegisterDim.Indirect) { 1337 union tgsi_exec_channel index2; 1338 union tgsi_exec_channel indir_index; 1339 const uint execmask = mach->ExecMask; 1340 uint i; 1341 1342 index2.i[0] = 1343 index2.i[1] = 1344 index2.i[2] = 1345 index2.i[3] = reg->SrcRegisterDimInd.Index; 1346 1347 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1348 fetch_src_file_channel( 1349 mach, 1350 reg->SrcRegisterDimInd.File, 1351 swizzle, 1352 &index2, 1353 &indir_index ); 1354 1355 index.i[0] += (int) indir_index.f[0]; 1356 index.i[1] += (int) indir_index.f[1]; 1357 index.i[2] += (int) indir_index.f[2]; 1358 index.i[3] += (int) indir_index.f[3]; 1359 1360 /* for disabled execution channels, zero-out the index to 1361 * avoid using a potential garbage value. 1362 */ 1363 for (i = 0; i < QUAD_SIZE; i++) { 1364 if ((execmask & (1 << i)) == 0) 1365 index.i[i] = 0; 1366 } 1367 } 1368 1369 /* If by any chance there was a need for a 3D array of register 1370 * files, we would have to check whether SrcRegisterDim is followed 1371 * by a dimension register and continue the saga. 1372 */ 1373 } 1374 1375 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1376 fetch_src_file_channel( 1377 mach, 1378 reg->SrcRegister.File, 1379 swizzle, 1380 &index, 1381 chan ); 1382 1383 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1384 case TGSI_UTIL_SIGN_CLEAR: 1385 micro_abs( chan, chan ); 1386 break; 1387 1388 case TGSI_UTIL_SIGN_SET: 1389 micro_abs( chan, chan ); 1390 micro_neg( chan, chan ); 1391 break; 1392 1393 case TGSI_UTIL_SIGN_TOGGLE: 1394 micro_neg( chan, chan ); 1395 break; 1396 1397 case TGSI_UTIL_SIGN_KEEP: 1398 break; 1399 } 1400 1401 if (reg->SrcRegisterExtMod.Complement) { 1402 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1403 } 1404} 1405 1406static void 1407store_dest( 1408 struct tgsi_exec_machine *mach, 1409 const union tgsi_exec_channel *chan, 1410 const struct tgsi_full_dst_register *reg, 1411 const struct tgsi_full_instruction *inst, 1412 uint chan_index ) 1413{ 1414 uint i; 1415 union tgsi_exec_channel null; 1416 union tgsi_exec_channel *dst; 1417 uint execmask = mach->ExecMask; 1418 int offset = 0; /* indirection offset */ 1419 int index; 1420 1421#ifdef DEBUG 1422 check_inf_or_nan(chan); 1423#endif 1424 1425 /* There is an extra source register that indirectly subscripts 1426 * a register file. The direct index now becomes an offset 1427 * that is being added to the indirect register. 1428 * 1429 * file[ind[2].x+1], 1430 * where: 1431 * ind = DstRegisterInd.File 1432 * [2] = DstRegisterInd.Index 1433 * .x = DstRegisterInd.SwizzleX 1434 */ 1435 if (reg->DstRegister.Indirect) { 1436 union tgsi_exec_channel index; 1437 union tgsi_exec_channel indir_index; 1438 uint swizzle; 1439 1440 /* which address register (always zero for now) */ 1441 index.i[0] = 1442 index.i[1] = 1443 index.i[2] = 1444 index.i[3] = reg->DstRegisterInd.Index; 1445 1446 /* get current value of address register[swizzle] */ 1447 swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); 1448 1449 /* fetch values from the address/indirection register */ 1450 fetch_src_file_channel( 1451 mach, 1452 reg->DstRegisterInd.File, 1453 swizzle, 1454 &index, 1455 &indir_index ); 1456 1457 /* save indirection offset */ 1458 offset = (int) indir_index.f[0]; 1459 } 1460 1461 switch (reg->DstRegister.File) { 1462 case TGSI_FILE_NULL: 1463 dst = &null; 1464 break; 1465 1466 case TGSI_FILE_OUTPUT: 1467 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1468 + reg->DstRegister.Index; 1469 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1470 break; 1471 1472 case TGSI_FILE_TEMPORARY: 1473 index = reg->DstRegister.Index; 1474 assert( index < TGSI_EXEC_NUM_TEMPS ); 1475 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1476 break; 1477 1478 case TGSI_FILE_ADDRESS: 1479 index = reg->DstRegister.Index; 1480 dst = &mach->Addrs[index].xyzw[chan_index]; 1481 break; 1482 1483 case TGSI_FILE_LOOP: 1484 assert(reg->DstRegister.Index == 0); 1485 assert(mach->LoopCounterStackTop > 0); 1486 assert(chan_index == CHAN_X); 1487 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; 1488 break; 1489 1490 case TGSI_FILE_PREDICATE: 1491 index = reg->DstRegister.Index; 1492 assert(index < TGSI_EXEC_NUM_PREDS); 1493 dst = &mach->Predicates[index].xyzw[chan_index]; 1494 break; 1495 1496 default: 1497 assert( 0 ); 1498 return; 1499 } 1500 1501 if (inst->Instruction.Predicate) { 1502 uint swizzle; 1503 union tgsi_exec_channel *pred; 1504 1505 switch (chan_index) { 1506 case CHAN_X: 1507 swizzle = inst->InstructionPredicate.SwizzleX; 1508 break; 1509 case CHAN_Y: 1510 swizzle = inst->InstructionPredicate.SwizzleY; 1511 break; 1512 case CHAN_Z: 1513 swizzle = inst->InstructionPredicate.SwizzleZ; 1514 break; 1515 case CHAN_W: 1516 swizzle = inst->InstructionPredicate.SwizzleW; 1517 break; 1518 default: 1519 assert(0); 1520 return; 1521 } 1522 1523 assert(inst->InstructionPredicate.Index == 0); 1524 1525 pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle]; 1526 1527 if (inst->InstructionPredicate.Negate) { 1528 for (i = 0; i < QUAD_SIZE; i++) { 1529 if (pred->u[i]) { 1530 execmask &= ~(1 << i); 1531 } 1532 } 1533 } else { 1534 for (i = 0; i < QUAD_SIZE; i++) { 1535 if (!pred->u[i]) { 1536 execmask &= ~(1 << i); 1537 } 1538 } 1539 } 1540 } 1541 1542 switch (inst->Instruction.Saturate) { 1543 case TGSI_SAT_NONE: 1544 for (i = 0; i < QUAD_SIZE; i++) 1545 if (execmask & (1 << i)) 1546 dst->i[i] = chan->i[i]; 1547 break; 1548 1549 case TGSI_SAT_ZERO_ONE: 1550 for (i = 0; i < QUAD_SIZE; i++) 1551 if (execmask & (1 << i)) { 1552 if (chan->f[i] < 0.0f) 1553 dst->f[i] = 0.0f; 1554 else if (chan->f[i] > 1.0f) 1555 dst->f[i] = 1.0f; 1556 else 1557 dst->i[i] = chan->i[i]; 1558 } 1559 break; 1560 1561 case TGSI_SAT_MINUS_PLUS_ONE: 1562 for (i = 0; i < QUAD_SIZE; i++) 1563 if (execmask & (1 << i)) { 1564 if (chan->f[i] < -1.0f) 1565 dst->f[i] = -1.0f; 1566 else if (chan->f[i] > 1.0f) 1567 dst->f[i] = 1.0f; 1568 else 1569 dst->i[i] = chan->i[i]; 1570 } 1571 break; 1572 1573 default: 1574 assert( 0 ); 1575 } 1576} 1577 1578#define FETCH(VAL,INDEX,CHAN)\ 1579 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1580 1581#define STORE(VAL,INDEX,CHAN)\ 1582 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1583 1584 1585/** 1586 * Execute ARB-style KIL which is predicated by a src register. 1587 * Kill fragment if any of the four values is less than zero. 1588 */ 1589static void 1590exec_kil(struct tgsi_exec_machine *mach, 1591 const struct tgsi_full_instruction *inst) 1592{ 1593 uint uniquemask; 1594 uint chan_index; 1595 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1596 union tgsi_exec_channel r[1]; 1597 1598 /* This mask stores component bits that were already tested. */ 1599 uniquemask = 0; 1600 1601 for (chan_index = 0; chan_index < 4; chan_index++) 1602 { 1603 uint swizzle; 1604 uint i; 1605 1606 /* unswizzle channel */ 1607 swizzle = tgsi_util_get_full_src_register_swizzle ( 1608 &inst->FullSrcRegisters[0], 1609 chan_index); 1610 1611 /* check if the component has not been already tested */ 1612 if (uniquemask & (1 << swizzle)) 1613 continue; 1614 uniquemask |= 1 << swizzle; 1615 1616 FETCH(&r[0], 0, chan_index); 1617 for (i = 0; i < 4; i++) 1618 if (r[0].f[i] < 0.0f) 1619 kilmask |= 1 << i; 1620 } 1621 1622 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1623} 1624 1625/** 1626 * Execute NVIDIA-style KIL which is predicated by a condition code. 1627 * Kill fragment if the condition code is TRUE. 1628 */ 1629static void 1630exec_kilp(struct tgsi_exec_machine *mach, 1631 const struct tgsi_full_instruction *inst) 1632{ 1633 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1634 1635 /* "unconditional" kil */ 1636 kilmask = mach->ExecMask; 1637 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1638} 1639 1640 1641/* 1642 * Fetch a four texture samples using STR texture coordinates. 1643 */ 1644static void 1645fetch_texel( struct tgsi_sampler *sampler, 1646 const union tgsi_exec_channel *s, 1647 const union tgsi_exec_channel *t, 1648 const union tgsi_exec_channel *p, 1649 float lodbias, /* XXX should be float[4] */ 1650 union tgsi_exec_channel *r, 1651 union tgsi_exec_channel *g, 1652 union tgsi_exec_channel *b, 1653 union tgsi_exec_channel *a ) 1654{ 1655 uint j; 1656 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1657 1658 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1659 1660 for (j = 0; j < 4; j++) { 1661 r->f[j] = rgba[0][j]; 1662 g->f[j] = rgba[1][j]; 1663 b->f[j] = rgba[2][j]; 1664 a->f[j] = rgba[3][j]; 1665 } 1666} 1667 1668 1669static void 1670exec_tex(struct tgsi_exec_machine *mach, 1671 const struct tgsi_full_instruction *inst, 1672 boolean biasLod, 1673 boolean projected) 1674{ 1675 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1676 union tgsi_exec_channel r[4]; 1677 uint chan_index; 1678 float lodBias; 1679 1680 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1681 1682 switch (inst->InstructionExtTexture.Texture) { 1683 case TGSI_TEXTURE_1D: 1684 case TGSI_TEXTURE_SHADOW1D: 1685 1686 FETCH(&r[0], 0, CHAN_X); 1687 1688 if (projected) { 1689 FETCH(&r[1], 0, CHAN_W); 1690 micro_div( &r[0], &r[0], &r[1] ); 1691 } 1692 1693 if (biasLod) { 1694 FETCH(&r[1], 0, CHAN_W); 1695 lodBias = r[2].f[0]; 1696 } 1697 else 1698 lodBias = 0.0; 1699 1700 fetch_texel(mach->Samplers[unit], 1701 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1702 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1703 break; 1704 1705 case TGSI_TEXTURE_2D: 1706 case TGSI_TEXTURE_RECT: 1707 case TGSI_TEXTURE_SHADOW2D: 1708 case TGSI_TEXTURE_SHADOWRECT: 1709 1710 FETCH(&r[0], 0, CHAN_X); 1711 FETCH(&r[1], 0, CHAN_Y); 1712 FETCH(&r[2], 0, CHAN_Z); 1713 1714 if (projected) { 1715 FETCH(&r[3], 0, CHAN_W); 1716 micro_div( &r[0], &r[0], &r[3] ); 1717 micro_div( &r[1], &r[1], &r[3] ); 1718 micro_div( &r[2], &r[2], &r[3] ); 1719 } 1720 1721 if (biasLod) { 1722 FETCH(&r[3], 0, CHAN_W); 1723 lodBias = r[3].f[0]; 1724 } 1725 else 1726 lodBias = 0.0; 1727 1728 fetch_texel(mach->Samplers[unit], 1729 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1730 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1731 break; 1732 1733 case TGSI_TEXTURE_3D: 1734 case TGSI_TEXTURE_CUBE: 1735 1736 FETCH(&r[0], 0, CHAN_X); 1737 FETCH(&r[1], 0, CHAN_Y); 1738 FETCH(&r[2], 0, CHAN_Z); 1739 1740 if (projected) { 1741 FETCH(&r[3], 0, CHAN_W); 1742 micro_div( &r[0], &r[0], &r[3] ); 1743 micro_div( &r[1], &r[1], &r[3] ); 1744 micro_div( &r[2], &r[2], &r[3] ); 1745 } 1746 1747 if (biasLod) { 1748 FETCH(&r[3], 0, CHAN_W); 1749 lodBias = r[3].f[0]; 1750 } 1751 else 1752 lodBias = 0.0; 1753 1754 fetch_texel(mach->Samplers[unit], 1755 &r[0], &r[1], &r[2], lodBias, 1756 &r[0], &r[1], &r[2], &r[3]); 1757 break; 1758 1759 default: 1760 assert (0); 1761 } 1762 1763 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1764 STORE( &r[chan_index], 0, chan_index ); 1765 } 1766} 1767 1768static void 1769exec_txd(struct tgsi_exec_machine *mach, 1770 const struct tgsi_full_instruction *inst) 1771{ 1772 const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index; 1773 union tgsi_exec_channel r[4]; 1774 uint chan_index; 1775 1776 /* 1777 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 1778 */ 1779 1780 switch (inst->InstructionExtTexture.Texture) { 1781 case TGSI_TEXTURE_1D: 1782 case TGSI_TEXTURE_SHADOW1D: 1783 1784 FETCH(&r[0], 0, CHAN_X); 1785 1786 fetch_texel(mach->Samplers[unit], 1787 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ 1788 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1789 break; 1790 1791 case TGSI_TEXTURE_2D: 1792 case TGSI_TEXTURE_RECT: 1793 case TGSI_TEXTURE_SHADOW2D: 1794 case TGSI_TEXTURE_SHADOWRECT: 1795 1796 FETCH(&r[0], 0, CHAN_X); 1797 FETCH(&r[1], 0, CHAN_Y); 1798 FETCH(&r[2], 0, CHAN_Z); 1799 1800 fetch_texel(mach->Samplers[unit], 1801 &r[0], &r[1], &r[2], 0.0f, /* inputs */ 1802 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1803 break; 1804 1805 case TGSI_TEXTURE_3D: 1806 case TGSI_TEXTURE_CUBE: 1807 1808 FETCH(&r[0], 0, CHAN_X); 1809 FETCH(&r[1], 0, CHAN_Y); 1810 FETCH(&r[2], 0, CHAN_Z); 1811 1812 fetch_texel(mach->Samplers[unit], 1813 &r[0], &r[1], &r[2], 0.0f, 1814 &r[0], &r[1], &r[2], &r[3]); 1815 break; 1816 1817 default: 1818 assert(0); 1819 } 1820 1821 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1822 STORE(&r[chan_index], 0, chan_index); 1823 } 1824} 1825 1826 1827/** 1828 * Evaluate a constant-valued coefficient at the position of the 1829 * current quad. 1830 */ 1831static void 1832eval_constant_coef( 1833 struct tgsi_exec_machine *mach, 1834 unsigned attrib, 1835 unsigned chan ) 1836{ 1837 unsigned i; 1838 1839 for( i = 0; i < QUAD_SIZE; i++ ) { 1840 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1841 } 1842} 1843 1844/** 1845 * Evaluate a linear-valued coefficient at the position of the 1846 * current quad. 1847 */ 1848static void 1849eval_linear_coef( 1850 struct tgsi_exec_machine *mach, 1851 unsigned attrib, 1852 unsigned chan ) 1853{ 1854 const float x = mach->QuadPos.xyzw[0].f[0]; 1855 const float y = mach->QuadPos.xyzw[1].f[0]; 1856 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1857 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1858 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1859 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1860 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1861 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1862 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1863} 1864 1865/** 1866 * Evaluate a perspective-valued coefficient at the position of the 1867 * current quad. 1868 */ 1869static void 1870eval_perspective_coef( 1871 struct tgsi_exec_machine *mach, 1872 unsigned attrib, 1873 unsigned chan ) 1874{ 1875 const float x = mach->QuadPos.xyzw[0].f[0]; 1876 const float y = mach->QuadPos.xyzw[1].f[0]; 1877 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1878 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1879 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1880 const float *w = mach->QuadPos.xyzw[3].f; 1881 /* divide by W here */ 1882 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1883 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1884 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1885 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1886} 1887 1888 1889typedef void (* eval_coef_func)( 1890 struct tgsi_exec_machine *mach, 1891 unsigned attrib, 1892 unsigned chan ); 1893 1894static void 1895exec_declaration(struct tgsi_exec_machine *mach, 1896 const struct tgsi_full_declaration *decl) 1897{ 1898 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 1899 if (decl->Declaration.File == TGSI_FILE_INPUT) { 1900 uint first, last, mask; 1901 1902 first = decl->DeclarationRange.First; 1903 last = decl->DeclarationRange.Last; 1904 mask = decl->Declaration.UsageMask; 1905 1906 if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { 1907 assert(decl->Semantic.SemanticIndex == 0); 1908 assert(first == last); 1909 assert(mask = TGSI_WRITEMASK_XYZW); 1910 1911 mach->Inputs[first] = mach->QuadPos; 1912 } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) { 1913 uint i; 1914 1915 assert(decl->Semantic.SemanticIndex == 0); 1916 assert(first == last); 1917 1918 for (i = 0; i < QUAD_SIZE; i++) { 1919 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 1920 } 1921 } else { 1922 eval_coef_func eval; 1923 uint i, j; 1924 1925 switch (decl->Declaration.Interpolate) { 1926 case TGSI_INTERPOLATE_CONSTANT: 1927 eval = eval_constant_coef; 1928 break; 1929 1930 case TGSI_INTERPOLATE_LINEAR: 1931 eval = eval_linear_coef; 1932 break; 1933 1934 case TGSI_INTERPOLATE_PERSPECTIVE: 1935 eval = eval_perspective_coef; 1936 break; 1937 1938 default: 1939 assert(0); 1940 return; 1941 } 1942 1943 for (j = 0; j < NUM_CHANNELS; j++) { 1944 if (mask & (1 << j)) { 1945 for (i = first; i <= last; i++) { 1946 eval(mach, i, j); 1947 } 1948 } 1949 } 1950 } 1951 } 1952 } 1953} 1954 1955static void 1956exec_instruction( 1957 struct tgsi_exec_machine *mach, 1958 const struct tgsi_full_instruction *inst, 1959 int *pc ) 1960{ 1961 uint chan_index; 1962 union tgsi_exec_channel r[10]; 1963 union tgsi_exec_channel d[8]; 1964 1965 (*pc)++; 1966 1967 switch (inst->Instruction.Opcode) { 1968 case TGSI_OPCODE_ARL: 1969 case TGSI_OPCODE_FLR: 1970 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1971 FETCH( &r[0], 0, chan_index ); 1972 micro_flr(&d[chan_index], &r[0]); 1973 } 1974 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1975 STORE(&d[chan_index], 0, chan_index); 1976 } 1977 break; 1978 1979 case TGSI_OPCODE_MOV: 1980 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1981 FETCH(&d[chan_index], 0, chan_index); 1982 } 1983 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1984 STORE(&d[chan_index], 0, chan_index); 1985 } 1986 break; 1987 1988 case TGSI_OPCODE_LIT: 1989 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1990 FETCH( &r[0], 0, CHAN_X ); 1991 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1992 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 1993 } 1994 1995 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1996 FETCH( &r[1], 0, CHAN_Y ); 1997 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1998 1999 FETCH( &r[2], 0, CHAN_W ); 2000 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2001 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2002 micro_pow( &r[1], &r[1], &r[2] ); 2003 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2004 } 2005 2006 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2007 STORE(&d[CHAN_Y], 0, CHAN_Y); 2008 } 2009 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2010 STORE(&d[CHAN_Z], 0, CHAN_Z); 2011 } 2012 } 2013 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2014 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2015 } 2016 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2017 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2018 } 2019 break; 2020 2021 case TGSI_OPCODE_RCP: 2022 /* TGSI_OPCODE_RECIP */ 2023 FETCH( &r[0], 0, CHAN_X ); 2024 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2025 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2026 STORE( &r[0], 0, chan_index ); 2027 } 2028 break; 2029 2030 case TGSI_OPCODE_RSQ: 2031 /* TGSI_OPCODE_RECIPSQRT */ 2032 FETCH( &r[0], 0, CHAN_X ); 2033 micro_abs( &r[0], &r[0] ); 2034 micro_sqrt( &r[0], &r[0] ); 2035 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2037 STORE( &r[0], 0, chan_index ); 2038 } 2039 break; 2040 2041 case TGSI_OPCODE_EXP: 2042 FETCH( &r[0], 0, CHAN_X ); 2043 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2044 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2045 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2046 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2047 } 2048 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2049 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2050 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2051 } 2052 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2053 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2054 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2055 } 2056 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2057 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2058 } 2059 break; 2060 2061 case TGSI_OPCODE_LOG: 2062 FETCH( &r[0], 0, CHAN_X ); 2063 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2064 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2065 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2066 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2067 STORE( &r[0], 0, CHAN_X ); 2068 } 2069 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2070 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2071 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2072 STORE( &r[0], 0, CHAN_Y ); 2073 } 2074 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2075 STORE( &r[1], 0, CHAN_Z ); 2076 } 2077 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2078 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2079 } 2080 break; 2081 2082 case TGSI_OPCODE_MUL: 2083 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2084 FETCH(&r[0], 0, chan_index); 2085 FETCH(&r[1], 1, chan_index); 2086 micro_mul(&d[chan_index], &r[0], &r[1]); 2087 } 2088 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2089 STORE(&d[chan_index], 0, chan_index); 2090 } 2091 break; 2092 2093 case TGSI_OPCODE_ADD: 2094 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2095 FETCH( &r[0], 0, chan_index ); 2096 FETCH( &r[1], 1, chan_index ); 2097 micro_add(&d[chan_index], &r[0], &r[1]); 2098 } 2099 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2100 STORE(&d[chan_index], 0, chan_index); 2101 } 2102 break; 2103 2104 case TGSI_OPCODE_DP3: 2105 /* TGSI_OPCODE_DOT3 */ 2106 FETCH( &r[0], 0, CHAN_X ); 2107 FETCH( &r[1], 1, CHAN_X ); 2108 micro_mul( &r[0], &r[0], &r[1] ); 2109 2110 FETCH( &r[1], 0, CHAN_Y ); 2111 FETCH( &r[2], 1, CHAN_Y ); 2112 micro_mul( &r[1], &r[1], &r[2] ); 2113 micro_add( &r[0], &r[0], &r[1] ); 2114 2115 FETCH( &r[1], 0, CHAN_Z ); 2116 FETCH( &r[2], 1, CHAN_Z ); 2117 micro_mul( &r[1], &r[1], &r[2] ); 2118 micro_add( &r[0], &r[0], &r[1] ); 2119 2120 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2121 STORE( &r[0], 0, chan_index ); 2122 } 2123 break; 2124 2125 case TGSI_OPCODE_DP4: 2126 /* TGSI_OPCODE_DOT4 */ 2127 FETCH(&r[0], 0, CHAN_X); 2128 FETCH(&r[1], 1, CHAN_X); 2129 2130 micro_mul( &r[0], &r[0], &r[1] ); 2131 2132 FETCH(&r[1], 0, CHAN_Y); 2133 FETCH(&r[2], 1, CHAN_Y); 2134 2135 micro_mul( &r[1], &r[1], &r[2] ); 2136 micro_add( &r[0], &r[0], &r[1] ); 2137 2138 FETCH(&r[1], 0, CHAN_Z); 2139 FETCH(&r[2], 1, CHAN_Z); 2140 2141 micro_mul( &r[1], &r[1], &r[2] ); 2142 micro_add( &r[0], &r[0], &r[1] ); 2143 2144 FETCH(&r[1], 0, CHAN_W); 2145 FETCH(&r[2], 1, CHAN_W); 2146 2147 micro_mul( &r[1], &r[1], &r[2] ); 2148 micro_add( &r[0], &r[0], &r[1] ); 2149 2150 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2151 STORE( &r[0], 0, chan_index ); 2152 } 2153 break; 2154 2155 case TGSI_OPCODE_DST: 2156 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2157 FETCH( &r[0], 0, CHAN_Y ); 2158 FETCH( &r[1], 1, CHAN_Y); 2159 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2160 } 2161 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2162 FETCH(&d[CHAN_Z], 0, CHAN_Z); 2163 } 2164 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2165 FETCH(&d[CHAN_W], 1, CHAN_W); 2166 } 2167 2168 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2169 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); 2170 } 2171 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2172 STORE(&d[CHAN_Y], 0, CHAN_Y); 2173 } 2174 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2175 STORE(&d[CHAN_Z], 0, CHAN_Z); 2176 } 2177 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2178 STORE(&d[CHAN_W], 0, CHAN_W); 2179 } 2180 break; 2181 2182 case TGSI_OPCODE_MIN: 2183 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2184 FETCH(&r[0], 0, chan_index); 2185 FETCH(&r[1], 1, chan_index); 2186 2187 /* XXX use micro_min()?? */ 2188 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); 2189 } 2190 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2191 STORE(&d[chan_index], 0, chan_index); 2192 } 2193 break; 2194 2195 case TGSI_OPCODE_MAX: 2196 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2197 FETCH(&r[0], 0, chan_index); 2198 FETCH(&r[1], 1, chan_index); 2199 2200 /* XXX use micro_max()?? */ 2201 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); 2202 } 2203 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2204 STORE(&d[chan_index], 0, chan_index); 2205 } 2206 break; 2207 2208 case TGSI_OPCODE_SLT: 2209 /* TGSI_OPCODE_SETLT */ 2210 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2211 FETCH( &r[0], 0, chan_index ); 2212 FETCH( &r[1], 1, chan_index ); 2213 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2214 } 2215 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2216 STORE(&d[chan_index], 0, chan_index); 2217 } 2218 break; 2219 2220 case TGSI_OPCODE_SGE: 2221 /* TGSI_OPCODE_SETGE */ 2222 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2223 FETCH( &r[0], 0, chan_index ); 2224 FETCH( &r[1], 1, chan_index ); 2225 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2226 } 2227 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2228 STORE(&d[chan_index], 0, chan_index); 2229 } 2230 break; 2231 2232 case TGSI_OPCODE_MAD: 2233 /* TGSI_OPCODE_MADD */ 2234 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2235 FETCH( &r[0], 0, chan_index ); 2236 FETCH( &r[1], 1, chan_index ); 2237 micro_mul( &r[0], &r[0], &r[1] ); 2238 FETCH( &r[1], 2, chan_index ); 2239 micro_add(&d[chan_index], &r[0], &r[1]); 2240 } 2241 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2242 STORE(&d[chan_index], 0, chan_index); 2243 } 2244 break; 2245 2246 case TGSI_OPCODE_SUB: 2247 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2248 FETCH(&r[0], 0, chan_index); 2249 FETCH(&r[1], 1, chan_index); 2250 micro_sub(&d[chan_index], &r[0], &r[1]); 2251 } 2252 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2253 STORE(&d[chan_index], 0, chan_index); 2254 } 2255 break; 2256 2257 case TGSI_OPCODE_LRP: 2258 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2259 FETCH(&r[0], 0, chan_index); 2260 FETCH(&r[1], 1, chan_index); 2261 FETCH(&r[2], 2, chan_index); 2262 micro_sub( &r[1], &r[1], &r[2] ); 2263 micro_mul( &r[0], &r[0], &r[1] ); 2264 micro_add(&d[chan_index], &r[0], &r[2]); 2265 } 2266 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2267 STORE(&d[chan_index], 0, chan_index); 2268 } 2269 break; 2270 2271 case TGSI_OPCODE_CND: 2272 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2273 FETCH(&r[0], 0, chan_index); 2274 FETCH(&r[1], 1, chan_index); 2275 FETCH(&r[2], 2, chan_index); 2276 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2277 } 2278 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2279 STORE(&d[chan_index], 0, chan_index); 2280 } 2281 break; 2282 2283 case TGSI_OPCODE_DP2A: 2284 FETCH( &r[0], 0, CHAN_X ); 2285 FETCH( &r[1], 1, CHAN_X ); 2286 micro_mul( &r[0], &r[0], &r[1] ); 2287 2288 FETCH( &r[1], 0, CHAN_Y ); 2289 FETCH( &r[2], 1, CHAN_Y ); 2290 micro_mul( &r[1], &r[1], &r[2] ); 2291 micro_add( &r[0], &r[0], &r[1] ); 2292 2293 FETCH( &r[2], 2, CHAN_X ); 2294 micro_add( &r[0], &r[0], &r[2] ); 2295 2296 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2297 STORE( &r[0], 0, chan_index ); 2298 } 2299 break; 2300 2301 case TGSI_OPCODE_FRC: 2302 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2303 FETCH( &r[0], 0, chan_index ); 2304 micro_frc(&d[chan_index], &r[0]); 2305 } 2306 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2307 STORE(&d[chan_index], 0, chan_index); 2308 } 2309 break; 2310 2311 case TGSI_OPCODE_CLAMP: 2312 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2313 FETCH(&r[0], 0, chan_index); 2314 FETCH(&r[1], 1, chan_index); 2315 micro_max(&r[0], &r[0], &r[1]); 2316 FETCH(&r[1], 2, chan_index); 2317 micro_min(&d[chan_index], &r[0], &r[1]); 2318 } 2319 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2320 STORE(&d[chan_index], 0, chan_index); 2321 } 2322 break; 2323 2324 case TGSI_OPCODE_ROUND: 2325 case TGSI_OPCODE_ARR: 2326 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2327 FETCH( &r[0], 0, chan_index ); 2328 micro_rnd(&d[chan_index], &r[0]); 2329 } 2330 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2331 STORE(&d[chan_index], 0, chan_index); 2332 } 2333 break; 2334 2335 case TGSI_OPCODE_EX2: 2336 FETCH(&r[0], 0, CHAN_X); 2337 2338 micro_exp2( &r[0], &r[0] ); 2339 2340 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2341 STORE( &r[0], 0, chan_index ); 2342 } 2343 break; 2344 2345 case TGSI_OPCODE_LG2: 2346 FETCH( &r[0], 0, CHAN_X ); 2347 micro_lg2( &r[0], &r[0] ); 2348 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2349 STORE( &r[0], 0, chan_index ); 2350 } 2351 break; 2352 2353 case TGSI_OPCODE_POW: 2354 FETCH(&r[0], 0, CHAN_X); 2355 FETCH(&r[1], 1, CHAN_X); 2356 2357 micro_pow( &r[0], &r[0], &r[1] ); 2358 2359 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2360 STORE( &r[0], 0, chan_index ); 2361 } 2362 break; 2363 2364 case TGSI_OPCODE_XPD: 2365 FETCH(&r[0], 0, CHAN_Y); 2366 FETCH(&r[1], 1, CHAN_Z); 2367 2368 micro_mul( &r[2], &r[0], &r[1] ); 2369 2370 FETCH(&r[3], 0, CHAN_Z); 2371 FETCH(&r[4], 1, CHAN_Y); 2372 2373 micro_mul( &r[5], &r[3], &r[4] ); 2374 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2375 2376 FETCH(&r[2], 1, CHAN_X); 2377 2378 micro_mul( &r[3], &r[3], &r[2] ); 2379 2380 FETCH(&r[5], 0, CHAN_X); 2381 2382 micro_mul( &r[1], &r[1], &r[5] ); 2383 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2384 2385 micro_mul( &r[5], &r[5], &r[4] ); 2386 micro_mul( &r[0], &r[0], &r[2] ); 2387 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2388 2389 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2390 STORE(&d[CHAN_X], 0, CHAN_X); 2391 } 2392 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2393 STORE(&d[CHAN_Y], 0, CHAN_Y); 2394 } 2395 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2396 STORE(&d[CHAN_Z], 0, CHAN_Z); 2397 } 2398 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2399 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2400 } 2401 break; 2402 2403 case TGSI_OPCODE_ABS: 2404 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2405 FETCH(&r[0], 0, chan_index); 2406 micro_abs(&d[chan_index], &r[0]); 2407 } 2408 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2409 STORE(&d[chan_index], 0, chan_index); 2410 } 2411 break; 2412 2413 case TGSI_OPCODE_RCC: 2414 FETCH(&r[0], 0, CHAN_X); 2415 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2416 micro_float_clamp(&r[0], &r[0]); 2417 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2418 STORE(&r[0], 0, chan_index); 2419 } 2420 break; 2421 2422 case TGSI_OPCODE_DPH: 2423 FETCH(&r[0], 0, CHAN_X); 2424 FETCH(&r[1], 1, CHAN_X); 2425 2426 micro_mul( &r[0], &r[0], &r[1] ); 2427 2428 FETCH(&r[1], 0, CHAN_Y); 2429 FETCH(&r[2], 1, CHAN_Y); 2430 2431 micro_mul( &r[1], &r[1], &r[2] ); 2432 micro_add( &r[0], &r[0], &r[1] ); 2433 2434 FETCH(&r[1], 0, CHAN_Z); 2435 FETCH(&r[2], 1, CHAN_Z); 2436 2437 micro_mul( &r[1], &r[1], &r[2] ); 2438 micro_add( &r[0], &r[0], &r[1] ); 2439 2440 FETCH(&r[1], 1, CHAN_W); 2441 2442 micro_add( &r[0], &r[0], &r[1] ); 2443 2444 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2445 STORE( &r[0], 0, chan_index ); 2446 } 2447 break; 2448 2449 case TGSI_OPCODE_COS: 2450 FETCH(&r[0], 0, CHAN_X); 2451 2452 micro_cos( &r[0], &r[0] ); 2453 2454 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2455 STORE( &r[0], 0, chan_index ); 2456 } 2457 break; 2458 2459 case TGSI_OPCODE_DDX: 2460 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2461 FETCH( &r[0], 0, chan_index ); 2462 micro_ddx(&d[chan_index], &r[0]); 2463 } 2464 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2465 STORE(&d[chan_index], 0, chan_index); 2466 } 2467 break; 2468 2469 case TGSI_OPCODE_DDY: 2470 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2471 FETCH( &r[0], 0, chan_index ); 2472 micro_ddy(&d[chan_index], &r[0]); 2473 } 2474 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2475 STORE(&d[chan_index], 0, chan_index); 2476 } 2477 break; 2478 2479 case TGSI_OPCODE_KILP: 2480 exec_kilp (mach, inst); 2481 break; 2482 2483 case TGSI_OPCODE_KIL: 2484 exec_kil (mach, inst); 2485 break; 2486 2487 case TGSI_OPCODE_PK2H: 2488 assert (0); 2489 break; 2490 2491 case TGSI_OPCODE_PK2US: 2492 assert (0); 2493 break; 2494 2495 case TGSI_OPCODE_PK4B: 2496 assert (0); 2497 break; 2498 2499 case TGSI_OPCODE_PK4UB: 2500 assert (0); 2501 break; 2502 2503 case TGSI_OPCODE_RFL: 2504 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2505 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2506 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2507 /* r0 = dp3(src0, src0) */ 2508 FETCH(&r[2], 0, CHAN_X); 2509 micro_mul(&r[0], &r[2], &r[2]); 2510 FETCH(&r[4], 0, CHAN_Y); 2511 micro_mul(&r[8], &r[4], &r[4]); 2512 micro_add(&r[0], &r[0], &r[8]); 2513 FETCH(&r[6], 0, CHAN_Z); 2514 micro_mul(&r[8], &r[6], &r[6]); 2515 micro_add(&r[0], &r[0], &r[8]); 2516 2517 /* r1 = dp3(src0, src1) */ 2518 FETCH(&r[3], 1, CHAN_X); 2519 micro_mul(&r[1], &r[2], &r[3]); 2520 FETCH(&r[5], 1, CHAN_Y); 2521 micro_mul(&r[8], &r[4], &r[5]); 2522 micro_add(&r[1], &r[1], &r[8]); 2523 FETCH(&r[7], 1, CHAN_Z); 2524 micro_mul(&r[8], &r[6], &r[7]); 2525 micro_add(&r[1], &r[1], &r[8]); 2526 2527 /* r1 = 2 * r1 / r0 */ 2528 micro_add(&r[1], &r[1], &r[1]); 2529 micro_div(&r[1], &r[1], &r[0]); 2530 2531 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2532 micro_mul(&r[2], &r[2], &r[1]); 2533 micro_sub(&r[2], &r[2], &r[3]); 2534 STORE(&r[2], 0, CHAN_X); 2535 } 2536 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2537 micro_mul(&r[4], &r[4], &r[1]); 2538 micro_sub(&r[4], &r[4], &r[5]); 2539 STORE(&r[4], 0, CHAN_Y); 2540 } 2541 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2542 micro_mul(&r[6], &r[6], &r[1]); 2543 micro_sub(&r[6], &r[6], &r[7]); 2544 STORE(&r[6], 0, CHAN_Z); 2545 } 2546 } 2547 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2548 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2549 } 2550 break; 2551 2552 case TGSI_OPCODE_SEQ: 2553 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2554 FETCH( &r[0], 0, chan_index ); 2555 FETCH( &r[1], 1, chan_index ); 2556 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2557 } 2558 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2559 STORE(&d[chan_index], 0, chan_index); 2560 } 2561 break; 2562 2563 case TGSI_OPCODE_SFL: 2564 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2565 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2566 } 2567 break; 2568 2569 case TGSI_OPCODE_SGT: 2570 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2571 FETCH( &r[0], 0, chan_index ); 2572 FETCH( &r[1], 1, chan_index ); 2573 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2574 } 2575 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2576 STORE(&d[chan_index], 0, chan_index); 2577 } 2578 break; 2579 2580 case TGSI_OPCODE_SIN: 2581 FETCH( &r[0], 0, CHAN_X ); 2582 micro_sin( &r[0], &r[0] ); 2583 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2584 STORE( &r[0], 0, chan_index ); 2585 } 2586 break; 2587 2588 case TGSI_OPCODE_SLE: 2589 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2590 FETCH( &r[0], 0, chan_index ); 2591 FETCH( &r[1], 1, chan_index ); 2592 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2593 } 2594 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2595 STORE(&d[chan_index], 0, chan_index); 2596 } 2597 break; 2598 2599 case TGSI_OPCODE_SNE: 2600 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2601 FETCH( &r[0], 0, chan_index ); 2602 FETCH( &r[1], 1, chan_index ); 2603 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2604 } 2605 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2606 STORE(&d[chan_index], 0, chan_index); 2607 } 2608 break; 2609 2610 case TGSI_OPCODE_STR: 2611 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2612 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2613 } 2614 break; 2615 2616 case TGSI_OPCODE_TEX: 2617 /* simple texture lookup */ 2618 /* src[0] = texcoord */ 2619 /* src[1] = sampler unit */ 2620 exec_tex(mach, inst, FALSE, FALSE); 2621 break; 2622 2623 case TGSI_OPCODE_TXB: 2624 /* Texture lookup with lod bias */ 2625 /* src[0] = texcoord (src[0].w = LOD bias) */ 2626 /* src[1] = sampler unit */ 2627 exec_tex(mach, inst, TRUE, FALSE); 2628 break; 2629 2630 case TGSI_OPCODE_TXD: 2631 /* Texture lookup with explict partial derivatives */ 2632 /* src[0] = texcoord */ 2633 /* src[1] = d[strq]/dx */ 2634 /* src[2] = d[strq]/dy */ 2635 /* src[3] = sampler unit */ 2636 exec_txd(mach, inst); 2637 break; 2638 2639 case TGSI_OPCODE_TXL: 2640 /* Texture lookup with explit LOD */ 2641 /* src[0] = texcoord (src[0].w = LOD) */ 2642 /* src[1] = sampler unit */ 2643 exec_tex(mach, inst, TRUE, FALSE); 2644 break; 2645 2646 case TGSI_OPCODE_TXP: 2647 /* Texture lookup with projection */ 2648 /* src[0] = texcoord (src[0].w = projection) */ 2649 /* src[1] = sampler unit */ 2650 exec_tex(mach, inst, FALSE, TRUE); 2651 break; 2652 2653 case TGSI_OPCODE_UP2H: 2654 assert (0); 2655 break; 2656 2657 case TGSI_OPCODE_UP2US: 2658 assert (0); 2659 break; 2660 2661 case TGSI_OPCODE_UP4B: 2662 assert (0); 2663 break; 2664 2665 case TGSI_OPCODE_UP4UB: 2666 assert (0); 2667 break; 2668 2669 case TGSI_OPCODE_X2D: 2670 FETCH(&r[0], 1, CHAN_X); 2671 FETCH(&r[1], 1, CHAN_Y); 2672 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2673 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2674 FETCH(&r[2], 2, CHAN_X); 2675 micro_mul(&r[2], &r[2], &r[0]); 2676 FETCH(&r[3], 2, CHAN_Y); 2677 micro_mul(&r[3], &r[3], &r[1]); 2678 micro_add(&r[2], &r[2], &r[3]); 2679 FETCH(&r[3], 0, CHAN_X); 2680 micro_add(&d[CHAN_X], &r[2], &r[3]); 2681 2682 } 2683 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2684 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2685 FETCH(&r[2], 2, CHAN_Z); 2686 micro_mul(&r[2], &r[2], &r[0]); 2687 FETCH(&r[3], 2, CHAN_W); 2688 micro_mul(&r[3], &r[3], &r[1]); 2689 micro_add(&r[2], &r[2], &r[3]); 2690 FETCH(&r[3], 0, CHAN_Y); 2691 micro_add(&d[CHAN_Y], &r[2], &r[3]); 2692 2693 } 2694 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2695 STORE(&d[CHAN_X], 0, CHAN_X); 2696 } 2697 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2698 STORE(&d[CHAN_Y], 0, CHAN_Y); 2699 } 2700 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2701 STORE(&d[CHAN_X], 0, CHAN_Z); 2702 } 2703 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2704 STORE(&d[CHAN_Y], 0, CHAN_W); 2705 } 2706 break; 2707 2708 case TGSI_OPCODE_ARA: 2709 assert (0); 2710 break; 2711 2712 case TGSI_OPCODE_BRA: 2713 assert (0); 2714 break; 2715 2716 case TGSI_OPCODE_CAL: 2717 /* skip the call if no execution channels are enabled */ 2718 if (mach->ExecMask) { 2719 /* do the call */ 2720 2721 /* First, record the depths of the execution stacks. 2722 * This is important for deeply nested/looped return statements. 2723 * We have to unwind the stacks by the correct amount. For a 2724 * real code generator, we could determine the number of entries 2725 * to pop off each stack with simple static analysis and avoid 2726 * implementing this data structure at run time. 2727 */ 2728 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 2729 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 2730 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 2731 /* note that PC was already incremented above */ 2732 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 2733 2734 mach->CallStackTop++; 2735 2736 /* Second, push the Cond, Loop, Cont, Func stacks */ 2737 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2738 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2739 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2740 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2741 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2742 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2743 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2744 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2745 2746 /* Finally, jump to the subroutine */ 2747 *pc = inst->InstructionExtLabel.Label; 2748 } 2749 break; 2750 2751 case TGSI_OPCODE_RET: 2752 mach->FuncMask &= ~mach->ExecMask; 2753 UPDATE_EXEC_MASK(mach); 2754 2755 if (mach->FuncMask == 0x0) { 2756 /* really return now (otherwise, keep executing */ 2757 2758 if (mach->CallStackTop == 0) { 2759 /* returning from main() */ 2760 *pc = -1; 2761 return; 2762 } 2763 2764 assert(mach->CallStackTop > 0); 2765 mach->CallStackTop--; 2766 2767 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 2768 mach->CondMask = mach->CondStack[mach->CondStackTop]; 2769 2770 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 2771 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 2772 2773 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 2774 mach->ContMask = mach->ContStack[mach->ContStackTop]; 2775 2776 assert(mach->FuncStackTop > 0); 2777 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2778 2779 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 2780 2781 UPDATE_EXEC_MASK(mach); 2782 } 2783 break; 2784 2785 case TGSI_OPCODE_SSG: 2786 /* TGSI_OPCODE_SGN */ 2787 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2788 FETCH( &r[0], 0, chan_index ); 2789 micro_sgn(&d[chan_index], &r[0]); 2790 } 2791 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2792 STORE(&d[chan_index], 0, chan_index); 2793 } 2794 break; 2795 2796 case TGSI_OPCODE_CMP: 2797 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2798 FETCH(&r[0], 0, chan_index); 2799 FETCH(&r[1], 1, chan_index); 2800 FETCH(&r[2], 2, chan_index); 2801 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); 2802 } 2803 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2804 STORE(&d[chan_index], 0, chan_index); 2805 } 2806 break; 2807 2808 case TGSI_OPCODE_SCS: 2809 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2810 FETCH( &r[0], 0, CHAN_X ); 2811 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2812 micro_cos(&r[1], &r[0]); 2813 STORE(&r[1], 0, CHAN_X); 2814 } 2815 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2816 micro_sin(&r[1], &r[0]); 2817 STORE(&r[1], 0, CHAN_Y); 2818 } 2819 } 2820 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2821 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2822 } 2823 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2824 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2825 } 2826 break; 2827 2828 case TGSI_OPCODE_NRM: 2829 /* 3-component vector normalize */ 2830 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2831 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2832 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2833 /* r3 = sqrt(dp3(src0, src0)) */ 2834 FETCH(&r[0], 0, CHAN_X); 2835 micro_mul(&r[3], &r[0], &r[0]); 2836 FETCH(&r[1], 0, CHAN_Y); 2837 micro_mul(&r[4], &r[1], &r[1]); 2838 micro_add(&r[3], &r[3], &r[4]); 2839 FETCH(&r[2], 0, CHAN_Z); 2840 micro_mul(&r[4], &r[2], &r[2]); 2841 micro_add(&r[3], &r[3], &r[4]); 2842 micro_sqrt(&r[3], &r[3]); 2843 2844 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2845 micro_div(&r[0], &r[0], &r[3]); 2846 STORE(&r[0], 0, CHAN_X); 2847 } 2848 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2849 micro_div(&r[1], &r[1], &r[3]); 2850 STORE(&r[1], 0, CHAN_Y); 2851 } 2852 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2853 micro_div(&r[2], &r[2], &r[3]); 2854 STORE(&r[2], 0, CHAN_Z); 2855 } 2856 } 2857 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2858 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2859 } 2860 break; 2861 2862 case TGSI_OPCODE_NRM4: 2863 /* 4-component vector normalize */ 2864 { 2865 union tgsi_exec_channel tmp, dot; 2866 2867 /* tmp = dp4(src0, src0): */ 2868 FETCH( &r[0], 0, CHAN_X ); 2869 micro_mul( &tmp, &r[0], &r[0] ); 2870 2871 FETCH( &r[1], 0, CHAN_Y ); 2872 micro_mul( &dot, &r[1], &r[1] ); 2873 micro_add( &tmp, &tmp, &dot ); 2874 2875 FETCH( &r[2], 0, CHAN_Z ); 2876 micro_mul( &dot, &r[2], &r[2] ); 2877 micro_add( &tmp, &tmp, &dot ); 2878 2879 FETCH( &r[3], 0, CHAN_W ); 2880 micro_mul( &dot, &r[3], &r[3] ); 2881 micro_add( &tmp, &tmp, &dot ); 2882 2883 /* tmp = 1 / sqrt(tmp) */ 2884 micro_sqrt( &tmp, &tmp ); 2885 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2886 2887 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2888 /* chan = chan * tmp */ 2889 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2890 STORE( &r[chan_index], 0, chan_index ); 2891 } 2892 } 2893 break; 2894 2895 case TGSI_OPCODE_DIV: 2896 assert( 0 ); 2897 break; 2898 2899 case TGSI_OPCODE_DP2: 2900 FETCH( &r[0], 0, CHAN_X ); 2901 FETCH( &r[1], 1, CHAN_X ); 2902 micro_mul( &r[0], &r[0], &r[1] ); 2903 2904 FETCH( &r[1], 0, CHAN_Y ); 2905 FETCH( &r[2], 1, CHAN_Y ); 2906 micro_mul( &r[1], &r[1], &r[2] ); 2907 micro_add( &r[0], &r[0], &r[1] ); 2908 2909 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2910 STORE( &r[0], 0, chan_index ); 2911 } 2912 break; 2913 2914 case TGSI_OPCODE_IF: 2915 /* push CondMask */ 2916 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2917 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2918 FETCH( &r[0], 0, CHAN_X ); 2919 /* update CondMask */ 2920 if( ! r[0].u[0] ) { 2921 mach->CondMask &= ~0x1; 2922 } 2923 if( ! r[0].u[1] ) { 2924 mach->CondMask &= ~0x2; 2925 } 2926 if( ! r[0].u[2] ) { 2927 mach->CondMask &= ~0x4; 2928 } 2929 if( ! r[0].u[3] ) { 2930 mach->CondMask &= ~0x8; 2931 } 2932 UPDATE_EXEC_MASK(mach); 2933 /* Todo: If CondMask==0, jump to ELSE */ 2934 break; 2935 2936 case TGSI_OPCODE_ELSE: 2937 /* invert CondMask wrt previous mask */ 2938 { 2939 uint prevMask; 2940 assert(mach->CondStackTop > 0); 2941 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2942 mach->CondMask = ~mach->CondMask & prevMask; 2943 UPDATE_EXEC_MASK(mach); 2944 /* Todo: If CondMask==0, jump to ENDIF */ 2945 } 2946 break; 2947 2948 case TGSI_OPCODE_ENDIF: 2949 /* pop CondMask */ 2950 assert(mach->CondStackTop > 0); 2951 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2952 UPDATE_EXEC_MASK(mach); 2953 break; 2954 2955 case TGSI_OPCODE_END: 2956 /* halt execution */ 2957 *pc = -1; 2958 break; 2959 2960 case TGSI_OPCODE_REP: 2961 assert (0); 2962 break; 2963 2964 case TGSI_OPCODE_ENDREP: 2965 assert (0); 2966 break; 2967 2968 case TGSI_OPCODE_PUSHA: 2969 assert (0); 2970 break; 2971 2972 case TGSI_OPCODE_POPA: 2973 assert (0); 2974 break; 2975 2976 case TGSI_OPCODE_CEIL: 2977 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2978 FETCH( &r[0], 0, chan_index ); 2979 micro_ceil(&d[chan_index], &r[0]); 2980 } 2981 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2982 STORE(&d[chan_index], 0, chan_index); 2983 } 2984 break; 2985 2986 case TGSI_OPCODE_I2F: 2987 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2988 FETCH( &r[0], 0, chan_index ); 2989 micro_i2f(&d[chan_index], &r[0]); 2990 } 2991 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2992 STORE(&d[chan_index], 0, chan_index); 2993 } 2994 break; 2995 2996 case TGSI_OPCODE_NOT: 2997 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2998 FETCH( &r[0], 0, chan_index ); 2999 micro_not(&d[chan_index], &r[0]); 3000 } 3001 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3002 STORE(&d[chan_index], 0, chan_index); 3003 } 3004 break; 3005 3006 case TGSI_OPCODE_TRUNC: 3007 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3008 FETCH( &r[0], 0, chan_index ); 3009 micro_trunc(&d[chan_index], &r[0]); 3010 } 3011 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3012 STORE(&d[chan_index], 0, chan_index); 3013 } 3014 break; 3015 3016 case TGSI_OPCODE_SHL: 3017 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3018 FETCH( &r[0], 0, chan_index ); 3019 FETCH( &r[1], 1, chan_index ); 3020 micro_shl(&d[chan_index], &r[0], &r[1]); 3021 } 3022 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3023 STORE(&d[chan_index], 0, chan_index); 3024 } 3025 break; 3026 3027 case TGSI_OPCODE_SHR: 3028 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3029 FETCH( &r[0], 0, chan_index ); 3030 FETCH( &r[1], 1, chan_index ); 3031 micro_ishr(&d[chan_index], &r[0], &r[1]); 3032 } 3033 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3034 STORE(&d[chan_index], 0, chan_index); 3035 } 3036 break; 3037 3038 case TGSI_OPCODE_AND: 3039 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3040 FETCH( &r[0], 0, chan_index ); 3041 FETCH( &r[1], 1, chan_index ); 3042 micro_and(&d[chan_index], &r[0], &r[1]); 3043 } 3044 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3045 STORE(&d[chan_index], 0, chan_index); 3046 } 3047 break; 3048 3049 case TGSI_OPCODE_OR: 3050 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3051 FETCH( &r[0], 0, chan_index ); 3052 FETCH( &r[1], 1, chan_index ); 3053 micro_or(&d[chan_index], &r[0], &r[1]); 3054 } 3055 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3056 STORE(&d[chan_index], 0, chan_index); 3057 } 3058 break; 3059 3060 case TGSI_OPCODE_MOD: 3061 assert (0); 3062 break; 3063 3064 case TGSI_OPCODE_XOR: 3065 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3066 FETCH( &r[0], 0, chan_index ); 3067 FETCH( &r[1], 1, chan_index ); 3068 micro_xor(&d[chan_index], &r[0], &r[1]); 3069 } 3070 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3071 STORE(&d[chan_index], 0, chan_index); 3072 } 3073 break; 3074 3075 case TGSI_OPCODE_SAD: 3076 assert (0); 3077 break; 3078 3079 case TGSI_OPCODE_TXF: 3080 assert (0); 3081 break; 3082 3083 case TGSI_OPCODE_TXQ: 3084 assert (0); 3085 break; 3086 3087 case TGSI_OPCODE_EMIT: 3088 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 3089 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 3090 break; 3091 3092 case TGSI_OPCODE_ENDPRIM: 3093 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 3094 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 3095 break; 3096 3097 case TGSI_OPCODE_BGNFOR: 3098 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3099 for (chan_index = 0; chan_index < 3; chan_index++) { 3100 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3101 } 3102 ++mach->LoopCounterStackTop; 3103 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); 3104 /* update LoopMask */ 3105 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3106 mach->LoopMask &= ~0x1; 3107 } 3108 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3109 mach->LoopMask &= ~0x2; 3110 } 3111 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3112 mach->LoopMask &= ~0x4; 3113 } 3114 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3115 mach->LoopMask &= ~0x8; 3116 } 3117 /* TODO: if mach->LoopMask == 0, jump to end of loop */ 3118 UPDATE_EXEC_MASK(mach); 3119 /* fall-through (for now) */ 3120 case TGSI_OPCODE_BGNLOOP: 3121 /* push LoopMask and ContMasks */ 3122 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3123 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3124 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3125 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3126 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3127 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3128 break; 3129 3130 case TGSI_OPCODE_ENDFOR: 3131 assert(mach->LoopCounterStackTop > 0); 3132 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3133 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3134 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 3135 /* update LoopMask */ 3136 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3137 mach->LoopMask &= ~0x1; 3138 } 3139 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3140 mach->LoopMask &= ~0x2; 3141 } 3142 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3143 mach->LoopMask &= ~0x4; 3144 } 3145 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3146 mach->LoopMask &= ~0x8; 3147 } 3148 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3149 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3150 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3151 assert(mach->LoopLabelStackTop > 0); 3152 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3153 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); 3154 /* Restore ContMask, but don't pop */ 3155 assert(mach->ContStackTop > 0); 3156 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3157 UPDATE_EXEC_MASK(mach); 3158 if (mach->ExecMask) { 3159 /* repeat loop: jump to instruction just past BGNLOOP */ 3160 assert(mach->LoopLabelStackTop > 0); 3161 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3162 } 3163 else { 3164 /* exit loop: pop LoopMask */ 3165 assert(mach->LoopStackTop > 0); 3166 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3167 /* pop ContMask */ 3168 assert(mach->ContStackTop > 0); 3169 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3170 assert(mach->LoopLabelStackTop > 0); 3171 --mach->LoopLabelStackTop; 3172 assert(mach->LoopCounterStackTop > 0); 3173 --mach->LoopCounterStackTop; 3174 } 3175 UPDATE_EXEC_MASK(mach); 3176 break; 3177 3178 case TGSI_OPCODE_ENDLOOP: 3179 /* Restore ContMask, but don't pop */ 3180 assert(mach->ContStackTop > 0); 3181 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3182 UPDATE_EXEC_MASK(mach); 3183 if (mach->ExecMask) { 3184 /* repeat loop: jump to instruction just past BGNLOOP */ 3185 assert(mach->LoopLabelStackTop > 0); 3186 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3187 } 3188 else { 3189 /* exit loop: pop LoopMask */ 3190 assert(mach->LoopStackTop > 0); 3191 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3192 /* pop ContMask */ 3193 assert(mach->ContStackTop > 0); 3194 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3195 assert(mach->LoopLabelStackTop > 0); 3196 --mach->LoopLabelStackTop; 3197 } 3198 UPDATE_EXEC_MASK(mach); 3199 break; 3200 3201 case TGSI_OPCODE_BRK: 3202 /* turn off loop channels for each enabled exec channel */ 3203 mach->LoopMask &= ~mach->ExecMask; 3204 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3205 UPDATE_EXEC_MASK(mach); 3206 break; 3207 3208 case TGSI_OPCODE_CONT: 3209 /* turn off cont channels for each enabled exec channel */ 3210 mach->ContMask &= ~mach->ExecMask; 3211 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3212 UPDATE_EXEC_MASK(mach); 3213 break; 3214 3215 case TGSI_OPCODE_BGNSUB: 3216 /* no-op */ 3217 break; 3218 3219 case TGSI_OPCODE_ENDSUB: 3220 /* 3221 * XXX: This really should be a no-op. We should never reach this opcode. 3222 */ 3223 3224 assert(mach->CallStackTop > 0); 3225 mach->CallStackTop--; 3226 3227 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3228 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3229 3230 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3231 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3232 3233 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3234 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3235 3236 assert(mach->FuncStackTop > 0); 3237 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3238 3239 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3240 3241 UPDATE_EXEC_MASK(mach); 3242 break; 3243 3244 case TGSI_OPCODE_NOP: 3245 break; 3246 3247 default: 3248 assert( 0 ); 3249 } 3250} 3251 3252#define DEBUG_EXECUTION 0 3253 3254 3255/** 3256 * Run TGSI interpreter. 3257 * \return bitmask of "alive" quad components 3258 */ 3259uint 3260tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3261{ 3262 uint i; 3263 int pc = 0; 3264 3265 mach->CondMask = 0xf; 3266 mach->LoopMask = 0xf; 3267 mach->ContMask = 0xf; 3268 mach->FuncMask = 0xf; 3269 mach->ExecMask = 0xf; 3270 3271 assert(mach->CondStackTop == 0); 3272 assert(mach->LoopStackTop == 0); 3273 assert(mach->ContStackTop == 0); 3274 assert(mach->CallStackTop == 0); 3275 3276 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3277 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3278 3279 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3280 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3281 mach->Primitives[0] = 0; 3282 } 3283 3284 for (i = 0; i < QUAD_SIZE; i++) { 3285 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3286 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3287 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3288 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3289 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3290 } 3291 3292 /* execute declarations (interpolants) */ 3293 for (i = 0; i < mach->NumDeclarations; i++) { 3294 exec_declaration( mach, mach->Declarations+i ); 3295 } 3296 3297 { 3298#if DEBUG_EXECUTION 3299 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 3300 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 3301 uint inst = 1; 3302 3303 memcpy(temps, mach->Temps, sizeof(temps)); 3304 memcpy(outputs, mach->Outputs, sizeof(outputs)); 3305#endif 3306 3307 /* execute instructions, until pc is set to -1 */ 3308 while (pc != -1) { 3309 3310#if DEBUG_EXECUTION 3311 uint i; 3312 3313 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 3314#endif 3315 3316 assert(pc < (int) mach->NumInstructions); 3317 exec_instruction(mach, mach->Instructions + pc, &pc); 3318 3319#if DEBUG_EXECUTION 3320 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 3321 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 3322 uint j; 3323 3324 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 3325 debug_printf("TEMP[%2u] = ", i); 3326 for (j = 0; j < 4; j++) { 3327 if (j > 0) { 3328 debug_printf(" "); 3329 } 3330 debug_printf("(%6f, %6f, %6f, %6f)\n", 3331 temps[i].xyzw[0].f[j], 3332 temps[i].xyzw[1].f[j], 3333 temps[i].xyzw[2].f[j], 3334 temps[i].xyzw[3].f[j]); 3335 } 3336 } 3337 } 3338 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 3339 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 3340 uint j; 3341 3342 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 3343 debug_printf("OUT[%2u] = ", i); 3344 for (j = 0; j < 4; j++) { 3345 if (j > 0) { 3346 debug_printf(" "); 3347 } 3348 debug_printf("{%6f, %6f, %6f, %6f}\n", 3349 outputs[i].xyzw[0].f[j], 3350 outputs[i].xyzw[1].f[j], 3351 outputs[i].xyzw[2].f[j], 3352 outputs[i].xyzw[3].f[j]); 3353 } 3354 } 3355 } 3356#endif 3357 } 3358 } 3359 3360#if 0 3361 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3362 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3363 /* 3364 * Scale back depth component. 3365 */ 3366 for (i = 0; i < 4; i++) 3367 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3368 } 3369#endif 3370 3371 assert(mach->CondStackTop == 0); 3372 assert(mach->LoopStackTop == 0); 3373 assert(mach->ContStackTop == 0); 3374 assert(mach->CallStackTop == 0); 3375 3376 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3377} 3378