tgsi_exec.c revision 8fa6c1ac9299402c1faf75b264cf70b1b83d1eff
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65#define TILE_TOP_LEFT 0 66#define TILE_TOP_RIGHT 1 67#define TILE_BOTTOM_LEFT 2 68#define TILE_BOTTOM_RIGHT 3 69 70#define CHAN_X 0 71#define CHAN_Y 1 72#define CHAN_Z 2 73#define CHAN_W 3 74 75/* 76 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 77 */ 78#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 79#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 80#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 81#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 82#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 83#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 84#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 85#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 86#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 87#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 88#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 89#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 90#define TEMP_128_I TGSI_EXEC_TEMP_128_I 91#define TEMP_128_C TGSI_EXEC_TEMP_128_C 92#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 93#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 94#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 95#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 96#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 97#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 98#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 99#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 100#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 101#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 102#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 103#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 104#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 105#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 106#define TEMP_R0 TGSI_EXEC_TEMP_R0 107 108#define IS_CHANNEL_ENABLED(INST, CHAN)\ 109 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 110 111#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 112 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 113 114#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 115 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 116 if (IS_CHANNEL_ENABLED( INST, CHAN )) 117 118#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 119 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 120 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 121 122 123/** The execution mask depends on the conditional mask and the loop mask */ 124#define UPDATE_EXEC_MASK(MACH) \ 125 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 126 127 128static const union tgsi_exec_channel ZeroVec = 129 { { 0.0, 0.0, 0.0, 0.0 } }; 130 131 132#ifdef DEBUG 133static void 134check_inf_or_nan(const union tgsi_exec_channel *chan) 135{ 136 assert(!util_is_inf_or_nan(chan->f[0])); 137 assert(!util_is_inf_or_nan(chan->f[1])); 138 assert(!util_is_inf_or_nan(chan->f[2])); 139 assert(!util_is_inf_or_nan(chan->f[3])); 140} 141#endif 142 143 144#ifdef DEBUG 145static void 146print_chan(const char *msg, const union tgsi_exec_channel *chan) 147{ 148 debug_printf("%s = {%f, %f, %f, %f}\n", 149 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 150} 151#endif 152 153 154#ifdef DEBUG 155static void 156print_temp(const struct tgsi_exec_machine *mach, uint index) 157{ 158 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 159 int i; 160 debug_printf("Temp[%u] =\n", index); 161 for (i = 0; i < 4; i++) { 162 debug_printf(" %c: { %f, %f, %f, %f }\n", 163 "XYZW"[i], 164 tmp->xyzw[i].f[0], 165 tmp->xyzw[i].f[1], 166 tmp->xyzw[i].f[2], 167 tmp->xyzw[i].f[3]); 168 } 169} 170#endif 171 172 173/** 174 * Check if there's a potential src/dst register data dependency when 175 * using SOA execution. 176 * Example: 177 * MOV T, T.yxwz; 178 * This would expand into: 179 * MOV t0, t1; 180 * MOV t1, t0; 181 * MOV t2, t3; 182 * MOV t3, t2; 183 * The second instruction will have the wrong value for t0 if executed as-is. 184 */ 185static boolean 186tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 187{ 188 uint i, chan; 189 190 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 191 if (writemask == TGSI_WRITEMASK_X || 192 writemask == TGSI_WRITEMASK_Y || 193 writemask == TGSI_WRITEMASK_Z || 194 writemask == TGSI_WRITEMASK_W || 195 writemask == TGSI_WRITEMASK_NONE) { 196 /* no chance of data dependency */ 197 return FALSE; 198 } 199 200 /* loop over src regs */ 201 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 202 if ((inst->FullSrcRegisters[i].SrcRegister.File == 203 inst->FullDstRegisters[0].DstRegister.File) && 204 (inst->FullSrcRegisters[i].SrcRegister.Index == 205 inst->FullDstRegisters[0].DstRegister.Index)) { 206 /* loop over dest channels */ 207 uint channelsWritten = 0x0; 208 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 209 /* check if we're reading a channel that's been written */ 210 uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan); 211 if (swizzle <= TGSI_SWIZZLE_W && 212 (channelsWritten & (1 << swizzle))) { 213 return TRUE; 214 } 215 216 channelsWritten |= (1 << chan); 217 } 218 } 219 } 220 return FALSE; 221} 222 223 224/** 225 * Initialize machine state by expanding tokens to full instructions, 226 * allocating temporary storage, setting up constants, etc. 227 * After this, we can call tgsi_exec_machine_run() many times. 228 */ 229void 230tgsi_exec_machine_bind_shader( 231 struct tgsi_exec_machine *mach, 232 const struct tgsi_token *tokens, 233 uint numSamplers, 234 struct tgsi_sampler **samplers) 235{ 236 uint k; 237 struct tgsi_parse_context parse; 238 struct tgsi_exec_labels *labels = &mach->Labels; 239 struct tgsi_full_instruction *instructions; 240 struct tgsi_full_declaration *declarations; 241 uint maxInstructions = 10, numInstructions = 0; 242 uint maxDeclarations = 10, numDeclarations = 0; 243 uint instno = 0; 244 245#if 0 246 tgsi_dump(tokens, 0); 247#endif 248 249 util_init_math(); 250 251 mach->Tokens = tokens; 252 mach->Samplers = samplers; 253 254 k = tgsi_parse_init (&parse, mach->Tokens); 255 if (k != TGSI_PARSE_OK) { 256 debug_printf( "Problem parsing!\n" ); 257 return; 258 } 259 260 mach->Processor = parse.FullHeader.Processor.Processor; 261 mach->ImmLimit = 0; 262 labels->count = 0; 263 264 declarations = (struct tgsi_full_declaration *) 265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 266 267 if (!declarations) { 268 return; 269 } 270 271 instructions = (struct tgsi_full_instruction *) 272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 273 274 if (!instructions) { 275 FREE( declarations ); 276 return; 277 } 278 279 while( !tgsi_parse_end_of_tokens( &parse ) ) { 280 uint pointer = parse.Position; 281 uint i; 282 283 tgsi_parse_token( &parse ); 284 switch( parse.FullToken.Token.Type ) { 285 case TGSI_TOKEN_TYPE_DECLARATION: 286 /* save expanded declaration */ 287 if (numDeclarations == maxDeclarations) { 288 declarations = REALLOC(declarations, 289 maxDeclarations 290 * sizeof(struct tgsi_full_declaration), 291 (maxDeclarations + 10) 292 * sizeof(struct tgsi_full_declaration)); 293 maxDeclarations += 10; 294 } 295 memcpy(declarations + numDeclarations, 296 &parse.FullToken.FullDeclaration, 297 sizeof(declarations[0])); 298 numDeclarations++; 299 break; 300 301 case TGSI_TOKEN_TYPE_IMMEDIATE: 302 { 303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 304 assert( size % 4 == 0 ); 305 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); 306 307 for( i = 0; i < size; i++ ) { 308 mach->Imms[mach->ImmLimit + i / 4][i % 4] = 309 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 310 } 311 mach->ImmLimit += size / 4; 312 } 313 break; 314 315 case TGSI_TOKEN_TYPE_INSTRUCTION: 316 assert( labels->count < MAX_LABELS ); 317 318 labels->labels[labels->count][0] = instno; 319 labels->labels[labels->count][1] = pointer; 320 labels->count++; 321 322 /* save expanded instruction */ 323 if (numInstructions == maxInstructions) { 324 instructions = REALLOC(instructions, 325 maxInstructions 326 * sizeof(struct tgsi_full_instruction), 327 (maxInstructions + 10) 328 * sizeof(struct tgsi_full_instruction)); 329 maxInstructions += 10; 330 } 331 memcpy(instructions + numInstructions, 332 &parse.FullToken.FullInstruction, 333 sizeof(instructions[0])); 334 335#if 0 336 if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { 337 debug_printf("SOA dependency in instruction:\n"); 338 tgsi_dump_instruction(&parse.FullToken.FullInstruction, 339 numInstructions); 340 } 341#else 342 (void) tgsi_check_soa_dependencies; 343#endif 344 345 numInstructions++; 346 break; 347 348 default: 349 assert( 0 ); 350 } 351 } 352 tgsi_parse_free (&parse); 353 354 if (mach->Declarations) { 355 FREE( mach->Declarations ); 356 } 357 mach->Declarations = declarations; 358 mach->NumDeclarations = numDeclarations; 359 360 if (mach->Instructions) { 361 FREE( mach->Instructions ); 362 } 363 mach->Instructions = instructions; 364 mach->NumInstructions = numInstructions; 365} 366 367 368void 369tgsi_exec_machine_init( 370 struct tgsi_exec_machine *mach ) 371{ 372 uint i; 373 374 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); 375 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 376 377 /* Setup constants. */ 378 for( i = 0; i < 4; i++ ) { 379 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 380 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 381 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 382 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 383 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 384 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 385 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 386 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 387 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 388 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 389 } 390 391#ifdef DEBUG 392 /* silence warnings */ 393 (void) print_chan; 394 (void) print_temp; 395#endif 396} 397 398 399void 400tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) 401{ 402 if (mach->Instructions) { 403 FREE(mach->Instructions); 404 mach->Instructions = NULL; 405 mach->NumInstructions = 0; 406 } 407 if (mach->Declarations) { 408 FREE(mach->Declarations); 409 mach->Declarations = NULL; 410 mach->NumDeclarations = 0; 411 } 412} 413 414 415static void 416micro_abs( 417 union tgsi_exec_channel *dst, 418 const union tgsi_exec_channel *src ) 419{ 420 dst->f[0] = fabsf( src->f[0] ); 421 dst->f[1] = fabsf( src->f[1] ); 422 dst->f[2] = fabsf( src->f[2] ); 423 dst->f[3] = fabsf( src->f[3] ); 424} 425 426static void 427micro_add( 428 union tgsi_exec_channel *dst, 429 const union tgsi_exec_channel *src0, 430 const union tgsi_exec_channel *src1 ) 431{ 432 dst->f[0] = src0->f[0] + src1->f[0]; 433 dst->f[1] = src0->f[1] + src1->f[1]; 434 dst->f[2] = src0->f[2] + src1->f[2]; 435 dst->f[3] = src0->f[3] + src1->f[3]; 436} 437 438#if 0 439static void 440micro_iadd( 441 union tgsi_exec_channel *dst, 442 const union tgsi_exec_channel *src0, 443 const union tgsi_exec_channel *src1 ) 444{ 445 dst->i[0] = src0->i[0] + src1->i[0]; 446 dst->i[1] = src0->i[1] + src1->i[1]; 447 dst->i[2] = src0->i[2] + src1->i[2]; 448 dst->i[3] = src0->i[3] + src1->i[3]; 449} 450#endif 451 452static void 453micro_and( 454 union tgsi_exec_channel *dst, 455 const union tgsi_exec_channel *src0, 456 const union tgsi_exec_channel *src1 ) 457{ 458 dst->u[0] = src0->u[0] & src1->u[0]; 459 dst->u[1] = src0->u[1] & src1->u[1]; 460 dst->u[2] = src0->u[2] & src1->u[2]; 461 dst->u[3] = src0->u[3] & src1->u[3]; 462} 463 464static void 465micro_ceil( 466 union tgsi_exec_channel *dst, 467 const union tgsi_exec_channel *src ) 468{ 469 dst->f[0] = ceilf( src->f[0] ); 470 dst->f[1] = ceilf( src->f[1] ); 471 dst->f[2] = ceilf( src->f[2] ); 472 dst->f[3] = ceilf( src->f[3] ); 473} 474 475static void 476micro_cos( 477 union tgsi_exec_channel *dst, 478 const union tgsi_exec_channel *src ) 479{ 480 dst->f[0] = cosf( src->f[0] ); 481 dst->f[1] = cosf( src->f[1] ); 482 dst->f[2] = cosf( src->f[2] ); 483 dst->f[3] = cosf( src->f[3] ); 484} 485 486static void 487micro_ddx( 488 union tgsi_exec_channel *dst, 489 const union tgsi_exec_channel *src ) 490{ 491 dst->f[0] = 492 dst->f[1] = 493 dst->f[2] = 494 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 495} 496 497static void 498micro_ddy( 499 union tgsi_exec_channel *dst, 500 const union tgsi_exec_channel *src ) 501{ 502 dst->f[0] = 503 dst->f[1] = 504 dst->f[2] = 505 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 506} 507 508static void 509micro_div( 510 union tgsi_exec_channel *dst, 511 const union tgsi_exec_channel *src0, 512 const union tgsi_exec_channel *src1 ) 513{ 514 if (src1->f[0] != 0) { 515 dst->f[0] = src0->f[0] / src1->f[0]; 516 } 517 if (src1->f[1] != 0) { 518 dst->f[1] = src0->f[1] / src1->f[1]; 519 } 520 if (src1->f[2] != 0) { 521 dst->f[2] = src0->f[2] / src1->f[2]; 522 } 523 if (src1->f[3] != 0) { 524 dst->f[3] = src0->f[3] / src1->f[3]; 525 } 526} 527 528#if 0 529static void 530micro_udiv( 531 union tgsi_exec_channel *dst, 532 const union tgsi_exec_channel *src0, 533 const union tgsi_exec_channel *src1 ) 534{ 535 dst->u[0] = src0->u[0] / src1->u[0]; 536 dst->u[1] = src0->u[1] / src1->u[1]; 537 dst->u[2] = src0->u[2] / src1->u[2]; 538 dst->u[3] = src0->u[3] / src1->u[3]; 539} 540#endif 541 542static void 543micro_eq( 544 union tgsi_exec_channel *dst, 545 const union tgsi_exec_channel *src0, 546 const union tgsi_exec_channel *src1, 547 const union tgsi_exec_channel *src2, 548 const union tgsi_exec_channel *src3 ) 549{ 550 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 551 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 552 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 553 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 554} 555 556#if 0 557static void 558micro_ieq( 559 union tgsi_exec_channel *dst, 560 const union tgsi_exec_channel *src0, 561 const union tgsi_exec_channel *src1, 562 const union tgsi_exec_channel *src2, 563 const union tgsi_exec_channel *src3 ) 564{ 565 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 566 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 567 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 568 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 569} 570#endif 571 572static void 573micro_exp2( 574 union tgsi_exec_channel *dst, 575 const union tgsi_exec_channel *src) 576{ 577#if FAST_MATH 578 dst->f[0] = util_fast_exp2( src->f[0] ); 579 dst->f[1] = util_fast_exp2( src->f[1] ); 580 dst->f[2] = util_fast_exp2( src->f[2] ); 581 dst->f[3] = util_fast_exp2( src->f[3] ); 582#else 583 dst->f[0] = powf( 2.0f, src->f[0] ); 584 dst->f[1] = powf( 2.0f, src->f[1] ); 585 dst->f[2] = powf( 2.0f, src->f[2] ); 586 dst->f[3] = powf( 2.0f, src->f[3] ); 587#endif 588} 589 590#if 0 591static void 592micro_f2ut( 593 union tgsi_exec_channel *dst, 594 const union tgsi_exec_channel *src ) 595{ 596 dst->u[0] = (uint) src->f[0]; 597 dst->u[1] = (uint) src->f[1]; 598 dst->u[2] = (uint) src->f[2]; 599 dst->u[3] = (uint) src->f[3]; 600} 601#endif 602 603static void 604micro_float_clamp(union tgsi_exec_channel *dst, 605 const union tgsi_exec_channel *src) 606{ 607 uint i; 608 609 for (i = 0; i < 4; i++) { 610 if (src->f[i] > 0.0f) { 611 if (src->f[i] > 1.884467e+019f) 612 dst->f[i] = 1.884467e+019f; 613 else if (src->f[i] < 5.42101e-020f) 614 dst->f[i] = 5.42101e-020f; 615 else 616 dst->f[i] = src->f[i]; 617 } 618 else { 619 if (src->f[i] < -1.884467e+019f) 620 dst->f[i] = -1.884467e+019f; 621 else if (src->f[i] > -5.42101e-020f) 622 dst->f[i] = -5.42101e-020f; 623 else 624 dst->f[i] = src->f[i]; 625 } 626 } 627} 628 629static void 630micro_flr( 631 union tgsi_exec_channel *dst, 632 const union tgsi_exec_channel *src ) 633{ 634 dst->f[0] = floorf( src->f[0] ); 635 dst->f[1] = floorf( src->f[1] ); 636 dst->f[2] = floorf( src->f[2] ); 637 dst->f[3] = floorf( src->f[3] ); 638} 639 640static void 641micro_frc( 642 union tgsi_exec_channel *dst, 643 const union tgsi_exec_channel *src ) 644{ 645 dst->f[0] = src->f[0] - floorf( src->f[0] ); 646 dst->f[1] = src->f[1] - floorf( src->f[1] ); 647 dst->f[2] = src->f[2] - floorf( src->f[2] ); 648 dst->f[3] = src->f[3] - floorf( src->f[3] ); 649} 650 651static void 652micro_i2f( 653 union tgsi_exec_channel *dst, 654 const union tgsi_exec_channel *src ) 655{ 656 dst->f[0] = (float) src->i[0]; 657 dst->f[1] = (float) src->i[1]; 658 dst->f[2] = (float) src->i[2]; 659 dst->f[3] = (float) src->i[3]; 660} 661 662static void 663micro_lg2( 664 union tgsi_exec_channel *dst, 665 const union tgsi_exec_channel *src ) 666{ 667#if FAST_MATH 668 dst->f[0] = util_fast_log2( src->f[0] ); 669 dst->f[1] = util_fast_log2( src->f[1] ); 670 dst->f[2] = util_fast_log2( src->f[2] ); 671 dst->f[3] = util_fast_log2( src->f[3] ); 672#else 673 dst->f[0] = logf( src->f[0] ) * 1.442695f; 674 dst->f[1] = logf( src->f[1] ) * 1.442695f; 675 dst->f[2] = logf( src->f[2] ) * 1.442695f; 676 dst->f[3] = logf( src->f[3] ) * 1.442695f; 677#endif 678} 679 680static void 681micro_le( 682 union tgsi_exec_channel *dst, 683 const union tgsi_exec_channel *src0, 684 const union tgsi_exec_channel *src1, 685 const union tgsi_exec_channel *src2, 686 const union tgsi_exec_channel *src3 ) 687{ 688 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 689 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 690 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 691 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 692} 693 694static void 695micro_lt( 696 union tgsi_exec_channel *dst, 697 const union tgsi_exec_channel *src0, 698 const union tgsi_exec_channel *src1, 699 const union tgsi_exec_channel *src2, 700 const union tgsi_exec_channel *src3 ) 701{ 702 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 703 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 704 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 705 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 706} 707 708#if 0 709static void 710micro_ilt( 711 union tgsi_exec_channel *dst, 712 const union tgsi_exec_channel *src0, 713 const union tgsi_exec_channel *src1, 714 const union tgsi_exec_channel *src2, 715 const union tgsi_exec_channel *src3 ) 716{ 717 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 718 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 719 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 720 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 721} 722#endif 723 724#if 0 725static void 726micro_ult( 727 union tgsi_exec_channel *dst, 728 const union tgsi_exec_channel *src0, 729 const union tgsi_exec_channel *src1, 730 const union tgsi_exec_channel *src2, 731 const union tgsi_exec_channel *src3 ) 732{ 733 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 734 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 735 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 736 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 737} 738#endif 739 740static void 741micro_max( 742 union tgsi_exec_channel *dst, 743 const union tgsi_exec_channel *src0, 744 const union tgsi_exec_channel *src1 ) 745{ 746 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 747 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 748 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 749 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 750} 751 752#if 0 753static void 754micro_imax( 755 union tgsi_exec_channel *dst, 756 const union tgsi_exec_channel *src0, 757 const union tgsi_exec_channel *src1 ) 758{ 759 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 760 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 761 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 762 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 763} 764#endif 765 766#if 0 767static void 768micro_umax( 769 union tgsi_exec_channel *dst, 770 const union tgsi_exec_channel *src0, 771 const union tgsi_exec_channel *src1 ) 772{ 773 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 774 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 775 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 776 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 777} 778#endif 779 780static void 781micro_min( 782 union tgsi_exec_channel *dst, 783 const union tgsi_exec_channel *src0, 784 const union tgsi_exec_channel *src1 ) 785{ 786 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 787 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 788 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 789 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 790} 791 792#if 0 793static void 794micro_imin( 795 union tgsi_exec_channel *dst, 796 const union tgsi_exec_channel *src0, 797 const union tgsi_exec_channel *src1 ) 798{ 799 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 800 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 801 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 802 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 803} 804#endif 805 806#if 0 807static void 808micro_umin( 809 union tgsi_exec_channel *dst, 810 const union tgsi_exec_channel *src0, 811 const union tgsi_exec_channel *src1 ) 812{ 813 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 814 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 815 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 816 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 817} 818#endif 819 820#if 0 821static void 822micro_umod( 823 union tgsi_exec_channel *dst, 824 const union tgsi_exec_channel *src0, 825 const union tgsi_exec_channel *src1 ) 826{ 827 dst->u[0] = src0->u[0] % src1->u[0]; 828 dst->u[1] = src0->u[1] % src1->u[1]; 829 dst->u[2] = src0->u[2] % src1->u[2]; 830 dst->u[3] = src0->u[3] % src1->u[3]; 831} 832#endif 833 834static void 835micro_mul( 836 union tgsi_exec_channel *dst, 837 const union tgsi_exec_channel *src0, 838 const union tgsi_exec_channel *src1 ) 839{ 840 dst->f[0] = src0->f[0] * src1->f[0]; 841 dst->f[1] = src0->f[1] * src1->f[1]; 842 dst->f[2] = src0->f[2] * src1->f[2]; 843 dst->f[3] = src0->f[3] * src1->f[3]; 844} 845 846#if 0 847static void 848micro_imul( 849 union tgsi_exec_channel *dst, 850 const union tgsi_exec_channel *src0, 851 const union tgsi_exec_channel *src1 ) 852{ 853 dst->i[0] = src0->i[0] * src1->i[0]; 854 dst->i[1] = src0->i[1] * src1->i[1]; 855 dst->i[2] = src0->i[2] * src1->i[2]; 856 dst->i[3] = src0->i[3] * src1->i[3]; 857} 858#endif 859 860#if 0 861static void 862micro_imul64( 863 union tgsi_exec_channel *dst0, 864 union tgsi_exec_channel *dst1, 865 const union tgsi_exec_channel *src0, 866 const union tgsi_exec_channel *src1 ) 867{ 868 dst1->i[0] = src0->i[0] * src1->i[0]; 869 dst1->i[1] = src0->i[1] * src1->i[1]; 870 dst1->i[2] = src0->i[2] * src1->i[2]; 871 dst1->i[3] = src0->i[3] * src1->i[3]; 872 dst0->i[0] = 0; 873 dst0->i[1] = 0; 874 dst0->i[2] = 0; 875 dst0->i[3] = 0; 876} 877#endif 878 879#if 0 880static void 881micro_umul64( 882 union tgsi_exec_channel *dst0, 883 union tgsi_exec_channel *dst1, 884 const union tgsi_exec_channel *src0, 885 const union tgsi_exec_channel *src1 ) 886{ 887 dst1->u[0] = src0->u[0] * src1->u[0]; 888 dst1->u[1] = src0->u[1] * src1->u[1]; 889 dst1->u[2] = src0->u[2] * src1->u[2]; 890 dst1->u[3] = src0->u[3] * src1->u[3]; 891 dst0->u[0] = 0; 892 dst0->u[1] = 0; 893 dst0->u[2] = 0; 894 dst0->u[3] = 0; 895} 896#endif 897 898 899#if 0 900static void 901micro_movc( 902 union tgsi_exec_channel *dst, 903 const union tgsi_exec_channel *src0, 904 const union tgsi_exec_channel *src1, 905 const union tgsi_exec_channel *src2 ) 906{ 907 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 908 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 909 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 910 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 911} 912#endif 913 914static void 915micro_neg( 916 union tgsi_exec_channel *dst, 917 const union tgsi_exec_channel *src ) 918{ 919 dst->f[0] = -src->f[0]; 920 dst->f[1] = -src->f[1]; 921 dst->f[2] = -src->f[2]; 922 dst->f[3] = -src->f[3]; 923} 924 925#if 0 926static void 927micro_ineg( 928 union tgsi_exec_channel *dst, 929 const union tgsi_exec_channel *src ) 930{ 931 dst->i[0] = -src->i[0]; 932 dst->i[1] = -src->i[1]; 933 dst->i[2] = -src->i[2]; 934 dst->i[3] = -src->i[3]; 935} 936#endif 937 938static void 939micro_not( 940 union tgsi_exec_channel *dst, 941 const union tgsi_exec_channel *src ) 942{ 943 dst->u[0] = ~src->u[0]; 944 dst->u[1] = ~src->u[1]; 945 dst->u[2] = ~src->u[2]; 946 dst->u[3] = ~src->u[3]; 947} 948 949static void 950micro_or( 951 union tgsi_exec_channel *dst, 952 const union tgsi_exec_channel *src0, 953 const union tgsi_exec_channel *src1 ) 954{ 955 dst->u[0] = src0->u[0] | src1->u[0]; 956 dst->u[1] = src0->u[1] | src1->u[1]; 957 dst->u[2] = src0->u[2] | src1->u[2]; 958 dst->u[3] = src0->u[3] | src1->u[3]; 959} 960 961static void 962micro_pow( 963 union tgsi_exec_channel *dst, 964 const union tgsi_exec_channel *src0, 965 const union tgsi_exec_channel *src1 ) 966{ 967#if FAST_MATH 968 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 969 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 970 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 971 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 972#else 973 dst->f[0] = powf( src0->f[0], src1->f[0] ); 974 dst->f[1] = powf( src0->f[1], src1->f[1] ); 975 dst->f[2] = powf( src0->f[2], src1->f[2] ); 976 dst->f[3] = powf( src0->f[3], src1->f[3] ); 977#endif 978} 979 980static void 981micro_rnd( 982 union tgsi_exec_channel *dst, 983 const union tgsi_exec_channel *src ) 984{ 985 dst->f[0] = floorf( src->f[0] + 0.5f ); 986 dst->f[1] = floorf( src->f[1] + 0.5f ); 987 dst->f[2] = floorf( src->f[2] + 0.5f ); 988 dst->f[3] = floorf( src->f[3] + 0.5f ); 989} 990 991static void 992micro_sgn( 993 union tgsi_exec_channel *dst, 994 const union tgsi_exec_channel *src ) 995{ 996 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 997 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 998 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 999 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1000} 1001 1002static void 1003micro_shl( 1004 union tgsi_exec_channel *dst, 1005 const union tgsi_exec_channel *src0, 1006 const union tgsi_exec_channel *src1 ) 1007{ 1008 dst->i[0] = src0->i[0] << src1->i[0]; 1009 dst->i[1] = src0->i[1] << src1->i[1]; 1010 dst->i[2] = src0->i[2] << src1->i[2]; 1011 dst->i[3] = src0->i[3] << src1->i[3]; 1012} 1013 1014static void 1015micro_ishr( 1016 union tgsi_exec_channel *dst, 1017 const union tgsi_exec_channel *src0, 1018 const union tgsi_exec_channel *src1 ) 1019{ 1020 dst->i[0] = src0->i[0] >> src1->i[0]; 1021 dst->i[1] = src0->i[1] >> src1->i[1]; 1022 dst->i[2] = src0->i[2] >> src1->i[2]; 1023 dst->i[3] = src0->i[3] >> src1->i[3]; 1024} 1025 1026static void 1027micro_trunc( 1028 union tgsi_exec_channel *dst, 1029 const union tgsi_exec_channel *src0 ) 1030{ 1031 dst->f[0] = (float) (int) src0->f[0]; 1032 dst->f[1] = (float) (int) src0->f[1]; 1033 dst->f[2] = (float) (int) src0->f[2]; 1034 dst->f[3] = (float) (int) src0->f[3]; 1035} 1036 1037#if 0 1038static void 1039micro_ushr( 1040 union tgsi_exec_channel *dst, 1041 const union tgsi_exec_channel *src0, 1042 const union tgsi_exec_channel *src1 ) 1043{ 1044 dst->u[0] = src0->u[0] >> src1->u[0]; 1045 dst->u[1] = src0->u[1] >> src1->u[1]; 1046 dst->u[2] = src0->u[2] >> src1->u[2]; 1047 dst->u[3] = src0->u[3] >> src1->u[3]; 1048} 1049#endif 1050 1051static void 1052micro_sin( 1053 union tgsi_exec_channel *dst, 1054 const union tgsi_exec_channel *src ) 1055{ 1056 dst->f[0] = sinf( src->f[0] ); 1057 dst->f[1] = sinf( src->f[1] ); 1058 dst->f[2] = sinf( src->f[2] ); 1059 dst->f[3] = sinf( src->f[3] ); 1060} 1061 1062static void 1063micro_sqrt( union tgsi_exec_channel *dst, 1064 const union tgsi_exec_channel *src ) 1065{ 1066 dst->f[0] = sqrtf( src->f[0] ); 1067 dst->f[1] = sqrtf( src->f[1] ); 1068 dst->f[2] = sqrtf( src->f[2] ); 1069 dst->f[3] = sqrtf( src->f[3] ); 1070} 1071 1072static void 1073micro_sub( 1074 union tgsi_exec_channel *dst, 1075 const union tgsi_exec_channel *src0, 1076 const union tgsi_exec_channel *src1 ) 1077{ 1078 dst->f[0] = src0->f[0] - src1->f[0]; 1079 dst->f[1] = src0->f[1] - src1->f[1]; 1080 dst->f[2] = src0->f[2] - src1->f[2]; 1081 dst->f[3] = src0->f[3] - src1->f[3]; 1082} 1083 1084#if 0 1085static void 1086micro_u2f( 1087 union tgsi_exec_channel *dst, 1088 const union tgsi_exec_channel *src ) 1089{ 1090 dst->f[0] = (float) src->u[0]; 1091 dst->f[1] = (float) src->u[1]; 1092 dst->f[2] = (float) src->u[2]; 1093 dst->f[3] = (float) src->u[3]; 1094} 1095#endif 1096 1097static void 1098micro_xor( 1099 union tgsi_exec_channel *dst, 1100 const union tgsi_exec_channel *src0, 1101 const union tgsi_exec_channel *src1 ) 1102{ 1103 dst->u[0] = src0->u[0] ^ src1->u[0]; 1104 dst->u[1] = src0->u[1] ^ src1->u[1]; 1105 dst->u[2] = src0->u[2] ^ src1->u[2]; 1106 dst->u[3] = src0->u[3] ^ src1->u[3]; 1107} 1108 1109static void 1110fetch_src_file_channel( 1111 const struct tgsi_exec_machine *mach, 1112 const uint file, 1113 const uint swizzle, 1114 const union tgsi_exec_channel *index, 1115 union tgsi_exec_channel *chan ) 1116{ 1117 switch( swizzle ) { 1118 case TGSI_EXTSWIZZLE_X: 1119 case TGSI_EXTSWIZZLE_Y: 1120 case TGSI_EXTSWIZZLE_Z: 1121 case TGSI_EXTSWIZZLE_W: 1122 switch( file ) { 1123 case TGSI_FILE_CONSTANT: 1124 assert(mach->Consts); 1125 if (index->i[0] < 0) 1126 chan->f[0] = 0.0f; 1127 else 1128 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1129 if (index->i[1] < 0) 1130 chan->f[1] = 0.0f; 1131 else 1132 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1133 if (index->i[2] < 0) 1134 chan->f[2] = 0.0f; 1135 else 1136 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1137 if (index->i[3] < 0) 1138 chan->f[3] = 0.0f; 1139 else 1140 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1141 break; 1142 1143 case TGSI_FILE_INPUT: 1144 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1145 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1146 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1147 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1148 break; 1149 1150 case TGSI_FILE_TEMPORARY: 1151 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1152 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1153 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1154 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1155 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1156 break; 1157 1158 case TGSI_FILE_IMMEDIATE: 1159 assert( index->i[0] < (int) mach->ImmLimit ); 1160 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1161 assert( index->i[1] < (int) mach->ImmLimit ); 1162 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1163 assert( index->i[2] < (int) mach->ImmLimit ); 1164 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1165 assert( index->i[3] < (int) mach->ImmLimit ); 1166 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1167 break; 1168 1169 case TGSI_FILE_ADDRESS: 1170 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1171 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1172 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1173 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1174 break; 1175 1176 case TGSI_FILE_OUTPUT: 1177 /* vertex/fragment output vars can be read too */ 1178 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1179 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1180 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1181 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1182 break; 1183 1184 default: 1185 assert( 0 ); 1186 } 1187 break; 1188 1189 case TGSI_EXTSWIZZLE_ZERO: 1190 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1191 break; 1192 1193 case TGSI_EXTSWIZZLE_ONE: 1194 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1195 break; 1196 1197 default: 1198 assert( 0 ); 1199 } 1200} 1201 1202static void 1203fetch_source( 1204 const struct tgsi_exec_machine *mach, 1205 union tgsi_exec_channel *chan, 1206 const struct tgsi_full_src_register *reg, 1207 const uint chan_index ) 1208{ 1209 union tgsi_exec_channel index; 1210 uint swizzle; 1211 1212 /* We start with a direct index into a register file. 1213 * 1214 * file[1], 1215 * where: 1216 * file = SrcRegister.File 1217 * [1] = SrcRegister.Index 1218 */ 1219 index.i[0] = 1220 index.i[1] = 1221 index.i[2] = 1222 index.i[3] = reg->SrcRegister.Index; 1223 1224 /* There is an extra source register that indirectly subscripts 1225 * a register file. The direct index now becomes an offset 1226 * that is being added to the indirect register. 1227 * 1228 * file[ind[2].x+1], 1229 * where: 1230 * ind = SrcRegisterInd.File 1231 * [2] = SrcRegisterInd.Index 1232 * .x = SrcRegisterInd.SwizzleX 1233 */ 1234 if (reg->SrcRegister.Indirect) { 1235 union tgsi_exec_channel index2; 1236 union tgsi_exec_channel indir_index; 1237 const uint execmask = mach->ExecMask; 1238 uint i; 1239 1240 /* which address register (always zero now) */ 1241 index2.i[0] = 1242 index2.i[1] = 1243 index2.i[2] = 1244 index2.i[3] = reg->SrcRegisterInd.Index; 1245 1246 /* get current value of address register[swizzle] */ 1247 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1248 fetch_src_file_channel( 1249 mach, 1250 reg->SrcRegisterInd.File, 1251 swizzle, 1252 &index2, 1253 &indir_index ); 1254 1255 /* add value of address register to the offset */ 1256 index.i[0] += (int) indir_index.f[0]; 1257 index.i[1] += (int) indir_index.f[1]; 1258 index.i[2] += (int) indir_index.f[2]; 1259 index.i[3] += (int) indir_index.f[3]; 1260 1261 /* for disabled execution channels, zero-out the index to 1262 * avoid using a potential garbage value. 1263 */ 1264 for (i = 0; i < QUAD_SIZE; i++) { 1265 if ((execmask & (1 << i)) == 0) 1266 index.i[i] = 0; 1267 } 1268 } 1269 1270 /* There is an extra source register that is a second 1271 * subscript to a register file. Effectively it means that 1272 * the register file is actually a 2D array of registers. 1273 * 1274 * file[1][3] == file[1*sizeof(file[1])+3], 1275 * where: 1276 * [3] = SrcRegisterDim.Index 1277 */ 1278 if (reg->SrcRegister.Dimension) { 1279 /* The size of the first-order array depends on the register file type. 1280 * We need to multiply the index to the first array to get an effective, 1281 * "flat" index that points to the beginning of the second-order array. 1282 */ 1283 switch (reg->SrcRegister.File) { 1284 case TGSI_FILE_INPUT: 1285 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1286 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1287 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1288 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1289 break; 1290 case TGSI_FILE_CONSTANT: 1291 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1292 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1293 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1294 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1295 break; 1296 default: 1297 assert( 0 ); 1298 } 1299 1300 index.i[0] += reg->SrcRegisterDim.Index; 1301 index.i[1] += reg->SrcRegisterDim.Index; 1302 index.i[2] += reg->SrcRegisterDim.Index; 1303 index.i[3] += reg->SrcRegisterDim.Index; 1304 1305 /* Again, the second subscript index can be addressed indirectly 1306 * identically to the first one. 1307 * Nothing stops us from indirectly addressing the indirect register, 1308 * but there is no need for that, so we won't exercise it. 1309 * 1310 * file[1][ind[4].y+3], 1311 * where: 1312 * ind = SrcRegisterDimInd.File 1313 * [4] = SrcRegisterDimInd.Index 1314 * .y = SrcRegisterDimInd.SwizzleX 1315 */ 1316 if (reg->SrcRegisterDim.Indirect) { 1317 union tgsi_exec_channel index2; 1318 union tgsi_exec_channel indir_index; 1319 const uint execmask = mach->ExecMask; 1320 uint i; 1321 1322 index2.i[0] = 1323 index2.i[1] = 1324 index2.i[2] = 1325 index2.i[3] = reg->SrcRegisterDimInd.Index; 1326 1327 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1328 fetch_src_file_channel( 1329 mach, 1330 reg->SrcRegisterDimInd.File, 1331 swizzle, 1332 &index2, 1333 &indir_index ); 1334 1335 index.i[0] += (int) indir_index.f[0]; 1336 index.i[1] += (int) indir_index.f[1]; 1337 index.i[2] += (int) indir_index.f[2]; 1338 index.i[3] += (int) indir_index.f[3]; 1339 1340 /* for disabled execution channels, zero-out the index to 1341 * avoid using a potential garbage value. 1342 */ 1343 for (i = 0; i < QUAD_SIZE; i++) { 1344 if ((execmask & (1 << i)) == 0) 1345 index.i[i] = 0; 1346 } 1347 } 1348 1349 /* If by any chance there was a need for a 3D array of register 1350 * files, we would have to check whether SrcRegisterDim is followed 1351 * by a dimension register and continue the saga. 1352 */ 1353 } 1354 1355 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1356 fetch_src_file_channel( 1357 mach, 1358 reg->SrcRegister.File, 1359 swizzle, 1360 &index, 1361 chan ); 1362 1363 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1364 case TGSI_UTIL_SIGN_CLEAR: 1365 micro_abs( chan, chan ); 1366 break; 1367 1368 case TGSI_UTIL_SIGN_SET: 1369 micro_abs( chan, chan ); 1370 micro_neg( chan, chan ); 1371 break; 1372 1373 case TGSI_UTIL_SIGN_TOGGLE: 1374 micro_neg( chan, chan ); 1375 break; 1376 1377 case TGSI_UTIL_SIGN_KEEP: 1378 break; 1379 } 1380 1381 if (reg->SrcRegisterExtMod.Complement) { 1382 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1383 } 1384} 1385 1386static void 1387store_dest( 1388 struct tgsi_exec_machine *mach, 1389 const union tgsi_exec_channel *chan, 1390 const struct tgsi_full_dst_register *reg, 1391 const struct tgsi_full_instruction *inst, 1392 uint chan_index ) 1393{ 1394 uint i; 1395 union tgsi_exec_channel null; 1396 union tgsi_exec_channel *dst; 1397 uint execmask = mach->ExecMask; 1398 1399#ifdef DEBUG 1400 check_inf_or_nan(chan); 1401#endif 1402 1403 switch (reg->DstRegister.File) { 1404 case TGSI_FILE_NULL: 1405 dst = &null; 1406 break; 1407 1408 case TGSI_FILE_OUTPUT: 1409 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1410 + reg->DstRegister.Index].xyzw[chan_index]; 1411 break; 1412 1413 case TGSI_FILE_TEMPORARY: 1414 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); 1415 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 1416 break; 1417 1418 case TGSI_FILE_ADDRESS: 1419 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 1420 break; 1421 1422 default: 1423 assert( 0 ); 1424 return; 1425 } 1426 1427 if (inst->InstructionExtNv.CondFlowEnable) { 1428 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1429 uint swizzle; 1430 uint shift; 1431 uint mask; 1432 uint test; 1433 1434 /* Only CC0 supported. 1435 */ 1436 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1437 1438 switch (chan_index) { 1439 case CHAN_X: 1440 swizzle = inst->InstructionExtNv.CondSwizzleX; 1441 break; 1442 case CHAN_Y: 1443 swizzle = inst->InstructionExtNv.CondSwizzleY; 1444 break; 1445 case CHAN_Z: 1446 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1447 break; 1448 case CHAN_W: 1449 swizzle = inst->InstructionExtNv.CondSwizzleW; 1450 break; 1451 default: 1452 assert( 0 ); 1453 return; 1454 } 1455 1456 switch (swizzle) { 1457 case TGSI_SWIZZLE_X: 1458 shift = TGSI_EXEC_CC_X_SHIFT; 1459 mask = TGSI_EXEC_CC_X_MASK; 1460 break; 1461 case TGSI_SWIZZLE_Y: 1462 shift = TGSI_EXEC_CC_Y_SHIFT; 1463 mask = TGSI_EXEC_CC_Y_MASK; 1464 break; 1465 case TGSI_SWIZZLE_Z: 1466 shift = TGSI_EXEC_CC_Z_SHIFT; 1467 mask = TGSI_EXEC_CC_Z_MASK; 1468 break; 1469 case TGSI_SWIZZLE_W: 1470 shift = TGSI_EXEC_CC_W_SHIFT; 1471 mask = TGSI_EXEC_CC_W_MASK; 1472 break; 1473 default: 1474 assert( 0 ); 1475 return; 1476 } 1477 1478 switch (inst->InstructionExtNv.CondMask) { 1479 case TGSI_CC_GT: 1480 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1481 for (i = 0; i < QUAD_SIZE; i++) 1482 if (cc->u[i] & test) 1483 execmask &= ~(1 << i); 1484 break; 1485 1486 case TGSI_CC_EQ: 1487 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1488 for (i = 0; i < QUAD_SIZE; i++) 1489 if (cc->u[i] & test) 1490 execmask &= ~(1 << i); 1491 break; 1492 1493 case TGSI_CC_LT: 1494 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1495 for (i = 0; i < QUAD_SIZE; i++) 1496 if (cc->u[i] & test) 1497 execmask &= ~(1 << i); 1498 break; 1499 1500 case TGSI_CC_GE: 1501 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1502 for (i = 0; i < QUAD_SIZE; i++) 1503 if (cc->u[i] & test) 1504 execmask &= ~(1 << i); 1505 break; 1506 1507 case TGSI_CC_LE: 1508 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1509 for (i = 0; i < QUAD_SIZE; i++) 1510 if (cc->u[i] & test) 1511 execmask &= ~(1 << i); 1512 break; 1513 1514 case TGSI_CC_NE: 1515 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1516 for (i = 0; i < QUAD_SIZE; i++) 1517 if (cc->u[i] & test) 1518 execmask &= ~(1 << i); 1519 break; 1520 1521 case TGSI_CC_TR: 1522 break; 1523 1524 case TGSI_CC_FL: 1525 for (i = 0; i < QUAD_SIZE; i++) 1526 execmask &= ~(1 << i); 1527 break; 1528 1529 default: 1530 assert( 0 ); 1531 return; 1532 } 1533 } 1534 1535 switch (inst->Instruction.Saturate) { 1536 case TGSI_SAT_NONE: 1537 for (i = 0; i < QUAD_SIZE; i++) 1538 if (execmask & (1 << i)) 1539 dst->i[i] = chan->i[i]; 1540 break; 1541 1542 case TGSI_SAT_ZERO_ONE: 1543 for (i = 0; i < QUAD_SIZE; i++) 1544 if (execmask & (1 << i)) { 1545 if (chan->f[i] < 0.0f) 1546 dst->f[i] = 0.0f; 1547 else if (chan->f[i] > 1.0f) 1548 dst->f[i] = 1.0f; 1549 else 1550 dst->i[i] = chan->i[i]; 1551 } 1552 break; 1553 1554 case TGSI_SAT_MINUS_PLUS_ONE: 1555 for (i = 0; i < QUAD_SIZE; i++) 1556 if (execmask & (1 << i)) { 1557 if (chan->f[i] < -1.0f) 1558 dst->f[i] = -1.0f; 1559 else if (chan->f[i] > 1.0f) 1560 dst->f[i] = 1.0f; 1561 else 1562 dst->i[i] = chan->i[i]; 1563 } 1564 break; 1565 1566 default: 1567 assert( 0 ); 1568 } 1569 1570 if (inst->InstructionExtNv.CondDstUpdate) { 1571 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1572 uint shift; 1573 uint mask; 1574 1575 /* Only CC0 supported. 1576 */ 1577 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1578 1579 switch (chan_index) { 1580 case CHAN_X: 1581 shift = TGSI_EXEC_CC_X_SHIFT; 1582 mask = ~TGSI_EXEC_CC_X_MASK; 1583 break; 1584 case CHAN_Y: 1585 shift = TGSI_EXEC_CC_Y_SHIFT; 1586 mask = ~TGSI_EXEC_CC_Y_MASK; 1587 break; 1588 case CHAN_Z: 1589 shift = TGSI_EXEC_CC_Z_SHIFT; 1590 mask = ~TGSI_EXEC_CC_Z_MASK; 1591 break; 1592 case CHAN_W: 1593 shift = TGSI_EXEC_CC_W_SHIFT; 1594 mask = ~TGSI_EXEC_CC_W_MASK; 1595 break; 1596 default: 1597 assert( 0 ); 1598 return; 1599 } 1600 1601 for (i = 0; i < QUAD_SIZE; i++) 1602 if (execmask & (1 << i)) { 1603 cc->u[i] &= mask; 1604 if (dst->f[i] < 0.0f) 1605 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1606 else if (dst->f[i] > 0.0f) 1607 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1608 else if (dst->f[i] == 0.0f) 1609 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1610 else 1611 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1612 } 1613 } 1614} 1615 1616#define FETCH(VAL,INDEX,CHAN)\ 1617 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1618 1619#define STORE(VAL,INDEX,CHAN)\ 1620 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1621 1622 1623/** 1624 * Execute ARB-style KIL which is predicated by a src register. 1625 * Kill fragment if any of the four values is less than zero. 1626 */ 1627static void 1628exec_kil(struct tgsi_exec_machine *mach, 1629 const struct tgsi_full_instruction *inst) 1630{ 1631 uint uniquemask; 1632 uint chan_index; 1633 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1634 union tgsi_exec_channel r[1]; 1635 1636 /* This mask stores component bits that were already tested. Note that 1637 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1638 * tested. */ 1639 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1640 1641 for (chan_index = 0; chan_index < 4; chan_index++) 1642 { 1643 uint swizzle; 1644 uint i; 1645 1646 /* unswizzle channel */ 1647 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1648 &inst->FullSrcRegisters[0], 1649 chan_index); 1650 1651 /* check if the component has not been already tested */ 1652 if (uniquemask & (1 << swizzle)) 1653 continue; 1654 uniquemask |= 1 << swizzle; 1655 1656 FETCH(&r[0], 0, chan_index); 1657 for (i = 0; i < 4; i++) 1658 if (r[0].f[i] < 0.0f) 1659 kilmask |= 1 << i; 1660 } 1661 1662 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1663} 1664 1665/** 1666 * Execute NVIDIA-style KIL which is predicated by a condition code. 1667 * Kill fragment if the condition code is TRUE. 1668 */ 1669static void 1670exec_kilp(struct tgsi_exec_machine *mach, 1671 const struct tgsi_full_instruction *inst) 1672{ 1673 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1674 1675 if (inst->InstructionExtNv.CondFlowEnable) { 1676 uint swizzle[4]; 1677 uint chan_index; 1678 1679 kilmask = 0x0; 1680 1681 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1682 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1683 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1684 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1685 1686 for (chan_index = 0; chan_index < 4; chan_index++) 1687 { 1688 uint i; 1689 1690 for (i = 0; i < 4; i++) { 1691 /* TODO: evaluate the condition code */ 1692 if (0) 1693 kilmask |= 1 << i; 1694 } 1695 } 1696 } 1697 else { 1698 /* "unconditional" kil */ 1699 kilmask = mach->ExecMask; 1700 } 1701 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1702} 1703 1704 1705/* 1706 * Fetch a four texture samples using STR texture coordinates. 1707 */ 1708static void 1709fetch_texel( struct tgsi_sampler *sampler, 1710 const union tgsi_exec_channel *s, 1711 const union tgsi_exec_channel *t, 1712 const union tgsi_exec_channel *p, 1713 float lodbias, /* XXX should be float[4] */ 1714 union tgsi_exec_channel *r, 1715 union tgsi_exec_channel *g, 1716 union tgsi_exec_channel *b, 1717 union tgsi_exec_channel *a ) 1718{ 1719 uint j; 1720 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1721 1722 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1723 1724 for (j = 0; j < 4; j++) { 1725 r->f[j] = rgba[0][j]; 1726 g->f[j] = rgba[1][j]; 1727 b->f[j] = rgba[2][j]; 1728 a->f[j] = rgba[3][j]; 1729 } 1730} 1731 1732 1733static void 1734exec_tex(struct tgsi_exec_machine *mach, 1735 const struct tgsi_full_instruction *inst, 1736 boolean biasLod, 1737 boolean projected) 1738{ 1739 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1740 union tgsi_exec_channel r[4]; 1741 uint chan_index; 1742 float lodBias; 1743 1744 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1745 1746 switch (inst->InstructionExtTexture.Texture) { 1747 case TGSI_TEXTURE_1D: 1748 case TGSI_TEXTURE_SHADOW1D: 1749 1750 FETCH(&r[0], 0, CHAN_X); 1751 1752 if (projected) { 1753 FETCH(&r[1], 0, CHAN_W); 1754 micro_div( &r[0], &r[0], &r[1] ); 1755 } 1756 1757 if (biasLod) { 1758 FETCH(&r[1], 0, CHAN_W); 1759 lodBias = r[2].f[0]; 1760 } 1761 else 1762 lodBias = 0.0; 1763 1764 fetch_texel(mach->Samplers[unit], 1765 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1766 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1767 break; 1768 1769 case TGSI_TEXTURE_2D: 1770 case TGSI_TEXTURE_RECT: 1771 case TGSI_TEXTURE_SHADOW2D: 1772 case TGSI_TEXTURE_SHADOWRECT: 1773 1774 FETCH(&r[0], 0, CHAN_X); 1775 FETCH(&r[1], 0, CHAN_Y); 1776 FETCH(&r[2], 0, CHAN_Z); 1777 1778 if (projected) { 1779 FETCH(&r[3], 0, CHAN_W); 1780 micro_div( &r[0], &r[0], &r[3] ); 1781 micro_div( &r[1], &r[1], &r[3] ); 1782 micro_div( &r[2], &r[2], &r[3] ); 1783 } 1784 1785 if (biasLod) { 1786 FETCH(&r[3], 0, CHAN_W); 1787 lodBias = r[3].f[0]; 1788 } 1789 else 1790 lodBias = 0.0; 1791 1792 fetch_texel(mach->Samplers[unit], 1793 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1794 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1795 break; 1796 1797 case TGSI_TEXTURE_3D: 1798 case TGSI_TEXTURE_CUBE: 1799 1800 FETCH(&r[0], 0, CHAN_X); 1801 FETCH(&r[1], 0, CHAN_Y); 1802 FETCH(&r[2], 0, CHAN_Z); 1803 1804 if (projected) { 1805 FETCH(&r[3], 0, CHAN_W); 1806 micro_div( &r[0], &r[0], &r[3] ); 1807 micro_div( &r[1], &r[1], &r[3] ); 1808 micro_div( &r[2], &r[2], &r[3] ); 1809 } 1810 1811 if (biasLod) { 1812 FETCH(&r[3], 0, CHAN_W); 1813 lodBias = r[3].f[0]; 1814 } 1815 else 1816 lodBias = 0.0; 1817 1818 fetch_texel(mach->Samplers[unit], 1819 &r[0], &r[1], &r[2], lodBias, 1820 &r[0], &r[1], &r[2], &r[3]); 1821 break; 1822 1823 default: 1824 assert (0); 1825 } 1826 1827 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1828 STORE( &r[chan_index], 0, chan_index ); 1829 } 1830} 1831 1832 1833/** 1834 * Evaluate a constant-valued coefficient at the position of the 1835 * current quad. 1836 */ 1837static void 1838eval_constant_coef( 1839 struct tgsi_exec_machine *mach, 1840 unsigned attrib, 1841 unsigned chan ) 1842{ 1843 unsigned i; 1844 1845 for( i = 0; i < QUAD_SIZE; i++ ) { 1846 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1847 } 1848} 1849 1850/** 1851 * Evaluate a linear-valued coefficient at the position of the 1852 * current quad. 1853 */ 1854static void 1855eval_linear_coef( 1856 struct tgsi_exec_machine *mach, 1857 unsigned attrib, 1858 unsigned chan ) 1859{ 1860 const float x = mach->QuadPos.xyzw[0].f[0]; 1861 const float y = mach->QuadPos.xyzw[1].f[0]; 1862 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1863 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1864 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1865 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1866 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1867 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1868 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1869} 1870 1871/** 1872 * Evaluate a perspective-valued coefficient at the position of the 1873 * current quad. 1874 */ 1875static void 1876eval_perspective_coef( 1877 struct tgsi_exec_machine *mach, 1878 unsigned attrib, 1879 unsigned chan ) 1880{ 1881 const float x = mach->QuadPos.xyzw[0].f[0]; 1882 const float y = mach->QuadPos.xyzw[1].f[0]; 1883 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1884 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1885 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1886 const float *w = mach->QuadPos.xyzw[3].f; 1887 /* divide by W here */ 1888 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1889 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1890 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1891 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1892} 1893 1894 1895typedef void (* eval_coef_func)( 1896 struct tgsi_exec_machine *mach, 1897 unsigned attrib, 1898 unsigned chan ); 1899 1900static void 1901exec_declaration( 1902 struct tgsi_exec_machine *mach, 1903 const struct tgsi_full_declaration *decl ) 1904{ 1905 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1906 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1907 unsigned first, last, mask; 1908 eval_coef_func eval; 1909 1910 first = decl->DeclarationRange.First; 1911 last = decl->DeclarationRange.Last; 1912 mask = decl->Declaration.UsageMask; 1913 1914 switch( decl->Declaration.Interpolate ) { 1915 case TGSI_INTERPOLATE_CONSTANT: 1916 eval = eval_constant_coef; 1917 break; 1918 1919 case TGSI_INTERPOLATE_LINEAR: 1920 eval = eval_linear_coef; 1921 break; 1922 1923 case TGSI_INTERPOLATE_PERSPECTIVE: 1924 eval = eval_perspective_coef; 1925 break; 1926 1927 default: 1928 eval = NULL; 1929 assert( 0 ); 1930 } 1931 1932 if( mask == TGSI_WRITEMASK_XYZW ) { 1933 unsigned i, j; 1934 1935 for( i = first; i <= last; i++ ) { 1936 for( j = 0; j < NUM_CHANNELS; j++ ) { 1937 eval( mach, i, j ); 1938 } 1939 } 1940 } 1941 else { 1942 unsigned i, j; 1943 1944 for( j = 0; j < NUM_CHANNELS; j++ ) { 1945 if( mask & (1 << j) ) { 1946 for( i = first; i <= last; i++ ) { 1947 eval( mach, i, j ); 1948 } 1949 } 1950 } 1951 } 1952 } 1953 } 1954} 1955 1956static void 1957exec_instruction( 1958 struct tgsi_exec_machine *mach, 1959 const struct tgsi_full_instruction *inst, 1960 int *pc ) 1961{ 1962 uint chan_index; 1963 union tgsi_exec_channel r[10]; 1964 1965 (*pc)++; 1966 1967 switch (inst->Instruction.Opcode) { 1968 case TGSI_OPCODE_ARL: 1969 case TGSI_OPCODE_FLOOR: 1970 /* TGSI_OPCODE_FLR */ 1971 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1972 FETCH( &r[0], 0, chan_index ); 1973 micro_flr( &r[0], &r[0] ); 1974 STORE( &r[0], 0, chan_index ); 1975 } 1976 break; 1977 1978 case TGSI_OPCODE_MOV: 1979 case TGSI_OPCODE_SWZ: 1980 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1981 FETCH( &r[0], 0, chan_index ); 1982 STORE( &r[0], 0, chan_index ); 1983 } 1984 break; 1985 1986 case TGSI_OPCODE_LIT: 1987 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1988 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1989 } 1990 1991 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1992 FETCH( &r[0], 0, CHAN_X ); 1993 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1994 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1995 STORE( &r[0], 0, CHAN_Y ); 1996 } 1997 1998 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1999 FETCH( &r[1], 0, CHAN_Y ); 2000 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2001 2002 FETCH( &r[2], 0, CHAN_W ); 2003 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2004 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2005 micro_pow( &r[1], &r[1], &r[2] ); 2006 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2007 STORE( &r[0], 0, CHAN_Z ); 2008 } 2009 } 2010 2011 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2012 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2013 } 2014 break; 2015 2016 case TGSI_OPCODE_RCP: 2017 /* TGSI_OPCODE_RECIP */ 2018 FETCH( &r[0], 0, CHAN_X ); 2019 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2020 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2021 STORE( &r[0], 0, chan_index ); 2022 } 2023 break; 2024 2025 case TGSI_OPCODE_RSQ: 2026 /* TGSI_OPCODE_RECIPSQRT */ 2027 FETCH( &r[0], 0, CHAN_X ); 2028 micro_abs( &r[0], &r[0] ); 2029 micro_sqrt( &r[0], &r[0] ); 2030 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2031 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2032 STORE( &r[0], 0, chan_index ); 2033 } 2034 break; 2035 2036 case TGSI_OPCODE_EXP: 2037 FETCH( &r[0], 0, CHAN_X ); 2038 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2039 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2040 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2041 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2042 } 2043 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2044 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2045 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2046 } 2047 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2048 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2049 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2050 } 2051 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2052 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2053 } 2054 break; 2055 2056 case TGSI_OPCODE_LOG: 2057 FETCH( &r[0], 0, CHAN_X ); 2058 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2059 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2060 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2061 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2062 STORE( &r[0], 0, CHAN_X ); 2063 } 2064 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2065 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2066 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2067 STORE( &r[0], 0, CHAN_Y ); 2068 } 2069 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2070 STORE( &r[1], 0, CHAN_Z ); 2071 } 2072 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2073 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2074 } 2075 break; 2076 2077 case TGSI_OPCODE_MUL: 2078 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 2079 { 2080 FETCH(&r[0], 0, chan_index); 2081 FETCH(&r[1], 1, chan_index); 2082 2083 micro_mul( &r[0], &r[0], &r[1] ); 2084 2085 STORE(&r[0], 0, chan_index); 2086 } 2087 break; 2088 2089 case TGSI_OPCODE_ADD: 2090 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2091 FETCH( &r[0], 0, chan_index ); 2092 FETCH( &r[1], 1, chan_index ); 2093 micro_add( &r[0], &r[0], &r[1] ); 2094 STORE( &r[0], 0, chan_index ); 2095 } 2096 break; 2097 2098 case TGSI_OPCODE_DP3: 2099 /* TGSI_OPCODE_DOT3 */ 2100 FETCH( &r[0], 0, CHAN_X ); 2101 FETCH( &r[1], 1, CHAN_X ); 2102 micro_mul( &r[0], &r[0], &r[1] ); 2103 2104 FETCH( &r[1], 0, CHAN_Y ); 2105 FETCH( &r[2], 1, CHAN_Y ); 2106 micro_mul( &r[1], &r[1], &r[2] ); 2107 micro_add( &r[0], &r[0], &r[1] ); 2108 2109 FETCH( &r[1], 0, CHAN_Z ); 2110 FETCH( &r[2], 1, CHAN_Z ); 2111 micro_mul( &r[1], &r[1], &r[2] ); 2112 micro_add( &r[0], &r[0], &r[1] ); 2113 2114 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2115 STORE( &r[0], 0, chan_index ); 2116 } 2117 break; 2118 2119 case TGSI_OPCODE_DP4: 2120 /* TGSI_OPCODE_DOT4 */ 2121 FETCH(&r[0], 0, CHAN_X); 2122 FETCH(&r[1], 1, CHAN_X); 2123 2124 micro_mul( &r[0], &r[0], &r[1] ); 2125 2126 FETCH(&r[1], 0, CHAN_Y); 2127 FETCH(&r[2], 1, CHAN_Y); 2128 2129 micro_mul( &r[1], &r[1], &r[2] ); 2130 micro_add( &r[0], &r[0], &r[1] ); 2131 2132 FETCH(&r[1], 0, CHAN_Z); 2133 FETCH(&r[2], 1, CHAN_Z); 2134 2135 micro_mul( &r[1], &r[1], &r[2] ); 2136 micro_add( &r[0], &r[0], &r[1] ); 2137 2138 FETCH(&r[1], 0, CHAN_W); 2139 FETCH(&r[2], 1, CHAN_W); 2140 2141 micro_mul( &r[1], &r[1], &r[2] ); 2142 micro_add( &r[0], &r[0], &r[1] ); 2143 2144 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2145 STORE( &r[0], 0, chan_index ); 2146 } 2147 break; 2148 2149 case TGSI_OPCODE_DST: 2150 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2151 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2152 } 2153 2154 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2155 FETCH( &r[0], 0, CHAN_Y ); 2156 FETCH( &r[1], 1, CHAN_Y); 2157 micro_mul( &r[0], &r[0], &r[1] ); 2158 STORE( &r[0], 0, CHAN_Y ); 2159 } 2160 2161 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2162 FETCH( &r[0], 0, CHAN_Z ); 2163 STORE( &r[0], 0, CHAN_Z ); 2164 } 2165 2166 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2167 FETCH( &r[0], 1, CHAN_W ); 2168 STORE( &r[0], 0, CHAN_W ); 2169 } 2170 break; 2171 2172 case TGSI_OPCODE_MIN: 2173 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2174 FETCH(&r[0], 0, chan_index); 2175 FETCH(&r[1], 1, chan_index); 2176 2177 /* XXX use micro_min()?? */ 2178 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2179 2180 STORE(&r[0], 0, chan_index); 2181 } 2182 break; 2183 2184 case TGSI_OPCODE_MAX: 2185 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2186 FETCH(&r[0], 0, chan_index); 2187 FETCH(&r[1], 1, chan_index); 2188 2189 /* XXX use micro_max()?? */ 2190 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2191 2192 STORE(&r[0], 0, chan_index ); 2193 } 2194 break; 2195 2196 case TGSI_OPCODE_SLT: 2197 /* TGSI_OPCODE_SETLT */ 2198 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2199 FETCH( &r[0], 0, chan_index ); 2200 FETCH( &r[1], 1, chan_index ); 2201 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2202 STORE( &r[0], 0, chan_index ); 2203 } 2204 break; 2205 2206 case TGSI_OPCODE_SGE: 2207 /* TGSI_OPCODE_SETGE */ 2208 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2209 FETCH( &r[0], 0, chan_index ); 2210 FETCH( &r[1], 1, chan_index ); 2211 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2212 STORE( &r[0], 0, chan_index ); 2213 } 2214 break; 2215 2216 case TGSI_OPCODE_MAD: 2217 /* TGSI_OPCODE_MADD */ 2218 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2219 FETCH( &r[0], 0, chan_index ); 2220 FETCH( &r[1], 1, chan_index ); 2221 micro_mul( &r[0], &r[0], &r[1] ); 2222 FETCH( &r[1], 2, chan_index ); 2223 micro_add( &r[0], &r[0], &r[1] ); 2224 STORE( &r[0], 0, chan_index ); 2225 } 2226 break; 2227 2228 case TGSI_OPCODE_SUB: 2229 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2230 FETCH(&r[0], 0, chan_index); 2231 FETCH(&r[1], 1, chan_index); 2232 2233 micro_sub( &r[0], &r[0], &r[1] ); 2234 2235 STORE(&r[0], 0, chan_index); 2236 } 2237 break; 2238 2239 case TGSI_OPCODE_LERP: 2240 /* TGSI_OPCODE_LRP */ 2241 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2242 FETCH(&r[0], 0, chan_index); 2243 FETCH(&r[1], 1, chan_index); 2244 FETCH(&r[2], 2, chan_index); 2245 2246 micro_sub( &r[1], &r[1], &r[2] ); 2247 micro_mul( &r[0], &r[0], &r[1] ); 2248 micro_add( &r[0], &r[0], &r[2] ); 2249 2250 STORE(&r[0], 0, chan_index); 2251 } 2252 break; 2253 2254 case TGSI_OPCODE_CND: 2255 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2256 FETCH(&r[0], 0, chan_index); 2257 FETCH(&r[1], 1, chan_index); 2258 FETCH(&r[2], 2, chan_index); 2259 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2260 STORE(&r[0], 0, chan_index); 2261 } 2262 break; 2263 2264 case TGSI_OPCODE_CND0: 2265 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2266 FETCH(&r[0], 0, chan_index); 2267 FETCH(&r[1], 1, chan_index); 2268 FETCH(&r[2], 2, chan_index); 2269 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); 2270 STORE(&r[0], 0, chan_index); 2271 } 2272 break; 2273 2274 case TGSI_OPCODE_DOT2ADD: 2275 /* TGSI_OPCODE_DP2A */ 2276 FETCH( &r[0], 0, CHAN_X ); 2277 FETCH( &r[1], 1, CHAN_X ); 2278 micro_mul( &r[0], &r[0], &r[1] ); 2279 2280 FETCH( &r[1], 0, CHAN_Y ); 2281 FETCH( &r[2], 1, CHAN_Y ); 2282 micro_mul( &r[1], &r[1], &r[2] ); 2283 micro_add( &r[0], &r[0], &r[1] ); 2284 2285 FETCH( &r[2], 2, CHAN_X ); 2286 micro_add( &r[0], &r[0], &r[2] ); 2287 2288 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2289 STORE( &r[0], 0, chan_index ); 2290 } 2291 break; 2292 2293 case TGSI_OPCODE_INDEX: 2294 /* XXX: considered for removal */ 2295 assert (0); 2296 break; 2297 2298 case TGSI_OPCODE_NEGATE: 2299 /* XXX: considered for removal */ 2300 assert (0); 2301 break; 2302 2303 case TGSI_OPCODE_FRAC: 2304 /* TGSI_OPCODE_FRC */ 2305 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2306 FETCH( &r[0], 0, chan_index ); 2307 micro_frc( &r[0], &r[0] ); 2308 STORE( &r[0], 0, chan_index ); 2309 } 2310 break; 2311 2312 case TGSI_OPCODE_CLAMP: 2313 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2314 FETCH(&r[0], 0, chan_index); 2315 FETCH(&r[1], 1, chan_index); 2316 micro_max(&r[0], &r[0], &r[1]); 2317 FETCH(&r[1], 2, chan_index); 2318 micro_min(&r[0], &r[0], &r[1]); 2319 STORE(&r[0], 0, chan_index); 2320 } 2321 break; 2322 2323 case TGSI_OPCODE_ROUND: 2324 case TGSI_OPCODE_ARR: 2325 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2326 FETCH( &r[0], 0, chan_index ); 2327 micro_rnd( &r[0], &r[0] ); 2328 STORE( &r[0], 0, chan_index ); 2329 } 2330 break; 2331 2332 case TGSI_OPCODE_EXPBASE2: 2333 /* TGSI_OPCODE_EX2 */ 2334 FETCH(&r[0], 0, CHAN_X); 2335 2336#if FAST_MATH 2337 micro_exp2( &r[0], &r[0] ); 2338#else 2339 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2340#endif 2341 2342 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2343 STORE( &r[0], 0, chan_index ); 2344 } 2345 break; 2346 2347 case TGSI_OPCODE_LOGBASE2: 2348 /* TGSI_OPCODE_LG2 */ 2349 FETCH( &r[0], 0, CHAN_X ); 2350 micro_lg2( &r[0], &r[0] ); 2351 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2352 STORE( &r[0], 0, chan_index ); 2353 } 2354 break; 2355 2356 case TGSI_OPCODE_POWER: 2357 /* TGSI_OPCODE_POW */ 2358 FETCH(&r[0], 0, CHAN_X); 2359 FETCH(&r[1], 1, CHAN_X); 2360 2361 micro_pow( &r[0], &r[0], &r[1] ); 2362 2363 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2364 STORE( &r[0], 0, chan_index ); 2365 } 2366 break; 2367 2368 case TGSI_OPCODE_CROSSPRODUCT: 2369 /* TGSI_OPCODE_XPD */ 2370 FETCH(&r[0], 0, CHAN_Y); 2371 FETCH(&r[1], 1, CHAN_Z); 2372 2373 micro_mul( &r[2], &r[0], &r[1] ); 2374 2375 FETCH(&r[3], 0, CHAN_Z); 2376 FETCH(&r[4], 1, CHAN_Y); 2377 2378 micro_mul( &r[5], &r[3], &r[4] ); 2379 micro_sub( &r[2], &r[2], &r[5] ); 2380 2381 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2382 STORE( &r[2], 0, CHAN_X ); 2383 } 2384 2385 FETCH(&r[2], 1, CHAN_X); 2386 2387 micro_mul( &r[3], &r[3], &r[2] ); 2388 2389 FETCH(&r[5], 0, CHAN_X); 2390 2391 micro_mul( &r[1], &r[1], &r[5] ); 2392 micro_sub( &r[3], &r[3], &r[1] ); 2393 2394 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2395 STORE( &r[3], 0, CHAN_Y ); 2396 } 2397 2398 micro_mul( &r[5], &r[5], &r[4] ); 2399 micro_mul( &r[0], &r[0], &r[2] ); 2400 micro_sub( &r[5], &r[5], &r[0] ); 2401 2402 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2403 STORE( &r[5], 0, CHAN_Z ); 2404 } 2405 2406 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2407 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2408 } 2409 break; 2410 2411 case TGSI_OPCODE_MULTIPLYMATRIX: 2412 /* XXX: considered for removal */ 2413 assert (0); 2414 break; 2415 2416 case TGSI_OPCODE_ABS: 2417 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2418 FETCH(&r[0], 0, chan_index); 2419 2420 micro_abs( &r[0], &r[0] ); 2421 2422 STORE(&r[0], 0, chan_index); 2423 } 2424 break; 2425 2426 case TGSI_OPCODE_RCC: 2427 FETCH(&r[0], 0, CHAN_X); 2428 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2429 micro_float_clamp(&r[0], &r[0]); 2430 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2431 STORE(&r[0], 0, chan_index); 2432 } 2433 break; 2434 2435 case TGSI_OPCODE_DPH: 2436 FETCH(&r[0], 0, CHAN_X); 2437 FETCH(&r[1], 1, CHAN_X); 2438 2439 micro_mul( &r[0], &r[0], &r[1] ); 2440 2441 FETCH(&r[1], 0, CHAN_Y); 2442 FETCH(&r[2], 1, CHAN_Y); 2443 2444 micro_mul( &r[1], &r[1], &r[2] ); 2445 micro_add( &r[0], &r[0], &r[1] ); 2446 2447 FETCH(&r[1], 0, CHAN_Z); 2448 FETCH(&r[2], 1, CHAN_Z); 2449 2450 micro_mul( &r[1], &r[1], &r[2] ); 2451 micro_add( &r[0], &r[0], &r[1] ); 2452 2453 FETCH(&r[1], 1, CHAN_W); 2454 2455 micro_add( &r[0], &r[0], &r[1] ); 2456 2457 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2458 STORE( &r[0], 0, chan_index ); 2459 } 2460 break; 2461 2462 case TGSI_OPCODE_COS: 2463 FETCH(&r[0], 0, CHAN_X); 2464 2465 micro_cos( &r[0], &r[0] ); 2466 2467 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2468 STORE( &r[0], 0, chan_index ); 2469 } 2470 break; 2471 2472 case TGSI_OPCODE_DDX: 2473 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2474 FETCH( &r[0], 0, chan_index ); 2475 micro_ddx( &r[0], &r[0] ); 2476 STORE( &r[0], 0, chan_index ); 2477 } 2478 break; 2479 2480 case TGSI_OPCODE_DDY: 2481 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2482 FETCH( &r[0], 0, chan_index ); 2483 micro_ddy( &r[0], &r[0] ); 2484 STORE( &r[0], 0, chan_index ); 2485 } 2486 break; 2487 2488 case TGSI_OPCODE_KILP: 2489 exec_kilp (mach, inst); 2490 break; 2491 2492 case TGSI_OPCODE_KIL: 2493 exec_kil (mach, inst); 2494 break; 2495 2496 case TGSI_OPCODE_PK2H: 2497 assert (0); 2498 break; 2499 2500 case TGSI_OPCODE_PK2US: 2501 assert (0); 2502 break; 2503 2504 case TGSI_OPCODE_PK4B: 2505 assert (0); 2506 break; 2507 2508 case TGSI_OPCODE_PK4UB: 2509 assert (0); 2510 break; 2511 2512 case TGSI_OPCODE_RFL: 2513 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2514 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2515 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2516 /* r0 = dp3(src0, src0) */ 2517 FETCH(&r[2], 0, CHAN_X); 2518 micro_mul(&r[0], &r[2], &r[2]); 2519 FETCH(&r[4], 0, CHAN_Y); 2520 micro_mul(&r[8], &r[4], &r[4]); 2521 micro_add(&r[0], &r[0], &r[8]); 2522 FETCH(&r[6], 0, CHAN_Z); 2523 micro_mul(&r[8], &r[6], &r[6]); 2524 micro_add(&r[0], &r[0], &r[8]); 2525 2526 /* r1 = dp3(src0, src1) */ 2527 FETCH(&r[3], 1, CHAN_X); 2528 micro_mul(&r[1], &r[2], &r[3]); 2529 FETCH(&r[5], 1, CHAN_Y); 2530 micro_mul(&r[8], &r[4], &r[5]); 2531 micro_add(&r[1], &r[1], &r[8]); 2532 FETCH(&r[7], 1, CHAN_Z); 2533 micro_mul(&r[8], &r[6], &r[7]); 2534 micro_add(&r[1], &r[1], &r[8]); 2535 2536 /* r1 = 2 * r1 / r0 */ 2537 micro_add(&r[1], &r[1], &r[1]); 2538 micro_div(&r[1], &r[1], &r[0]); 2539 2540 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2541 micro_mul(&r[2], &r[2], &r[1]); 2542 micro_sub(&r[2], &r[2], &r[3]); 2543 STORE(&r[2], 0, CHAN_X); 2544 } 2545 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2546 micro_mul(&r[4], &r[4], &r[1]); 2547 micro_sub(&r[4], &r[4], &r[5]); 2548 STORE(&r[4], 0, CHAN_Y); 2549 } 2550 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2551 micro_mul(&r[6], &r[6], &r[1]); 2552 micro_sub(&r[6], &r[6], &r[7]); 2553 STORE(&r[6], 0, CHAN_Z); 2554 } 2555 } 2556 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2557 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2558 } 2559 break; 2560 2561 case TGSI_OPCODE_SEQ: 2562 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2563 FETCH( &r[0], 0, chan_index ); 2564 FETCH( &r[1], 1, chan_index ); 2565 micro_eq( &r[0], &r[0], &r[1], 2566 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2567 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2568 STORE( &r[0], 0, chan_index ); 2569 } 2570 break; 2571 2572 case TGSI_OPCODE_SFL: 2573 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2574 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2575 } 2576 break; 2577 2578 case TGSI_OPCODE_SGT: 2579 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2580 FETCH( &r[0], 0, chan_index ); 2581 FETCH( &r[1], 1, chan_index ); 2582 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2583 STORE( &r[0], 0, chan_index ); 2584 } 2585 break; 2586 2587 case TGSI_OPCODE_SIN: 2588 FETCH( &r[0], 0, CHAN_X ); 2589 micro_sin( &r[0], &r[0] ); 2590 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2591 STORE( &r[0], 0, chan_index ); 2592 } 2593 break; 2594 2595 case TGSI_OPCODE_SLE: 2596 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2597 FETCH( &r[0], 0, chan_index ); 2598 FETCH( &r[1], 1, chan_index ); 2599 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2600 STORE( &r[0], 0, chan_index ); 2601 } 2602 break; 2603 2604 case TGSI_OPCODE_SNE: 2605 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2606 FETCH( &r[0], 0, chan_index ); 2607 FETCH( &r[1], 1, chan_index ); 2608 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2609 STORE( &r[0], 0, chan_index ); 2610 } 2611 break; 2612 2613 case TGSI_OPCODE_STR: 2614 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2615 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2616 } 2617 break; 2618 2619 case TGSI_OPCODE_TEX: 2620 /* simple texture lookup */ 2621 /* src[0] = texcoord */ 2622 /* src[1] = sampler unit */ 2623 exec_tex(mach, inst, FALSE, FALSE); 2624 break; 2625 2626 case TGSI_OPCODE_TXB: 2627 /* Texture lookup with lod bias */ 2628 /* src[0] = texcoord (src[0].w = LOD bias) */ 2629 /* src[1] = sampler unit */ 2630 exec_tex(mach, inst, TRUE, FALSE); 2631 break; 2632 2633 case TGSI_OPCODE_TXD: 2634 /* Texture lookup with explict partial derivatives */ 2635 /* src[0] = texcoord */ 2636 /* src[1] = d[strq]/dx */ 2637 /* src[2] = d[strq]/dy */ 2638 /* src[3] = sampler unit */ 2639 assert (0); 2640 break; 2641 2642 case TGSI_OPCODE_TXL: 2643 /* Texture lookup with explit LOD */ 2644 /* src[0] = texcoord (src[0].w = LOD) */ 2645 /* src[1] = sampler unit */ 2646 exec_tex(mach, inst, TRUE, FALSE); 2647 break; 2648 2649 case TGSI_OPCODE_TXP: 2650 /* Texture lookup with projection */ 2651 /* src[0] = texcoord (src[0].w = projection) */ 2652 /* src[1] = sampler unit */ 2653 exec_tex(mach, inst, FALSE, TRUE); 2654 break; 2655 2656 case TGSI_OPCODE_UP2H: 2657 assert (0); 2658 break; 2659 2660 case TGSI_OPCODE_UP2US: 2661 assert (0); 2662 break; 2663 2664 case TGSI_OPCODE_UP4B: 2665 assert (0); 2666 break; 2667 2668 case TGSI_OPCODE_UP4UB: 2669 assert (0); 2670 break; 2671 2672 case TGSI_OPCODE_X2D: 2673 FETCH(&r[0], 1, CHAN_X); 2674 FETCH(&r[1], 1, CHAN_Y); 2675 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2676 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2677 FETCH(&r[2], 2, CHAN_X); 2678 micro_mul(&r[2], &r[2], &r[0]); 2679 FETCH(&r[3], 2, CHAN_Y); 2680 micro_mul(&r[3], &r[3], &r[1]); 2681 micro_add(&r[2], &r[2], &r[3]); 2682 FETCH(&r[3], 0, CHAN_X); 2683 micro_add(&r[2], &r[2], &r[3]); 2684 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2685 STORE(&r[2], 0, CHAN_X); 2686 } 2687 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2688 STORE(&r[2], 0, CHAN_Z); 2689 } 2690 } 2691 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2692 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2693 FETCH(&r[2], 2, CHAN_Z); 2694 micro_mul(&r[2], &r[2], &r[0]); 2695 FETCH(&r[3], 2, CHAN_W); 2696 micro_mul(&r[3], &r[3], &r[1]); 2697 micro_add(&r[2], &r[2], &r[3]); 2698 FETCH(&r[3], 0, CHAN_Y); 2699 micro_add(&r[2], &r[2], &r[3]); 2700 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2701 STORE(&r[2], 0, CHAN_Y); 2702 } 2703 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2704 STORE(&r[2], 0, CHAN_W); 2705 } 2706 } 2707 break; 2708 2709 case TGSI_OPCODE_ARA: 2710 assert (0); 2711 break; 2712 2713 case TGSI_OPCODE_BRA: 2714 assert (0); 2715 break; 2716 2717 case TGSI_OPCODE_CAL: 2718 /* skip the call if no execution channels are enabled */ 2719 if (mach->ExecMask) { 2720 /* do the call */ 2721 2722 /* push the Cond, Loop, Cont stacks */ 2723 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2724 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2725 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2726 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2727 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2728 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2729 2730 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2731 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2732 2733 /* note that PC was already incremented above */ 2734 mach->CallStack[mach->CallStackTop++] = *pc; 2735 *pc = inst->InstructionExtLabel.Label; 2736 } 2737 break; 2738 2739 case TGSI_OPCODE_RET: 2740 mach->FuncMask &= ~mach->ExecMask; 2741 UPDATE_EXEC_MASK(mach); 2742 2743 if (mach->FuncMask == 0x0) { 2744 /* really return now (otherwise, keep executing */ 2745 2746 if (mach->CallStackTop == 0) { 2747 /* returning from main() */ 2748 *pc = -1; 2749 return; 2750 } 2751 *pc = mach->CallStack[--mach->CallStackTop]; 2752 2753 /* pop the Cond, Loop, Cont stacks */ 2754 assert(mach->CondStackTop > 0); 2755 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2756 assert(mach->LoopStackTop > 0); 2757 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2758 assert(mach->ContStackTop > 0); 2759 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2760 assert(mach->FuncStackTop > 0); 2761 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2762 2763 UPDATE_EXEC_MASK(mach); 2764 } 2765 break; 2766 2767 case TGSI_OPCODE_SSG: 2768 /* TGSI_OPCODE_SGN */ 2769 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2770 FETCH( &r[0], 0, chan_index ); 2771 micro_sgn( &r[0], &r[0] ); 2772 STORE( &r[0], 0, chan_index ); 2773 } 2774 break; 2775 2776 case TGSI_OPCODE_CMP: 2777 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2778 FETCH(&r[0], 0, chan_index); 2779 FETCH(&r[1], 1, chan_index); 2780 FETCH(&r[2], 2, chan_index); 2781 2782 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2783 2784 STORE(&r[0], 0, chan_index); 2785 } 2786 break; 2787 2788 case TGSI_OPCODE_SCS: 2789 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2790 FETCH( &r[0], 0, CHAN_X ); 2791 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2792 micro_cos(&r[1], &r[0]); 2793 STORE(&r[1], 0, CHAN_X); 2794 } 2795 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2796 micro_sin(&r[1], &r[0]); 2797 STORE(&r[1], 0, CHAN_Y); 2798 } 2799 } 2800 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2801 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2802 } 2803 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2804 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2805 } 2806 break; 2807 2808 case TGSI_OPCODE_NRM: 2809 /* 3-component vector normalize */ 2810 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2811 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2812 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2813 /* r3 = sqrt(dp3(src0, src0)) */ 2814 FETCH(&r[0], 0, CHAN_X); 2815 micro_mul(&r[3], &r[0], &r[0]); 2816 FETCH(&r[1], 0, CHAN_Y); 2817 micro_mul(&r[4], &r[1], &r[1]); 2818 micro_add(&r[3], &r[3], &r[4]); 2819 FETCH(&r[2], 0, CHAN_Z); 2820 micro_mul(&r[4], &r[2], &r[2]); 2821 micro_add(&r[3], &r[3], &r[4]); 2822 micro_sqrt(&r[3], &r[3]); 2823 2824 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2825 micro_div(&r[0], &r[0], &r[3]); 2826 STORE(&r[0], 0, CHAN_X); 2827 } 2828 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2829 micro_div(&r[1], &r[1], &r[3]); 2830 STORE(&r[1], 0, CHAN_Y); 2831 } 2832 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2833 micro_div(&r[2], &r[2], &r[3]); 2834 STORE(&r[2], 0, CHAN_Z); 2835 } 2836 } 2837 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2838 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2839 } 2840 break; 2841 2842 case TGSI_OPCODE_NRM4: 2843 /* 4-component vector normalize */ 2844 { 2845 union tgsi_exec_channel tmp, dot; 2846 2847 /* tmp = dp4(src0, src0): */ 2848 FETCH( &r[0], 0, CHAN_X ); 2849 micro_mul( &tmp, &r[0], &r[0] ); 2850 2851 FETCH( &r[1], 0, CHAN_Y ); 2852 micro_mul( &dot, &r[1], &r[1] ); 2853 micro_add( &tmp, &tmp, &dot ); 2854 2855 FETCH( &r[2], 0, CHAN_Z ); 2856 micro_mul( &dot, &r[2], &r[2] ); 2857 micro_add( &tmp, &tmp, &dot ); 2858 2859 FETCH( &r[3], 0, CHAN_W ); 2860 micro_mul( &dot, &r[3], &r[3] ); 2861 micro_add( &tmp, &tmp, &dot ); 2862 2863 /* tmp = 1 / sqrt(tmp) */ 2864 micro_sqrt( &tmp, &tmp ); 2865 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2866 2867 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2868 /* chan = chan * tmp */ 2869 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2870 STORE( &r[chan_index], 0, chan_index ); 2871 } 2872 } 2873 break; 2874 2875 case TGSI_OPCODE_DIV: 2876 assert( 0 ); 2877 break; 2878 2879 case TGSI_OPCODE_DP2: 2880 FETCH( &r[0], 0, CHAN_X ); 2881 FETCH( &r[1], 1, CHAN_X ); 2882 micro_mul( &r[0], &r[0], &r[1] ); 2883 2884 FETCH( &r[1], 0, CHAN_Y ); 2885 FETCH( &r[2], 1, CHAN_Y ); 2886 micro_mul( &r[1], &r[1], &r[2] ); 2887 micro_add( &r[0], &r[0], &r[1] ); 2888 2889 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2890 STORE( &r[0], 0, chan_index ); 2891 } 2892 break; 2893 2894 case TGSI_OPCODE_IF: 2895 /* push CondMask */ 2896 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2897 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2898 FETCH( &r[0], 0, CHAN_X ); 2899 /* update CondMask */ 2900 if( ! r[0].u[0] ) { 2901 mach->CondMask &= ~0x1; 2902 } 2903 if( ! r[0].u[1] ) { 2904 mach->CondMask &= ~0x2; 2905 } 2906 if( ! r[0].u[2] ) { 2907 mach->CondMask &= ~0x4; 2908 } 2909 if( ! r[0].u[3] ) { 2910 mach->CondMask &= ~0x8; 2911 } 2912 UPDATE_EXEC_MASK(mach); 2913 /* Todo: If CondMask==0, jump to ELSE */ 2914 break; 2915 2916 case TGSI_OPCODE_ELSE: 2917 /* invert CondMask wrt previous mask */ 2918 { 2919 uint prevMask; 2920 assert(mach->CondStackTop > 0); 2921 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2922 mach->CondMask = ~mach->CondMask & prevMask; 2923 UPDATE_EXEC_MASK(mach); 2924 /* Todo: If CondMask==0, jump to ENDIF */ 2925 } 2926 break; 2927 2928 case TGSI_OPCODE_ENDIF: 2929 /* pop CondMask */ 2930 assert(mach->CondStackTop > 0); 2931 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2932 UPDATE_EXEC_MASK(mach); 2933 break; 2934 2935 case TGSI_OPCODE_END: 2936 /* halt execution */ 2937 *pc = -1; 2938 break; 2939 2940 case TGSI_OPCODE_REP: 2941 assert (0); 2942 break; 2943 2944 case TGSI_OPCODE_ENDREP: 2945 assert (0); 2946 break; 2947 2948 case TGSI_OPCODE_PUSHA: 2949 assert (0); 2950 break; 2951 2952 case TGSI_OPCODE_POPA: 2953 assert (0); 2954 break; 2955 2956 case TGSI_OPCODE_CEIL: 2957 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2958 FETCH( &r[0], 0, chan_index ); 2959 micro_ceil( &r[0], &r[0] ); 2960 STORE( &r[0], 0, chan_index ); 2961 } 2962 break; 2963 2964 case TGSI_OPCODE_I2F: 2965 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2966 FETCH( &r[0], 0, chan_index ); 2967 micro_i2f( &r[0], &r[0] ); 2968 STORE( &r[0], 0, chan_index ); 2969 } 2970 break; 2971 2972 case TGSI_OPCODE_NOT: 2973 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2974 FETCH( &r[0], 0, chan_index ); 2975 micro_not( &r[0], &r[0] ); 2976 STORE( &r[0], 0, chan_index ); 2977 } 2978 break; 2979 2980 case TGSI_OPCODE_TRUNC: 2981 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2982 FETCH( &r[0], 0, chan_index ); 2983 micro_trunc( &r[0], &r[0] ); 2984 STORE( &r[0], 0, chan_index ); 2985 } 2986 break; 2987 2988 case TGSI_OPCODE_SHL: 2989 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2990 FETCH( &r[0], 0, chan_index ); 2991 FETCH( &r[1], 1, chan_index ); 2992 micro_shl( &r[0], &r[0], &r[1] ); 2993 STORE( &r[0], 0, chan_index ); 2994 } 2995 break; 2996 2997 case TGSI_OPCODE_SHR: 2998 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2999 FETCH( &r[0], 0, chan_index ); 3000 FETCH( &r[1], 1, chan_index ); 3001 micro_ishr( &r[0], &r[0], &r[1] ); 3002 STORE( &r[0], 0, chan_index ); 3003 } 3004 break; 3005 3006 case TGSI_OPCODE_AND: 3007 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3008 FETCH( &r[0], 0, chan_index ); 3009 FETCH( &r[1], 1, chan_index ); 3010 micro_and( &r[0], &r[0], &r[1] ); 3011 STORE( &r[0], 0, chan_index ); 3012 } 3013 break; 3014 3015 case TGSI_OPCODE_OR: 3016 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3017 FETCH( &r[0], 0, chan_index ); 3018 FETCH( &r[1], 1, chan_index ); 3019 micro_or( &r[0], &r[0], &r[1] ); 3020 STORE( &r[0], 0, chan_index ); 3021 } 3022 break; 3023 3024 case TGSI_OPCODE_MOD: 3025 assert (0); 3026 break; 3027 3028 case TGSI_OPCODE_XOR: 3029 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3030 FETCH( &r[0], 0, chan_index ); 3031 FETCH( &r[1], 1, chan_index ); 3032 micro_xor( &r[0], &r[0], &r[1] ); 3033 STORE( &r[0], 0, chan_index ); 3034 } 3035 break; 3036 3037 case TGSI_OPCODE_SAD: 3038 assert (0); 3039 break; 3040 3041 case TGSI_OPCODE_TXF: 3042 assert (0); 3043 break; 3044 3045 case TGSI_OPCODE_TXQ: 3046 assert (0); 3047 break; 3048 3049 case TGSI_OPCODE_EMIT: 3050 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 3051 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 3052 break; 3053 3054 case TGSI_OPCODE_ENDPRIM: 3055 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 3056 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 3057 break; 3058 3059 case TGSI_OPCODE_LOOP: 3060 /* fall-through (for now) */ 3061 case TGSI_OPCODE_BGNLOOP2: 3062 /* push LoopMask and ContMasks */ 3063 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3064 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3065 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3066 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3067 break; 3068 3069 case TGSI_OPCODE_ENDLOOP: 3070 /* fall-through (for now at least) */ 3071 case TGSI_OPCODE_ENDLOOP2: 3072 /* Restore ContMask, but don't pop */ 3073 assert(mach->ContStackTop > 0); 3074 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3075 UPDATE_EXEC_MASK(mach); 3076 if (mach->ExecMask) { 3077 /* repeat loop: jump to instruction just past BGNLOOP */ 3078 *pc = inst->InstructionExtLabel.Label + 1; 3079 } 3080 else { 3081 /* exit loop: pop LoopMask */ 3082 assert(mach->LoopStackTop > 0); 3083 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3084 /* pop ContMask */ 3085 assert(mach->ContStackTop > 0); 3086 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3087 } 3088 UPDATE_EXEC_MASK(mach); 3089 break; 3090 3091 case TGSI_OPCODE_BRK: 3092 /* turn off loop channels for each enabled exec channel */ 3093 mach->LoopMask &= ~mach->ExecMask; 3094 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3095 UPDATE_EXEC_MASK(mach); 3096 break; 3097 3098 case TGSI_OPCODE_CONT: 3099 /* turn off cont channels for each enabled exec channel */ 3100 mach->ContMask &= ~mach->ExecMask; 3101 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3102 UPDATE_EXEC_MASK(mach); 3103 break; 3104 3105 case TGSI_OPCODE_BGNSUB: 3106 /* no-op */ 3107 break; 3108 3109 case TGSI_OPCODE_ENDSUB: 3110 /* no-op */ 3111 break; 3112 3113 case TGSI_OPCODE_NOISE1: 3114 assert( 0 ); 3115 break; 3116 3117 case TGSI_OPCODE_NOISE2: 3118 assert( 0 ); 3119 break; 3120 3121 case TGSI_OPCODE_NOISE3: 3122 assert( 0 ); 3123 break; 3124 3125 case TGSI_OPCODE_NOISE4: 3126 assert( 0 ); 3127 break; 3128 3129 case TGSI_OPCODE_NOP: 3130 break; 3131 3132 default: 3133 assert( 0 ); 3134 } 3135} 3136 3137 3138/** 3139 * Run TGSI interpreter. 3140 * \return bitmask of "alive" quad components 3141 */ 3142uint 3143tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3144{ 3145 uint i; 3146 int pc = 0; 3147 3148 mach->CondMask = 0xf; 3149 mach->LoopMask = 0xf; 3150 mach->ContMask = 0xf; 3151 mach->FuncMask = 0xf; 3152 mach->ExecMask = 0xf; 3153 3154 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 3155 assert(mach->CondStackTop == 0); 3156 assert(mach->LoopStackTop == 0); 3157 assert(mach->ContStackTop == 0); 3158 assert(mach->CallStackTop == 0); 3159 3160 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3161 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3162 3163 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3164 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3165 mach->Primitives[0] = 0; 3166 } 3167 3168 for (i = 0; i < QUAD_SIZE; i++) { 3169 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3170 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3171 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3172 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3173 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3174 } 3175 3176 /* execute declarations (interpolants) */ 3177 for (i = 0; i < mach->NumDeclarations; i++) { 3178 exec_declaration( mach, mach->Declarations+i ); 3179 } 3180 3181 /* execute instructions, until pc is set to -1 */ 3182 while (pc != -1) { 3183 assert(pc < (int) mach->NumInstructions); 3184 exec_instruction( mach, mach->Instructions + pc, &pc ); 3185 } 3186 3187#if 0 3188 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3189 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3190 /* 3191 * Scale back depth component. 3192 */ 3193 for (i = 0; i < 4; i++) 3194 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3195 } 3196#endif 3197 3198 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3199} 3200