tgsi_exec.c revision b9cb74c7f826dfd320f5e5b54aa933898f7ddd3d
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 
42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65/** for tgsi_full_instruction::Flags */ 66#define SOA_DEPENDENCY_FLAG 0x1 67 68#define TILE_TOP_LEFT 0 69#define TILE_TOP_RIGHT 1 70#define TILE_BOTTOM_LEFT 2 71#define TILE_BOTTOM_RIGHT 3 72 73#define CHAN_X 0 74#define CHAN_Y 1 75#define CHAN_Z 2 76#define CHAN_W 3 77 78/* 79 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 80 */ 81#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 82#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 83#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 84#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 85#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 86#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 87#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 88#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 89#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 90#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 91#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 92#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 93#define TEMP_128_I TGSI_EXEC_TEMP_128_I 94#define TEMP_128_C TGSI_EXEC_TEMP_128_C 95#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 96#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 97#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 98#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 99#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 100#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 101#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 102#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 103#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 104#define 
TEMP_CC_C TGSI_EXEC_TEMP_CC_C 105#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 106#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 107#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 108#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 109#define TEMP_R0 TGSI_EXEC_TEMP_R0 110 111#define IS_CHANNEL_ENABLED(INST, CHAN)\ 112 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 113 114#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 115 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 116 117#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 118 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 119 if (IS_CHANNEL_ENABLED( INST, CHAN )) 120 121#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 122 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 123 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 124 125 126/** The execution mask depends on the conditional mask and the loop mask */ 127#define UPDATE_EXEC_MASK(MACH) \ 128 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 129 130 131static const union tgsi_exec_channel ZeroVec = 132 { { 0.0, 0.0, 0.0, 0.0 } }; 133 134 135#ifdef DEBUG 136static void 137check_inf_or_nan(const union tgsi_exec_channel *chan) 138{ 139 assert(!util_is_inf_or_nan(chan->f[0])); 140 assert(!util_is_inf_or_nan(chan->f[1])); 141 assert(!util_is_inf_or_nan(chan->f[2])); 142 assert(!util_is_inf_or_nan(chan->f[3])); 143} 144#endif 145 146 147#ifdef DEBUG 148static void 149print_chan(const char *msg, const union tgsi_exec_channel *chan) 150{ 151 debug_printf("%s = {%f, %f, %f, %f}\n", 152 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 153} 154#endif 155 156 157#ifdef DEBUG 158static void 159print_temp(const struct tgsi_exec_machine *mach, uint index) 160{ 161 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 162 int i; 163 debug_printf("Temp[%u] =\n", index); 164 for (i = 0; i < 4; i++) { 165 debug_printf(" %c: { %f, %f, %f, %f }\n", 166 "XYZW"[i], 167 tmp->xyzw[i].f[0], 168 tmp->xyzw[i].f[1], 169 tmp->xyzw[i].f[2], 170 
tmp->xyzw[i].f[3]); 171 } 172} 173#endif 174 175 176/** 177 * Check if there's a potential src/dst register data dependency when 178 * using SOA execution. 179 * Example: 180 * MOV T, T.yxwz; 181 * This would expand into: 182 * MOV t0, t1; 183 * MOV t1, t0; 184 * MOV t2, t3; 185 * MOV t3, t2; 186 * The second instruction will have the wrong value for t0 if executed as-is. 187 */ 188boolean 189tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 190{ 191 uint i, chan; 192 193 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 194 if (writemask == TGSI_WRITEMASK_X || 195 writemask == TGSI_WRITEMASK_Y || 196 writemask == TGSI_WRITEMASK_Z || 197 writemask == TGSI_WRITEMASK_W || 198 writemask == TGSI_WRITEMASK_NONE) { 199 /* no chance of data dependency */ 200 return FALSE; 201 } 202 203 /* loop over src regs */ 204 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 205 if ((inst->FullSrcRegisters[i].SrcRegister.File == 206 inst->FullDstRegisters[0].DstRegister.File) && 207 (inst->FullSrcRegisters[i].SrcRegister.Index == 208 inst->FullDstRegisters[0].DstRegister.Index)) { 209 /* loop over dest channels */ 210 uint channelsWritten = 0x0; 211 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 212 /* check if we're reading a channel that's been written */ 213 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); 214 if (channelsWritten & (1 << swizzle)) { 215 return TRUE; 216 } 217 218 channelsWritten |= (1 << chan); 219 } 220 } 221 } 222 return FALSE; 223} 224 225 226/** 227 * Initialize machine state by expanding tokens to full instructions, 228 * allocating temporary storage, setting up constants, etc. 229 * After this, we can call tgsi_exec_machine_run() many times. 
230 */ 231void 232tgsi_exec_machine_bind_shader( 233 struct tgsi_exec_machine *mach, 234 const struct tgsi_token *tokens, 235 uint numSamplers, 236 struct tgsi_sampler **samplers) 237{ 238 uint k; 239 struct tgsi_parse_context parse; 240 struct tgsi_exec_labels *labels = &mach->Labels; 241 struct tgsi_full_instruction *instructions; 242 struct tgsi_full_declaration *declarations; 243 uint maxInstructions = 10, numInstructions = 0; 244 uint maxDeclarations = 10, numDeclarations = 0; 245 uint instno = 0; 246 247#if 0 248 tgsi_dump(tokens, 0); 249#endif 250 251 util_init_math(); 252 253 mach->Tokens = tokens; 254 mach->Samplers = samplers; 255 256 k = tgsi_parse_init (&parse, mach->Tokens); 257 if (k != TGSI_PARSE_OK) { 258 debug_printf( "Problem parsing!\n" ); 259 return; 260 } 261 262 mach->Processor = parse.FullHeader.Processor.Processor; 263 mach->ImmLimit = 0; 264 labels->count = 0; 265 266 declarations = (struct tgsi_full_declaration *) 267 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 268 269 if (!declarations) { 270 return; 271 } 272 273 instructions = (struct tgsi_full_instruction *) 274 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 275 276 if (!instructions) { 277 FREE( declarations ); 278 return; 279 } 280 281 while( !tgsi_parse_end_of_tokens( &parse ) ) { 282 uint pointer = parse.Position; 283 uint i; 284 285 tgsi_parse_token( &parse ); 286 switch( parse.FullToken.Token.Type ) { 287 case TGSI_TOKEN_TYPE_DECLARATION: 288 /* save expanded declaration */ 289 if (numDeclarations == maxDeclarations) { 290 declarations = REALLOC(declarations, 291 maxDeclarations 292 * sizeof(struct tgsi_full_declaration), 293 (maxDeclarations + 10) 294 * sizeof(struct tgsi_full_declaration)); 295 maxDeclarations += 10; 296 } 297 memcpy(declarations + numDeclarations, 298 &parse.FullToken.FullDeclaration, 299 sizeof(declarations[0])); 300 numDeclarations++; 301 break; 302 303 case TGSI_TOKEN_TYPE_IMMEDIATE: 304 { 305 uint size = 
parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 306 assert( size <= 4 ); 307 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 308 309 for( i = 0; i < size; i++ ) { 310 mach->Imms[mach->ImmLimit][i] = 311 parse.FullToken.FullImmediate.u[i].Float; 312 } 313 mach->ImmLimit += 1; 314 } 315 break; 316 317 case TGSI_TOKEN_TYPE_INSTRUCTION: 318 assert( labels->count < MAX_LABELS ); 319 320 labels->labels[labels->count][0] = instno; 321 labels->labels[labels->count][1] = pointer; 322 labels->count++; 323 324 /* save expanded instruction */ 325 if (numInstructions == maxInstructions) { 326 instructions = REALLOC(instructions, 327 maxInstructions 328 * sizeof(struct tgsi_full_instruction), 329 (maxInstructions + 10) 330 * sizeof(struct tgsi_full_instruction)); 331 maxInstructions += 10; 332 } 333 334 if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { 335 uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; 336 parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG; 337 /* XXX we only handle SOA dependencies properly for MOV/SWZ 338 * at this time! 
339 */ 340 if (opcode != TGSI_OPCODE_MOV) { 341 debug_printf("Warning: SOA dependency in instruction" 342 " is not handled:\n"); 343 tgsi_dump_instruction(&parse.FullToken.FullInstruction, 344 numInstructions); 345 } 346 } 347 348 memcpy(instructions + numInstructions, 349 &parse.FullToken.FullInstruction, 350 sizeof(instructions[0])); 351 352 numInstructions++; 353 break; 354 355 default: 356 assert( 0 ); 357 } 358 } 359 tgsi_parse_free (&parse); 360 361 if (mach->Declarations) { 362 FREE( mach->Declarations ); 363 } 364 mach->Declarations = declarations; 365 mach->NumDeclarations = numDeclarations; 366 367 if (mach->Instructions) { 368 FREE( mach->Instructions ); 369 } 370 mach->Instructions = instructions; 371 mach->NumInstructions = numInstructions; 372} 373 374 375struct tgsi_exec_machine * 376tgsi_exec_machine_create( void ) 377{ 378 struct tgsi_exec_machine *mach; 379 uint i; 380 381 mach = align_malloc( sizeof *mach, 16 ); 382 if (!mach) 383 goto fail; 384 385 memset(mach, 0, sizeof(*mach)); 386 387 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 388 389 /* Setup constants. 
*/ 390 for( i = 0; i < 4; i++ ) { 391 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 392 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 393 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 394 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 395 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 396 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 397 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 398 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 399 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 400 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 401 } 402 403#ifdef DEBUG 404 /* silence warnings */ 405 (void) print_chan; 406 (void) print_temp; 407#endif 408 409 return mach; 410 411fail: 412 align_free(mach); 413 return NULL; 414} 415 416 417void 418tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 419{ 420 if (mach) { 421 FREE(mach->Instructions); 422 FREE(mach->Declarations); 423 } 424 425 align_free(mach); 426} 427 428 429static void 430micro_abs( 431 union tgsi_exec_channel *dst, 432 const union tgsi_exec_channel *src ) 433{ 434 dst->f[0] = fabsf( src->f[0] ); 435 dst->f[1] = fabsf( src->f[1] ); 436 dst->f[2] = fabsf( src->f[2] ); 437 dst->f[3] = fabsf( src->f[3] ); 438} 439 440static void 441micro_add( 442 union tgsi_exec_channel *dst, 443 const union tgsi_exec_channel *src0, 444 const union tgsi_exec_channel *src1 ) 445{ 446 dst->f[0] = src0->f[0] + src1->f[0]; 447 dst->f[1] = src0->f[1] + src1->f[1]; 448 dst->f[2] = src0->f[2] + src1->f[2]; 449 dst->f[3] = src0->f[3] + src1->f[3]; 450} 451 452#if 0 453static void 454micro_iadd( 455 union tgsi_exec_channel *dst, 456 const union tgsi_exec_channel *src0, 457 const union tgsi_exec_channel *src1 ) 458{ 459 dst->i[0] = src0->i[0] + src1->i[0]; 460 dst->i[1] = src0->i[1] + src1->i[1]; 461 dst->i[2] = src0->i[2] + src1->i[2]; 462 dst->i[3] = src0->i[3] + src1->i[3]; 463} 464#endif 465 466static void 467micro_and( 468 union 
tgsi_exec_channel *dst, 469 const union tgsi_exec_channel *src0, 470 const union tgsi_exec_channel *src1 ) 471{ 472 dst->u[0] = src0->u[0] & src1->u[0]; 473 dst->u[1] = src0->u[1] & src1->u[1]; 474 dst->u[2] = src0->u[2] & src1->u[2]; 475 dst->u[3] = src0->u[3] & src1->u[3]; 476} 477 478static void 479micro_ceil( 480 union tgsi_exec_channel *dst, 481 const union tgsi_exec_channel *src ) 482{ 483 dst->f[0] = ceilf( src->f[0] ); 484 dst->f[1] = ceilf( src->f[1] ); 485 dst->f[2] = ceilf( src->f[2] ); 486 dst->f[3] = ceilf( src->f[3] ); 487} 488 489static void 490micro_cos( 491 union tgsi_exec_channel *dst, 492 const union tgsi_exec_channel *src ) 493{ 494 dst->f[0] = cosf( src->f[0] ); 495 dst->f[1] = cosf( src->f[1] ); 496 dst->f[2] = cosf( src->f[2] ); 497 dst->f[3] = cosf( src->f[3] ); 498} 499 500static void 501micro_ddx( 502 union tgsi_exec_channel *dst, 503 const union tgsi_exec_channel *src ) 504{ 505 dst->f[0] = 506 dst->f[1] = 507 dst->f[2] = 508 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 509} 510 511static void 512micro_ddy( 513 union tgsi_exec_channel *dst, 514 const union tgsi_exec_channel *src ) 515{ 516 dst->f[0] = 517 dst->f[1] = 518 dst->f[2] = 519 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 520} 521 522static void 523micro_div( 524 union tgsi_exec_channel *dst, 525 const union tgsi_exec_channel *src0, 526 const union tgsi_exec_channel *src1 ) 527{ 528 if (src1->f[0] != 0) { 529 dst->f[0] = src0->f[0] / src1->f[0]; 530 } 531 if (src1->f[1] != 0) { 532 dst->f[1] = src0->f[1] / src1->f[1]; 533 } 534 if (src1->f[2] != 0) { 535 dst->f[2] = src0->f[2] / src1->f[2]; 536 } 537 if (src1->f[3] != 0) { 538 dst->f[3] = src0->f[3] / src1->f[3]; 539 } 540} 541 542#if 0 543static void 544micro_udiv( 545 union tgsi_exec_channel *dst, 546 const union tgsi_exec_channel *src0, 547 const union tgsi_exec_channel *src1 ) 548{ 549 dst->u[0] = src0->u[0] / src1->u[0]; 550 dst->u[1] = src0->u[1] / src1->u[1]; 551 dst->u[2] = src0->u[2] 
/ src1->u[2]; 552 dst->u[3] = src0->u[3] / src1->u[3]; 553} 554#endif 555 556static void 557micro_eq( 558 union tgsi_exec_channel *dst, 559 const union tgsi_exec_channel *src0, 560 const union tgsi_exec_channel *src1, 561 const union tgsi_exec_channel *src2, 562 const union tgsi_exec_channel *src3 ) 563{ 564 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 565 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 566 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 567 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 568} 569 570#if 0 571static void 572micro_ieq( 573 union tgsi_exec_channel *dst, 574 const union tgsi_exec_channel *src0, 575 const union tgsi_exec_channel *src1, 576 const union tgsi_exec_channel *src2, 577 const union tgsi_exec_channel *src3 ) 578{ 579 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 580 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 581 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 582 dst->i[3] = src0->i[3] == src1->i[3] ? 
src2->i[3] : src3->i[3]; 583} 584#endif 585 586static void 587micro_exp2( 588 union tgsi_exec_channel *dst, 589 const union tgsi_exec_channel *src) 590{ 591#if FAST_MATH 592 dst->f[0] = util_fast_exp2( src->f[0] ); 593 dst->f[1] = util_fast_exp2( src->f[1] ); 594 dst->f[2] = util_fast_exp2( src->f[2] ); 595 dst->f[3] = util_fast_exp2( src->f[3] ); 596#else 597 dst->f[0] = powf( 2.0f, src->f[0] ); 598 dst->f[1] = powf( 2.0f, src->f[1] ); 599 dst->f[2] = powf( 2.0f, src->f[2] ); 600 dst->f[3] = powf( 2.0f, src->f[3] ); 601#endif 602} 603 604#if 0 605static void 606micro_f2ut( 607 union tgsi_exec_channel *dst, 608 const union tgsi_exec_channel *src ) 609{ 610 dst->u[0] = (uint) src->f[0]; 611 dst->u[1] = (uint) src->f[1]; 612 dst->u[2] = (uint) src->f[2]; 613 dst->u[3] = (uint) src->f[3]; 614} 615#endif 616 617static void 618micro_float_clamp(union tgsi_exec_channel *dst, 619 const union tgsi_exec_channel *src) 620{ 621 uint i; 622 623 for (i = 0; i < 4; i++) { 624 if (src->f[i] > 0.0f) { 625 if (src->f[i] > 1.884467e+019f) 626 dst->f[i] = 1.884467e+019f; 627 else if (src->f[i] < 5.42101e-020f) 628 dst->f[i] = 5.42101e-020f; 629 else 630 dst->f[i] = src->f[i]; 631 } 632 else { 633 if (src->f[i] < -1.884467e+019f) 634 dst->f[i] = -1.884467e+019f; 635 else if (src->f[i] > -5.42101e-020f) 636 dst->f[i] = -5.42101e-020f; 637 else 638 dst->f[i] = src->f[i]; 639 } 640 } 641} 642 643static void 644micro_flr( 645 union tgsi_exec_channel *dst, 646 const union tgsi_exec_channel *src ) 647{ 648 dst->f[0] = floorf( src->f[0] ); 649 dst->f[1] = floorf( src->f[1] ); 650 dst->f[2] = floorf( src->f[2] ); 651 dst->f[3] = floorf( src->f[3] ); 652} 653 654static void 655micro_frc( 656 union tgsi_exec_channel *dst, 657 const union tgsi_exec_channel *src ) 658{ 659 dst->f[0] = src->f[0] - floorf( src->f[0] ); 660 dst->f[1] = src->f[1] - floorf( src->f[1] ); 661 dst->f[2] = src->f[2] - floorf( src->f[2] ); 662 dst->f[3] = src->f[3] - floorf( src->f[3] ); 663} 664 665static void 
666micro_i2f( 667 union tgsi_exec_channel *dst, 668 const union tgsi_exec_channel *src ) 669{ 670 dst->f[0] = (float) src->i[0]; 671 dst->f[1] = (float) src->i[1]; 672 dst->f[2] = (float) src->i[2]; 673 dst->f[3] = (float) src->i[3]; 674} 675 676static void 677micro_lg2( 678 union tgsi_exec_channel *dst, 679 const union tgsi_exec_channel *src ) 680{ 681#if FAST_MATH 682 dst->f[0] = util_fast_log2( src->f[0] ); 683 dst->f[1] = util_fast_log2( src->f[1] ); 684 dst->f[2] = util_fast_log2( src->f[2] ); 685 dst->f[3] = util_fast_log2( src->f[3] ); 686#else 687 dst->f[0] = logf( src->f[0] ) * 1.442695f; 688 dst->f[1] = logf( src->f[1] ) * 1.442695f; 689 dst->f[2] = logf( src->f[2] ) * 1.442695f; 690 dst->f[3] = logf( src->f[3] ) * 1.442695f; 691#endif 692} 693 694static void 695micro_le( 696 union tgsi_exec_channel *dst, 697 const union tgsi_exec_channel *src0, 698 const union tgsi_exec_channel *src1, 699 const union tgsi_exec_channel *src2, 700 const union tgsi_exec_channel *src3 ) 701{ 702 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 703 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 704 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 705 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 706} 707 708static void 709micro_lt( 710 union tgsi_exec_channel *dst, 711 const union tgsi_exec_channel *src0, 712 const union tgsi_exec_channel *src1, 713 const union tgsi_exec_channel *src2, 714 const union tgsi_exec_channel *src3 ) 715{ 716 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 717 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 718 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 719 dst->f[3] = src0->f[3] < src1->f[3] ? 
src2->f[3] : src3->f[3]; 720} 721 722#if 0 723static void 724micro_ilt( 725 union tgsi_exec_channel *dst, 726 const union tgsi_exec_channel *src0, 727 const union tgsi_exec_channel *src1, 728 const union tgsi_exec_channel *src2, 729 const union tgsi_exec_channel *src3 ) 730{ 731 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 732 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 733 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 734 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 735} 736#endif 737 738#if 0 739static void 740micro_ult( 741 union tgsi_exec_channel *dst, 742 const union tgsi_exec_channel *src0, 743 const union tgsi_exec_channel *src1, 744 const union tgsi_exec_channel *src2, 745 const union tgsi_exec_channel *src3 ) 746{ 747 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 748 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 749 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 750 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 751} 752#endif 753 754static void 755micro_max( 756 union tgsi_exec_channel *dst, 757 const union tgsi_exec_channel *src0, 758 const union tgsi_exec_channel *src1 ) 759{ 760 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 761 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 762 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 763 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 764} 765 766#if 0 767static void 768micro_imax( 769 union tgsi_exec_channel *dst, 770 const union tgsi_exec_channel *src0, 771 const union tgsi_exec_channel *src1 ) 772{ 773 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 774 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 775 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 776 dst->i[3] = src0->i[3] > src1->i[3] ? 
src0->i[3] : src1->i[3]; 777} 778#endif 779 780#if 0 781static void 782micro_umax( 783 union tgsi_exec_channel *dst, 784 const union tgsi_exec_channel *src0, 785 const union tgsi_exec_channel *src1 ) 786{ 787 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 788 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 789 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 790 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 791} 792#endif 793 794static void 795micro_min( 796 union tgsi_exec_channel *dst, 797 const union tgsi_exec_channel *src0, 798 const union tgsi_exec_channel *src1 ) 799{ 800 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 801 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 802 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 803 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 804} 805 806#if 0 807static void 808micro_imin( 809 union tgsi_exec_channel *dst, 810 const union tgsi_exec_channel *src0, 811 const union tgsi_exec_channel *src1 ) 812{ 813 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 814 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 815 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 816 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 817} 818#endif 819 820#if 0 821static void 822micro_umin( 823 union tgsi_exec_channel *dst, 824 const union tgsi_exec_channel *src0, 825 const union tgsi_exec_channel *src1 ) 826{ 827 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 828 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 829 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 830 dst->u[3] = src0->u[3] < src1->u[3] ? 
src0->u[3] : src1->u[3]; 831} 832#endif 833 834#if 0 835static void 836micro_umod( 837 union tgsi_exec_channel *dst, 838 const union tgsi_exec_channel *src0, 839 const union tgsi_exec_channel *src1 ) 840{ 841 dst->u[0] = src0->u[0] % src1->u[0]; 842 dst->u[1] = src0->u[1] % src1->u[1]; 843 dst->u[2] = src0->u[2] % src1->u[2]; 844 dst->u[3] = src0->u[3] % src1->u[3]; 845} 846#endif 847 848static void 849micro_mul( 850 union tgsi_exec_channel *dst, 851 const union tgsi_exec_channel *src0, 852 const union tgsi_exec_channel *src1 ) 853{ 854 dst->f[0] = src0->f[0] * src1->f[0]; 855 dst->f[1] = src0->f[1] * src1->f[1]; 856 dst->f[2] = src0->f[2] * src1->f[2]; 857 dst->f[3] = src0->f[3] * src1->f[3]; 858} 859 860#if 0 861static void 862micro_imul( 863 union tgsi_exec_channel *dst, 864 const union tgsi_exec_channel *src0, 865 const union tgsi_exec_channel *src1 ) 866{ 867 dst->i[0] = src0->i[0] * src1->i[0]; 868 dst->i[1] = src0->i[1] * src1->i[1]; 869 dst->i[2] = src0->i[2] * src1->i[2]; 870 dst->i[3] = src0->i[3] * src1->i[3]; 871} 872#endif 873 874#if 0 875static void 876micro_imul64( 877 union tgsi_exec_channel *dst0, 878 union tgsi_exec_channel *dst1, 879 const union tgsi_exec_channel *src0, 880 const union tgsi_exec_channel *src1 ) 881{ 882 dst1->i[0] = src0->i[0] * src1->i[0]; 883 dst1->i[1] = src0->i[1] * src1->i[1]; 884 dst1->i[2] = src0->i[2] * src1->i[2]; 885 dst1->i[3] = src0->i[3] * src1->i[3]; 886 dst0->i[0] = 0; 887 dst0->i[1] = 0; 888 dst0->i[2] = 0; 889 dst0->i[3] = 0; 890} 891#endif 892 893#if 0 894static void 895micro_umul64( 896 union tgsi_exec_channel *dst0, 897 union tgsi_exec_channel *dst1, 898 const union tgsi_exec_channel *src0, 899 const union tgsi_exec_channel *src1 ) 900{ 901 dst1->u[0] = src0->u[0] * src1->u[0]; 902 dst1->u[1] = src0->u[1] * src1->u[1]; 903 dst1->u[2] = src0->u[2] * src1->u[2]; 904 dst1->u[3] = src0->u[3] * src1->u[3]; 905 dst0->u[0] = 0; 906 dst0->u[1] = 0; 907 dst0->u[2] = 0; 908 dst0->u[3] = 0; 909} 910#endif 911 912 913#if 
0 914static void 915micro_movc( 916 union tgsi_exec_channel *dst, 917 const union tgsi_exec_channel *src0, 918 const union tgsi_exec_channel *src1, 919 const union tgsi_exec_channel *src2 ) 920{ 921 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 922 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 923 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 924 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 925} 926#endif 927 928static void 929micro_neg( 930 union tgsi_exec_channel *dst, 931 const union tgsi_exec_channel *src ) 932{ 933 dst->f[0] = -src->f[0]; 934 dst->f[1] = -src->f[1]; 935 dst->f[2] = -src->f[2]; 936 dst->f[3] = -src->f[3]; 937} 938 939#if 0 940static void 941micro_ineg( 942 union tgsi_exec_channel *dst, 943 const union tgsi_exec_channel *src ) 944{ 945 dst->i[0] = -src->i[0]; 946 dst->i[1] = -src->i[1]; 947 dst->i[2] = -src->i[2]; 948 dst->i[3] = -src->i[3]; 949} 950#endif 951 952static void 953micro_not( 954 union tgsi_exec_channel *dst, 955 const union tgsi_exec_channel *src ) 956{ 957 dst->u[0] = ~src->u[0]; 958 dst->u[1] = ~src->u[1]; 959 dst->u[2] = ~src->u[2]; 960 dst->u[3] = ~src->u[3]; 961} 962 963static void 964micro_or( 965 union tgsi_exec_channel *dst, 966 const union tgsi_exec_channel *src0, 967 const union tgsi_exec_channel *src1 ) 968{ 969 dst->u[0] = src0->u[0] | src1->u[0]; 970 dst->u[1] = src0->u[1] | src1->u[1]; 971 dst->u[2] = src0->u[2] | src1->u[2]; 972 dst->u[3] = src0->u[3] | src1->u[3]; 973} 974 975static void 976micro_pow( 977 union tgsi_exec_channel *dst, 978 const union tgsi_exec_channel *src0, 979 const union tgsi_exec_channel *src1 ) 980{ 981#if FAST_MATH 982 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 983 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 984 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 985 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 986#else 987 dst->f[0] = powf( src0->f[0], src1->f[0] ); 988 dst->f[1] = powf( src0->f[1], src1->f[1] ); 989 dst->f[2] = powf( src0->f[2], 
src1->f[2] ); 990 dst->f[3] = powf( src0->f[3], src1->f[3] ); 991#endif 992} 993 994static void 995micro_rnd( 996 union tgsi_exec_channel *dst, 997 const union tgsi_exec_channel *src ) 998{ 999 dst->f[0] = floorf( src->f[0] + 0.5f ); 1000 dst->f[1] = floorf( src->f[1] + 0.5f ); 1001 dst->f[2] = floorf( src->f[2] + 0.5f ); 1002 dst->f[3] = floorf( src->f[3] + 0.5f ); 1003} 1004 1005static void 1006micro_sgn( 1007 union tgsi_exec_channel *dst, 1008 const union tgsi_exec_channel *src ) 1009{ 1010 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 1011 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 1012 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 1013 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1014} 1015 1016static void 1017micro_shl( 1018 union tgsi_exec_channel *dst, 1019 const union tgsi_exec_channel *src0, 1020 const union tgsi_exec_channel *src1 ) 1021{ 1022 dst->i[0] = src0->i[0] << src1->i[0]; 1023 dst->i[1] = src0->i[1] << src1->i[1]; 1024 dst->i[2] = src0->i[2] << src1->i[2]; 1025 dst->i[3] = src0->i[3] << src1->i[3]; 1026} 1027 1028static void 1029micro_ishr( 1030 union tgsi_exec_channel *dst, 1031 const union tgsi_exec_channel *src0, 1032 const union tgsi_exec_channel *src1 ) 1033{ 1034 dst->i[0] = src0->i[0] >> src1->i[0]; 1035 dst->i[1] = src0->i[1] >> src1->i[1]; 1036 dst->i[2] = src0->i[2] >> src1->i[2]; 1037 dst->i[3] = src0->i[3] >> src1->i[3]; 1038} 1039 1040static void 1041micro_trunc( 1042 union tgsi_exec_channel *dst, 1043 const union tgsi_exec_channel *src0 ) 1044{ 1045 dst->f[0] = (float) (int) src0->f[0]; 1046 dst->f[1] = (float) (int) src0->f[1]; 1047 dst->f[2] = (float) (int) src0->f[2]; 1048 dst->f[3] = (float) (int) src0->f[3]; 1049} 1050 1051#if 0 1052static void 1053micro_ushr( 1054 union tgsi_exec_channel *dst, 1055 const union tgsi_exec_channel *src0, 1056 const union tgsi_exec_channel *src1 ) 1057{ 1058 dst->u[0] = src0->u[0] >> 
src1->u[0]; 1059 dst->u[1] = src0->u[1] >> src1->u[1]; 1060 dst->u[2] = src0->u[2] >> src1->u[2]; 1061 dst->u[3] = src0->u[3] >> src1->u[3]; 1062} 1063#endif 1064 1065static void 1066micro_sin( 1067 union tgsi_exec_channel *dst, 1068 const union tgsi_exec_channel *src ) 1069{ 1070 dst->f[0] = sinf( src->f[0] ); 1071 dst->f[1] = sinf( src->f[1] ); 1072 dst->f[2] = sinf( src->f[2] ); 1073 dst->f[3] = sinf( src->f[3] ); 1074} 1075 1076static void 1077micro_sqrt( union tgsi_exec_channel *dst, 1078 const union tgsi_exec_channel *src ) 1079{ 1080 dst->f[0] = sqrtf( src->f[0] ); 1081 dst->f[1] = sqrtf( src->f[1] ); 1082 dst->f[2] = sqrtf( src->f[2] ); 1083 dst->f[3] = sqrtf( src->f[3] ); 1084} 1085 1086static void 1087micro_sub( 1088 union tgsi_exec_channel *dst, 1089 const union tgsi_exec_channel *src0, 1090 const union tgsi_exec_channel *src1 ) 1091{ 1092 dst->f[0] = src0->f[0] - src1->f[0]; 1093 dst->f[1] = src0->f[1] - src1->f[1]; 1094 dst->f[2] = src0->f[2] - src1->f[2]; 1095 dst->f[3] = src0->f[3] - src1->f[3]; 1096} 1097 1098#if 0 1099static void 1100micro_u2f( 1101 union tgsi_exec_channel *dst, 1102 const union tgsi_exec_channel *src ) 1103{ 1104 dst->f[0] = (float) src->u[0]; 1105 dst->f[1] = (float) src->u[1]; 1106 dst->f[2] = (float) src->u[2]; 1107 dst->f[3] = (float) src->u[3]; 1108} 1109#endif 1110 1111static void 1112micro_xor( 1113 union tgsi_exec_channel *dst, 1114 const union tgsi_exec_channel *src0, 1115 const union tgsi_exec_channel *src1 ) 1116{ 1117 dst->u[0] = src0->u[0] ^ src1->u[0]; 1118 dst->u[1] = src0->u[1] ^ src1->u[1]; 1119 dst->u[2] = src0->u[2] ^ src1->u[2]; 1120 dst->u[3] = src0->u[3] ^ src1->u[3]; 1121} 1122 1123static void 1124fetch_src_file_channel( 1125 const struct tgsi_exec_machine *mach, 1126 const uint file, 1127 const uint swizzle, 1128 const union tgsi_exec_channel *index, 1129 union tgsi_exec_channel *chan ) 1130{ 1131 switch( swizzle ) { 1132 case TGSI_SWIZZLE_X: 1133 case TGSI_SWIZZLE_Y: 1134 case TGSI_SWIZZLE_Z: 1135 case 
TGSI_SWIZZLE_W: 1136 switch( file ) { 1137 case TGSI_FILE_CONSTANT: 1138 assert(mach->Consts); 1139 if (index->i[0] < 0) 1140 chan->f[0] = 0.0f; 1141 else 1142 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1143 if (index->i[1] < 0) 1144 chan->f[1] = 0.0f; 1145 else 1146 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1147 if (index->i[2] < 0) 1148 chan->f[2] = 0.0f; 1149 else 1150 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1151 if (index->i[3] < 0) 1152 chan->f[3] = 0.0f; 1153 else 1154 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1155 break; 1156 1157 case TGSI_FILE_INPUT: 1158 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1159 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1160 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1161 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1162 break; 1163 1164 case TGSI_FILE_TEMPORARY: 1165 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1166 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1167 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1168 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1169 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1170 break; 1171 1172 case TGSI_FILE_IMMEDIATE: 1173 assert( index->i[0] < (int) mach->ImmLimit ); 1174 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1175 assert( index->i[1] < (int) mach->ImmLimit ); 1176 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1177 assert( index->i[2] < (int) mach->ImmLimit ); 1178 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1179 assert( index->i[3] < (int) mach->ImmLimit ); 1180 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1181 break; 1182 1183 case TGSI_FILE_ADDRESS: 1184 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1185 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1186 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1187 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1188 break; 1189 1190 case 
TGSI_FILE_OUTPUT: 1191 /* vertex/fragment output vars can be read too */ 1192 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1193 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1194 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1195 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1196 break; 1197 1198 default: 1199 assert( 0 ); 1200 } 1201 break; 1202 1203 default: 1204 assert( 0 ); 1205 } 1206} 1207 1208static void 1209fetch_source( 1210 const struct tgsi_exec_machine *mach, 1211 union tgsi_exec_channel *chan, 1212 const struct tgsi_full_src_register *reg, 1213 const uint chan_index ) 1214{ 1215 union tgsi_exec_channel index; 1216 uint swizzle; 1217 1218 /* We start with a direct index into a register file. 1219 * 1220 * file[1], 1221 * where: 1222 * file = SrcRegister.File 1223 * [1] = SrcRegister.Index 1224 */ 1225 index.i[0] = 1226 index.i[1] = 1227 index.i[2] = 1228 index.i[3] = reg->SrcRegister.Index; 1229 1230 /* There is an extra source register that indirectly subscripts 1231 * a register file. The direct index now becomes an offset 1232 * that is being added to the indirect register. 
1233 * 1234 * file[ind[2].x+1], 1235 * where: 1236 * ind = SrcRegisterInd.File 1237 * [2] = SrcRegisterInd.Index 1238 * .x = SrcRegisterInd.SwizzleX 1239 */ 1240 if (reg->SrcRegister.Indirect) { 1241 union tgsi_exec_channel index2; 1242 union tgsi_exec_channel indir_index; 1243 const uint execmask = mach->ExecMask; 1244 uint i; 1245 1246 /* which address register (always zero now) */ 1247 index2.i[0] = 1248 index2.i[1] = 1249 index2.i[2] = 1250 index2.i[3] = reg->SrcRegisterInd.Index; 1251 1252 /* get current value of address register[swizzle] */ 1253 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1254 fetch_src_file_channel( 1255 mach, 1256 reg->SrcRegisterInd.File, 1257 swizzle, 1258 &index2, 1259 &indir_index ); 1260 1261 /* add value of address register to the offset */ 1262 index.i[0] += (int) indir_index.f[0]; 1263 index.i[1] += (int) indir_index.f[1]; 1264 index.i[2] += (int) indir_index.f[2]; 1265 index.i[3] += (int) indir_index.f[3]; 1266 1267 /* for disabled execution channels, zero-out the index to 1268 * avoid using a potential garbage value. 1269 */ 1270 for (i = 0; i < QUAD_SIZE; i++) { 1271 if ((execmask & (1 << i)) == 0) 1272 index.i[i] = 0; 1273 } 1274 } 1275 1276 /* There is an extra source register that is a second 1277 * subscript to a register file. Effectively it means that 1278 * the register file is actually a 2D array of registers. 1279 * 1280 * file[1][3] == file[1*sizeof(file[1])+3], 1281 * where: 1282 * [3] = SrcRegisterDim.Index 1283 */ 1284 if (reg->SrcRegister.Dimension) { 1285 /* The size of the first-order array depends on the register file type. 1286 * We need to multiply the index to the first array to get an effective, 1287 * "flat" index that points to the beginning of the second-order array. 
1288 */ 1289 switch (reg->SrcRegister.File) { 1290 case TGSI_FILE_INPUT: 1291 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1292 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1293 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1294 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1295 break; 1296 case TGSI_FILE_CONSTANT: 1297 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1298 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1299 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1300 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1301 break; 1302 default: 1303 assert( 0 ); 1304 } 1305 1306 index.i[0] += reg->SrcRegisterDim.Index; 1307 index.i[1] += reg->SrcRegisterDim.Index; 1308 index.i[2] += reg->SrcRegisterDim.Index; 1309 index.i[3] += reg->SrcRegisterDim.Index; 1310 1311 /* Again, the second subscript index can be addressed indirectly 1312 * identically to the first one. 1313 * Nothing stops us from indirectly addressing the indirect register, 1314 * but there is no need for that, so we won't exercise it. 1315 * 1316 * file[1][ind[4].y+3], 1317 * where: 1318 * ind = SrcRegisterDimInd.File 1319 * [4] = SrcRegisterDimInd.Index 1320 * .y = SrcRegisterDimInd.SwizzleX 1321 */ 1322 if (reg->SrcRegisterDim.Indirect) { 1323 union tgsi_exec_channel index2; 1324 union tgsi_exec_channel indir_index; 1325 const uint execmask = mach->ExecMask; 1326 uint i; 1327 1328 index2.i[0] = 1329 index2.i[1] = 1330 index2.i[2] = 1331 index2.i[3] = reg->SrcRegisterDimInd.Index; 1332 1333 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1334 fetch_src_file_channel( 1335 mach, 1336 reg->SrcRegisterDimInd.File, 1337 swizzle, 1338 &index2, 1339 &indir_index ); 1340 1341 index.i[0] += (int) indir_index.f[0]; 1342 index.i[1] += (int) indir_index.f[1]; 1343 index.i[2] += (int) indir_index.f[2]; 1344 index.i[3] += (int) indir_index.f[3]; 1345 1346 /* for disabled execution channels, zero-out the index to 1347 * avoid using a potential garbage value. 
1348 */ 1349 for (i = 0; i < QUAD_SIZE; i++) { 1350 if ((execmask & (1 << i)) == 0) 1351 index.i[i] = 0; 1352 } 1353 } 1354 1355 /* If by any chance there was a need for a 3D array of register 1356 * files, we would have to check whether SrcRegisterDim is followed 1357 * by a dimension register and continue the saga. 1358 */ 1359 } 1360 1361 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1362 fetch_src_file_channel( 1363 mach, 1364 reg->SrcRegister.File, 1365 swizzle, 1366 &index, 1367 chan ); 1368 1369 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1370 case TGSI_UTIL_SIGN_CLEAR: 1371 micro_abs( chan, chan ); 1372 break; 1373 1374 case TGSI_UTIL_SIGN_SET: 1375 micro_abs( chan, chan ); 1376 micro_neg( chan, chan ); 1377 break; 1378 1379 case TGSI_UTIL_SIGN_TOGGLE: 1380 micro_neg( chan, chan ); 1381 break; 1382 1383 case TGSI_UTIL_SIGN_KEEP: 1384 break; 1385 } 1386 1387 if (reg->SrcRegisterExtMod.Complement) { 1388 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1389 } 1390} 1391 1392static void 1393store_dest( 1394 struct tgsi_exec_machine *mach, 1395 const union tgsi_exec_channel *chan, 1396 const struct tgsi_full_dst_register *reg, 1397 const struct tgsi_full_instruction *inst, 1398 uint chan_index ) 1399{ 1400 uint i; 1401 union tgsi_exec_channel null; 1402 union tgsi_exec_channel *dst; 1403 uint execmask = mach->ExecMask; 1404 int offset = 0; /* indirection offset */ 1405 int index; 1406 1407#ifdef DEBUG 1408 check_inf_or_nan(chan); 1409#endif 1410 1411 /* There is an extra source register that indirectly subscripts 1412 * a register file. The direct index now becomes an offset 1413 * that is being added to the indirect register. 
1414 * 1415 * file[ind[2].x+1], 1416 * where: 1417 * ind = DstRegisterInd.File 1418 * [2] = DstRegisterInd.Index 1419 * .x = DstRegisterInd.SwizzleX 1420 */ 1421 if (reg->DstRegister.Indirect) { 1422 union tgsi_exec_channel index; 1423 union tgsi_exec_channel indir_index; 1424 uint swizzle; 1425 1426 /* which address register (always zero for now) */ 1427 index.i[0] = 1428 index.i[1] = 1429 index.i[2] = 1430 index.i[3] = reg->DstRegisterInd.Index; 1431 1432 /* get current value of address register[swizzle] */ 1433 swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); 1434 1435 /* fetch values from the address/indirection register */ 1436 fetch_src_file_channel( 1437 mach, 1438 reg->DstRegisterInd.File, 1439 swizzle, 1440 &index, 1441 &indir_index ); 1442 1443 /* save indirection offset */ 1444 offset = (int) indir_index.f[0]; 1445 } 1446 1447 switch (reg->DstRegister.File) { 1448 case TGSI_FILE_NULL: 1449 dst = &null; 1450 break; 1451 1452 case TGSI_FILE_OUTPUT: 1453 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1454 + reg->DstRegister.Index; 1455 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1456 break; 1457 1458 case TGSI_FILE_TEMPORARY: 1459 index = reg->DstRegister.Index; 1460 assert( index < TGSI_EXEC_NUM_TEMPS ); 1461 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1462 break; 1463 1464 case TGSI_FILE_ADDRESS: 1465 index = reg->DstRegister.Index; 1466 dst = &mach->Addrs[index].xyzw[chan_index]; 1467 break; 1468 1469 default: 1470 assert( 0 ); 1471 return; 1472 } 1473 1474 if (inst->InstructionExtNv.CondFlowEnable) { 1475 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1476 uint swizzle; 1477 uint shift; 1478 uint mask; 1479 uint test; 1480 1481 /* Only CC0 supported. 
1482 */ 1483 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1484 1485 switch (chan_index) { 1486 case CHAN_X: 1487 swizzle = inst->InstructionExtNv.CondSwizzleX; 1488 break; 1489 case CHAN_Y: 1490 swizzle = inst->InstructionExtNv.CondSwizzleY; 1491 break; 1492 case CHAN_Z: 1493 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1494 break; 1495 case CHAN_W: 1496 swizzle = inst->InstructionExtNv.CondSwizzleW; 1497 break; 1498 default: 1499 assert( 0 ); 1500 return; 1501 } 1502 1503 switch (swizzle) { 1504 case TGSI_SWIZZLE_X: 1505 shift = TGSI_EXEC_CC_X_SHIFT; 1506 mask = TGSI_EXEC_CC_X_MASK; 1507 break; 1508 case TGSI_SWIZZLE_Y: 1509 shift = TGSI_EXEC_CC_Y_SHIFT; 1510 mask = TGSI_EXEC_CC_Y_MASK; 1511 break; 1512 case TGSI_SWIZZLE_Z: 1513 shift = TGSI_EXEC_CC_Z_SHIFT; 1514 mask = TGSI_EXEC_CC_Z_MASK; 1515 break; 1516 case TGSI_SWIZZLE_W: 1517 shift = TGSI_EXEC_CC_W_SHIFT; 1518 mask = TGSI_EXEC_CC_W_MASK; 1519 break; 1520 default: 1521 assert( 0 ); 1522 return; 1523 } 1524 1525 switch (inst->InstructionExtNv.CondMask) { 1526 case TGSI_CC_GT: 1527 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1528 for (i = 0; i < QUAD_SIZE; i++) 1529 if (cc->u[i] & test) 1530 execmask &= ~(1 << i); 1531 break; 1532 1533 case TGSI_CC_EQ: 1534 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1535 for (i = 0; i < QUAD_SIZE; i++) 1536 if (cc->u[i] & test) 1537 execmask &= ~(1 << i); 1538 break; 1539 1540 case TGSI_CC_LT: 1541 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1542 for (i = 0; i < QUAD_SIZE; i++) 1543 if (cc->u[i] & test) 1544 execmask &= ~(1 << i); 1545 break; 1546 1547 case TGSI_CC_GE: 1548 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1549 for (i = 0; i < QUAD_SIZE; i++) 1550 if (cc->u[i] & test) 1551 execmask &= ~(1 << i); 1552 break; 1553 1554 case TGSI_CC_LE: 1555 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1556 for (i = 0; i < QUAD_SIZE; i++) 1557 if (cc->u[i] & test) 1558 execmask &= ~(1 << i); 1559 break; 1560 1561 case TGSI_CC_NE: 1562 test = 
~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1563 for (i = 0; i < QUAD_SIZE; i++) 1564 if (cc->u[i] & test) 1565 execmask &= ~(1 << i); 1566 break; 1567 1568 case TGSI_CC_TR: 1569 break; 1570 1571 case TGSI_CC_FL: 1572 for (i = 0; i < QUAD_SIZE; i++) 1573 execmask &= ~(1 << i); 1574 break; 1575 1576 default: 1577 assert( 0 ); 1578 return; 1579 } 1580 } 1581 1582 switch (inst->Instruction.Saturate) { 1583 case TGSI_SAT_NONE: 1584 for (i = 0; i < QUAD_SIZE; i++) 1585 if (execmask & (1 << i)) 1586 dst->i[i] = chan->i[i]; 1587 break; 1588 1589 case TGSI_SAT_ZERO_ONE: 1590 for (i = 0; i < QUAD_SIZE; i++) 1591 if (execmask & (1 << i)) { 1592 if (chan->f[i] < 0.0f) 1593 dst->f[i] = 0.0f; 1594 else if (chan->f[i] > 1.0f) 1595 dst->f[i] = 1.0f; 1596 else 1597 dst->i[i] = chan->i[i]; 1598 } 1599 break; 1600 1601 case TGSI_SAT_MINUS_PLUS_ONE: 1602 for (i = 0; i < QUAD_SIZE; i++) 1603 if (execmask & (1 << i)) { 1604 if (chan->f[i] < -1.0f) 1605 dst->f[i] = -1.0f; 1606 else if (chan->f[i] > 1.0f) 1607 dst->f[i] = 1.0f; 1608 else 1609 dst->i[i] = chan->i[i]; 1610 } 1611 break; 1612 1613 default: 1614 assert( 0 ); 1615 } 1616 1617 if (inst->InstructionExtNv.CondDstUpdate) { 1618 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1619 uint shift; 1620 uint mask; 1621 1622 /* Only CC0 supported. 
1623 */ 1624 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1625 1626 switch (chan_index) { 1627 case CHAN_X: 1628 shift = TGSI_EXEC_CC_X_SHIFT; 1629 mask = ~TGSI_EXEC_CC_X_MASK; 1630 break; 1631 case CHAN_Y: 1632 shift = TGSI_EXEC_CC_Y_SHIFT; 1633 mask = ~TGSI_EXEC_CC_Y_MASK; 1634 break; 1635 case CHAN_Z: 1636 shift = TGSI_EXEC_CC_Z_SHIFT; 1637 mask = ~TGSI_EXEC_CC_Z_MASK; 1638 break; 1639 case CHAN_W: 1640 shift = TGSI_EXEC_CC_W_SHIFT; 1641 mask = ~TGSI_EXEC_CC_W_MASK; 1642 break; 1643 default: 1644 assert( 0 ); 1645 return; 1646 } 1647 1648 for (i = 0; i < QUAD_SIZE; i++) 1649 if (execmask & (1 << i)) { 1650 cc->u[i] &= mask; 1651 if (dst->f[i] < 0.0f) 1652 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1653 else if (dst->f[i] > 0.0f) 1654 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1655 else if (dst->f[i] == 0.0f) 1656 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1657 else 1658 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1659 } 1660 } 1661} 1662 1663#define FETCH(VAL,INDEX,CHAN)\ 1664 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1665 1666#define STORE(VAL,INDEX,CHAN)\ 1667 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1668 1669 1670/** 1671 * Execute ARB-style KIL which is predicated by a src register. 1672 * Kill fragment if any of the four values is less than zero. 1673 */ 1674static void 1675exec_kil(struct tgsi_exec_machine *mach, 1676 const struct tgsi_full_instruction *inst) 1677{ 1678 uint uniquemask; 1679 uint chan_index; 1680 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1681 union tgsi_exec_channel r[1]; 1682 1683 /* This mask stores component bits that were already tested. 
*/ 1684 uniquemask = 0; 1685 1686 for (chan_index = 0; chan_index < 4; chan_index++) 1687 { 1688 uint swizzle; 1689 uint i; 1690 1691 /* unswizzle channel */ 1692 swizzle = tgsi_util_get_full_src_register_swizzle ( 1693 &inst->FullSrcRegisters[0], 1694 chan_index); 1695 1696 /* check if the component has not been already tested */ 1697 if (uniquemask & (1 << swizzle)) 1698 continue; 1699 uniquemask |= 1 << swizzle; 1700 1701 FETCH(&r[0], 0, chan_index); 1702 for (i = 0; i < 4; i++) 1703 if (r[0].f[i] < 0.0f) 1704 kilmask |= 1 << i; 1705 } 1706 1707 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1708} 1709 1710/** 1711 * Execute NVIDIA-style KIL which is predicated by a condition code. 1712 * Kill fragment if the condition code is TRUE. 1713 */ 1714static void 1715exec_kilp(struct tgsi_exec_machine *mach, 1716 const struct tgsi_full_instruction *inst) 1717{ 1718 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1719 1720 if (inst->InstructionExtNv.CondFlowEnable) { 1721 uint swizzle[4]; 1722 uint chan_index; 1723 1724 kilmask = 0x0; 1725 1726 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1727 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1728 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1729 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1730 1731 for (chan_index = 0; chan_index < 4; chan_index++) 1732 { 1733 uint i; 1734 1735 for (i = 0; i < 4; i++) { 1736 /* TODO: evaluate the condition code */ 1737 if (0) 1738 kilmask |= 1 << i; 1739 } 1740 } 1741 } 1742 else { 1743 /* "unconditional" kil */ 1744 kilmask = mach->ExecMask; 1745 } 1746 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1747} 1748 1749 1750/* 1751 * Fetch a four texture samples using STR texture coordinates. 
1752 */ 1753static void 1754fetch_texel( struct tgsi_sampler *sampler, 1755 const union tgsi_exec_channel *s, 1756 const union tgsi_exec_channel *t, 1757 const union tgsi_exec_channel *p, 1758 float lodbias, /* XXX should be float[4] */ 1759 union tgsi_exec_channel *r, 1760 union tgsi_exec_channel *g, 1761 union tgsi_exec_channel *b, 1762 union tgsi_exec_channel *a ) 1763{ 1764 uint j; 1765 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1766 1767 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1768 1769 for (j = 0; j < 4; j++) { 1770 r->f[j] = rgba[0][j]; 1771 g->f[j] = rgba[1][j]; 1772 b->f[j] = rgba[2][j]; 1773 a->f[j] = rgba[3][j]; 1774 } 1775} 1776 1777 1778static void 1779exec_tex(struct tgsi_exec_machine *mach, 1780 const struct tgsi_full_instruction *inst, 1781 boolean biasLod, 1782 boolean projected) 1783{ 1784 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1785 union tgsi_exec_channel r[4]; 1786 uint chan_index; 1787 float lodBias; 1788 1789 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1790 1791 switch (inst->InstructionExtTexture.Texture) { 1792 case TGSI_TEXTURE_1D: 1793 case TGSI_TEXTURE_SHADOW1D: 1794 1795 FETCH(&r[0], 0, CHAN_X); 1796 1797 if (projected) { 1798 FETCH(&r[1], 0, CHAN_W); 1799 micro_div( &r[0], &r[0], &r[1] ); 1800 } 1801 1802 if (biasLod) { 1803 FETCH(&r[1], 0, CHAN_W); 1804 lodBias = r[2].f[0]; 1805 } 1806 else 1807 lodBias = 0.0; 1808 1809 fetch_texel(mach->Samplers[unit], 1810 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1811 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1812 break; 1813 1814 case TGSI_TEXTURE_2D: 1815 case TGSI_TEXTURE_RECT: 1816 case TGSI_TEXTURE_SHADOW2D: 1817 case TGSI_TEXTURE_SHADOWRECT: 1818 1819 FETCH(&r[0], 0, CHAN_X); 1820 FETCH(&r[1], 0, CHAN_Y); 1821 FETCH(&r[2], 0, CHAN_Z); 1822 1823 if (projected) { 1824 FETCH(&r[3], 0, CHAN_W); 1825 micro_div( &r[0], &r[0], &r[3] ); 1826 micro_div( &r[1], &r[1], &r[3] ); 1827 micro_div( &r[2], &r[2], &r[3] ); 1828 } 1829 
1830 if (biasLod) { 1831 FETCH(&r[3], 0, CHAN_W); 1832 lodBias = r[3].f[0]; 1833 } 1834 else 1835 lodBias = 0.0; 1836 1837 fetch_texel(mach->Samplers[unit], 1838 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1839 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1840 break; 1841 1842 case TGSI_TEXTURE_3D: 1843 case TGSI_TEXTURE_CUBE: 1844 1845 FETCH(&r[0], 0, CHAN_X); 1846 FETCH(&r[1], 0, CHAN_Y); 1847 FETCH(&r[2], 0, CHAN_Z); 1848 1849 if (projected) { 1850 FETCH(&r[3], 0, CHAN_W); 1851 micro_div( &r[0], &r[0], &r[3] ); 1852 micro_div( &r[1], &r[1], &r[3] ); 1853 micro_div( &r[2], &r[2], &r[3] ); 1854 } 1855 1856 if (biasLod) { 1857 FETCH(&r[3], 0, CHAN_W); 1858 lodBias = r[3].f[0]; 1859 } 1860 else 1861 lodBias = 0.0; 1862 1863 fetch_texel(mach->Samplers[unit], 1864 &r[0], &r[1], &r[2], lodBias, 1865 &r[0], &r[1], &r[2], &r[3]); 1866 break; 1867 1868 default: 1869 assert (0); 1870 } 1871 1872 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1873 STORE( &r[chan_index], 0, chan_index ); 1874 } 1875} 1876 1877 1878/** 1879 * Evaluate a constant-valued coefficient at the position of the 1880 * current quad. 1881 */ 1882static void 1883eval_constant_coef( 1884 struct tgsi_exec_machine *mach, 1885 unsigned attrib, 1886 unsigned chan ) 1887{ 1888 unsigned i; 1889 1890 for( i = 0; i < QUAD_SIZE; i++ ) { 1891 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1892 } 1893} 1894 1895/** 1896 * Evaluate a linear-valued coefficient at the position of the 1897 * current quad. 
1898 */ 1899static void 1900eval_linear_coef( 1901 struct tgsi_exec_machine *mach, 1902 unsigned attrib, 1903 unsigned chan ) 1904{ 1905 const float x = mach->QuadPos.xyzw[0].f[0]; 1906 const float y = mach->QuadPos.xyzw[1].f[0]; 1907 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1908 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1909 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1910 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1911 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1912 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1913 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1914} 1915 1916/** 1917 * Evaluate a perspective-valued coefficient at the position of the 1918 * current quad. 1919 */ 1920static void 1921eval_perspective_coef( 1922 struct tgsi_exec_machine *mach, 1923 unsigned attrib, 1924 unsigned chan ) 1925{ 1926 const float x = mach->QuadPos.xyzw[0].f[0]; 1927 const float y = mach->QuadPos.xyzw[1].f[0]; 1928 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1929 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1930 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1931 const float *w = mach->QuadPos.xyzw[3].f; 1932 /* divide by W here */ 1933 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1934 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1935 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1936 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1937} 1938 1939 1940typedef void (* eval_coef_func)( 1941 struct tgsi_exec_machine *mach, 1942 unsigned attrib, 1943 unsigned chan ); 1944 1945static void 1946exec_declaration( 1947 struct tgsi_exec_machine *mach, 1948 const struct tgsi_full_declaration *decl ) 1949{ 1950 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1951 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1952 unsigned first, last, mask; 1953 eval_coef_func eval; 1954 1955 first = 
decl->DeclarationRange.First; 1956 last = decl->DeclarationRange.Last; 1957 mask = decl->Declaration.UsageMask; 1958 1959 switch( decl->Declaration.Interpolate ) { 1960 case TGSI_INTERPOLATE_CONSTANT: 1961 eval = eval_constant_coef; 1962 break; 1963 1964 case TGSI_INTERPOLATE_LINEAR: 1965 eval = eval_linear_coef; 1966 break; 1967 1968 case TGSI_INTERPOLATE_PERSPECTIVE: 1969 eval = eval_perspective_coef; 1970 break; 1971 1972 default: 1973 eval = NULL; 1974 assert( 0 ); 1975 } 1976 1977 if( mask == TGSI_WRITEMASK_XYZW ) { 1978 unsigned i, j; 1979 1980 for( i = first; i <= last; i++ ) { 1981 for( j = 0; j < NUM_CHANNELS; j++ ) { 1982 eval( mach, i, j ); 1983 } 1984 } 1985 } 1986 else { 1987 unsigned i, j; 1988 1989 for( j = 0; j < NUM_CHANNELS; j++ ) { 1990 if( mask & (1 << j) ) { 1991 for( i = first; i <= last; i++ ) { 1992 eval( mach, i, j ); 1993 } 1994 } 1995 } 1996 } 1997 } 1998 } 1999} 2000 2001static void 2002exec_instruction( 2003 struct tgsi_exec_machine *mach, 2004 const struct tgsi_full_instruction *inst, 2005 int *pc ) 2006{ 2007 uint chan_index; 2008 union tgsi_exec_channel r[10]; 2009 2010 (*pc)++; 2011 2012 switch (inst->Instruction.Opcode) { 2013 case TGSI_OPCODE_ARL: 2014 case TGSI_OPCODE_FLR: 2015 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2016 FETCH( &r[0], 0, chan_index ); 2017 micro_flr( &r[0], &r[0] ); 2018 STORE( &r[0], 0, chan_index ); 2019 } 2020 break; 2021 2022 case TGSI_OPCODE_MOV: 2023 if (inst->Flags & SOA_DEPENDENCY_FLAG) { 2024 /* Do all fetches into temp regs, then do all stores to avoid 2025 * intermediate/accidental clobbering. This could be done all the 2026 * time for MOV but for other instructions we'll need more temps... 
2027 */ 2028 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2029 FETCH( &r[chan_index], 0, chan_index ); 2030 } 2031 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2032 STORE( &r[chan_index], 0, chan_index ); 2033 } 2034 } 2035 else { 2036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2037 FETCH( &r[0], 0, chan_index ); 2038 STORE( &r[0], 0, chan_index ); 2039 } 2040 } 2041 break; 2042 2043 case TGSI_OPCODE_LIT: 2044 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2045 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2046 } 2047 2048 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2049 FETCH( &r[0], 0, CHAN_X ); 2050 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2051 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2052 STORE( &r[0], 0, CHAN_Y ); 2053 } 2054 2055 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2056 FETCH( &r[1], 0, CHAN_Y ); 2057 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2058 2059 FETCH( &r[2], 0, CHAN_W ); 2060 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2061 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2062 micro_pow( &r[1], &r[1], &r[2] ); 2063 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2064 STORE( &r[0], 0, CHAN_Z ); 2065 } 2066 } 2067 2068 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2069 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2070 } 2071 break; 2072 2073 case TGSI_OPCODE_RCP: 2074 /* TGSI_OPCODE_RECIP */ 2075 FETCH( &r[0], 0, CHAN_X ); 2076 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2077 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2078 STORE( &r[0], 0, chan_index ); 2079 } 2080 break; 2081 2082 case TGSI_OPCODE_RSQ: 2083 /* TGSI_OPCODE_RECIPSQRT */ 2084 FETCH( &r[0], 0, CHAN_X ); 2085 micro_abs( &r[0], &r[0] ); 2086 micro_sqrt( &r[0], &r[0] ); 2087 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 
&r[0] ); 2088 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2089 STORE( &r[0], 0, chan_index ); 2090 } 2091 break; 2092 2093 case TGSI_OPCODE_EXP: 2094 FETCH( &r[0], 0, CHAN_X ); 2095 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2096 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2097 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2098 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2099 } 2100 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2101 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2102 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2103 } 2104 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2105 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2106 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2107 } 2108 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2109 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2110 } 2111 break; 2112 2113 case TGSI_OPCODE_LOG: 2114 FETCH( &r[0], 0, CHAN_X ); 2115 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2116 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2117 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2118 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2119 STORE( &r[0], 0, CHAN_X ); 2120 } 2121 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2122 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2123 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2124 STORE( &r[0], 0, CHAN_Y ); 2125 } 2126 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2127 STORE( &r[1], 0, CHAN_Z ); 2128 } 2129 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2130 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2131 } 2132 break; 2133 2134 case TGSI_OPCODE_MUL: 2135 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 2136 { 2137 FETCH(&r[0], 0, chan_index); 2138 FETCH(&r[1], 1, chan_index); 2139 2140 micro_mul( &r[0], &r[0], &r[1] ); 2141 2142 STORE(&r[0], 0, chan_index); 2143 } 2144 break; 2145 2146 case TGSI_OPCODE_ADD: 2147 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2148 FETCH( &r[0], 0, chan_index ); 2149 FETCH( &r[1], 1, chan_index ); 2150 micro_add( &r[0], &r[0], 
&r[1] ); 2151 STORE( &r[0], 0, chan_index ); 2152 } 2153 break; 2154 2155 case TGSI_OPCODE_DP3: 2156 /* TGSI_OPCODE_DOT3 */ 2157 FETCH( &r[0], 0, CHAN_X ); 2158 FETCH( &r[1], 1, CHAN_X ); 2159 micro_mul( &r[0], &r[0], &r[1] ); 2160 2161 FETCH( &r[1], 0, CHAN_Y ); 2162 FETCH( &r[2], 1, CHAN_Y ); 2163 micro_mul( &r[1], &r[1], &r[2] ); 2164 micro_add( &r[0], &r[0], &r[1] ); 2165 2166 FETCH( &r[1], 0, CHAN_Z ); 2167 FETCH( &r[2], 1, CHAN_Z ); 2168 micro_mul( &r[1], &r[1], &r[2] ); 2169 micro_add( &r[0], &r[0], &r[1] ); 2170 2171 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2172 STORE( &r[0], 0, chan_index ); 2173 } 2174 break; 2175 2176 case TGSI_OPCODE_DP4: 2177 /* TGSI_OPCODE_DOT4 */ 2178 FETCH(&r[0], 0, CHAN_X); 2179 FETCH(&r[1], 1, CHAN_X); 2180 2181 micro_mul( &r[0], &r[0], &r[1] ); 2182 2183 FETCH(&r[1], 0, CHAN_Y); 2184 FETCH(&r[2], 1, CHAN_Y); 2185 2186 micro_mul( &r[1], &r[1], &r[2] ); 2187 micro_add( &r[0], &r[0], &r[1] ); 2188 2189 FETCH(&r[1], 0, CHAN_Z); 2190 FETCH(&r[2], 1, CHAN_Z); 2191 2192 micro_mul( &r[1], &r[1], &r[2] ); 2193 micro_add( &r[0], &r[0], &r[1] ); 2194 2195 FETCH(&r[1], 0, CHAN_W); 2196 FETCH(&r[2], 1, CHAN_W); 2197 2198 micro_mul( &r[1], &r[1], &r[2] ); 2199 micro_add( &r[0], &r[0], &r[1] ); 2200 2201 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2202 STORE( &r[0], 0, chan_index ); 2203 } 2204 break; 2205 2206 case TGSI_OPCODE_DST: 2207 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2208 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2209 } 2210 2211 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2212 FETCH( &r[0], 0, CHAN_Y ); 2213 FETCH( &r[1], 1, CHAN_Y); 2214 micro_mul( &r[0], &r[0], &r[1] ); 2215 STORE( &r[0], 0, CHAN_Y ); 2216 } 2217 2218 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2219 FETCH( &r[0], 0, CHAN_Z ); 2220 STORE( &r[0], 0, CHAN_Z ); 2221 } 2222 2223 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2224 FETCH( &r[0], 1, CHAN_W ); 2225 STORE( &r[0], 0, CHAN_W ); 2226 } 2227 break; 2228 2229 case TGSI_OPCODE_MIN: 2230 
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2231 FETCH(&r[0], 0, chan_index); 2232 FETCH(&r[1], 1, chan_index); 2233 2234 /* XXX use micro_min()?? */ 2235 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2236 2237 STORE(&r[0], 0, chan_index); 2238 } 2239 break; 2240 2241 case TGSI_OPCODE_MAX: 2242 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2243 FETCH(&r[0], 0, chan_index); 2244 FETCH(&r[1], 1, chan_index); 2245 2246 /* XXX use micro_max()?? */ 2247 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2248 2249 STORE(&r[0], 0, chan_index ); 2250 } 2251 break; 2252 2253 case TGSI_OPCODE_SLT: 2254 /* TGSI_OPCODE_SETLT */ 2255 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2256 FETCH( &r[0], 0, chan_index ); 2257 FETCH( &r[1], 1, chan_index ); 2258 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2259 STORE( &r[0], 0, chan_index ); 2260 } 2261 break; 2262 2263 case TGSI_OPCODE_SGE: 2264 /* TGSI_OPCODE_SETGE */ 2265 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2266 FETCH( &r[0], 0, chan_index ); 2267 FETCH( &r[1], 1, chan_index ); 2268 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2269 STORE( &r[0], 0, chan_index ); 2270 } 2271 break; 2272 2273 case TGSI_OPCODE_MAD: 2274 /* TGSI_OPCODE_MADD */ 2275 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2276 FETCH( &r[0], 0, chan_index ); 2277 FETCH( &r[1], 1, chan_index ); 2278 micro_mul( &r[0], &r[0], &r[1] ); 2279 FETCH( &r[1], 2, chan_index ); 2280 micro_add( &r[0], &r[0], &r[1] ); 2281 STORE( &r[0], 0, chan_index ); 2282 } 2283 break; 2284 2285 case TGSI_OPCODE_SUB: 2286 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2287 FETCH(&r[0], 0, chan_index); 2288 FETCH(&r[1], 1, chan_index); 2289 2290 micro_sub( &r[0], &r[0], &r[1] ); 2291 2292 STORE(&r[0], 0, chan_index); 2293 } 2294 break; 2295 2296 case TGSI_OPCODE_LRP: 2297 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2298 FETCH(&r[0], 0, 
chan_index); 2299 FETCH(&r[1], 1, chan_index); 2300 FETCH(&r[2], 2, chan_index); 2301 2302 micro_sub( &r[1], &r[1], &r[2] ); 2303 micro_mul( &r[0], &r[0], &r[1] ); 2304 micro_add( &r[0], &r[0], &r[2] ); 2305 2306 STORE(&r[0], 0, chan_index); 2307 } 2308 break; 2309 2310 case TGSI_OPCODE_CND: 2311 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2312 FETCH(&r[0], 0, chan_index); 2313 FETCH(&r[1], 1, chan_index); 2314 FETCH(&r[2], 2, chan_index); 2315 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2316 STORE(&r[0], 0, chan_index); 2317 } 2318 break; 2319 2320 case TGSI_OPCODE_DP2A: 2321 FETCH( &r[0], 0, CHAN_X ); 2322 FETCH( &r[1], 1, CHAN_X ); 2323 micro_mul( &r[0], &r[0], &r[1] ); 2324 2325 FETCH( &r[1], 0, CHAN_Y ); 2326 FETCH( &r[2], 1, CHAN_Y ); 2327 micro_mul( &r[1], &r[1], &r[2] ); 2328 micro_add( &r[0], &r[0], &r[1] ); 2329 2330 FETCH( &r[2], 2, CHAN_X ); 2331 micro_add( &r[0], &r[0], &r[2] ); 2332 2333 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2334 STORE( &r[0], 0, chan_index ); 2335 } 2336 break; 2337 2338 case TGSI_OPCODE_FRC: 2339 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2340 FETCH( &r[0], 0, chan_index ); 2341 micro_frc( &r[0], &r[0] ); 2342 STORE( &r[0], 0, chan_index ); 2343 } 2344 break; 2345 2346 case TGSI_OPCODE_CLAMP: 2347 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2348 FETCH(&r[0], 0, chan_index); 2349 FETCH(&r[1], 1, chan_index); 2350 micro_max(&r[0], &r[0], &r[1]); 2351 FETCH(&r[1], 2, chan_index); 2352 micro_min(&r[0], &r[0], &r[1]); 2353 STORE(&r[0], 0, chan_index); 2354 } 2355 break; 2356 2357 case TGSI_OPCODE_ROUND: 2358 case TGSI_OPCODE_ARR: 2359 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2360 FETCH( &r[0], 0, chan_index ); 2361 micro_rnd( &r[0], &r[0] ); 2362 STORE( &r[0], 0, chan_index ); 2363 } 2364 break; 2365 2366 case TGSI_OPCODE_EX2: 2367 FETCH(&r[0], 0, CHAN_X); 2368 2369#if FAST_MATH 2370 micro_exp2( &r[0], &r[0] ); 2371#else 2372 micro_pow( &r[0], 
&mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2373#endif 2374 2375 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2376 STORE( &r[0], 0, chan_index ); 2377 } 2378 break; 2379 2380 case TGSI_OPCODE_LG2: 2381 FETCH( &r[0], 0, CHAN_X ); 2382 micro_lg2( &r[0], &r[0] ); 2383 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2384 STORE( &r[0], 0, chan_index ); 2385 } 2386 break; 2387 2388 case TGSI_OPCODE_POW: 2389 FETCH(&r[0], 0, CHAN_X); 2390 FETCH(&r[1], 1, CHAN_X); 2391 2392 micro_pow( &r[0], &r[0], &r[1] ); 2393 2394 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2395 STORE( &r[0], 0, chan_index ); 2396 } 2397 break; 2398 2399 case TGSI_OPCODE_XPD: 2400 FETCH(&r[0], 0, CHAN_Y); 2401 FETCH(&r[1], 1, CHAN_Z); 2402 2403 micro_mul( &r[2], &r[0], &r[1] ); 2404 2405 FETCH(&r[3], 0, CHAN_Z); 2406 FETCH(&r[4], 1, CHAN_Y); 2407 2408 micro_mul( &r[5], &r[3], &r[4] ); 2409 micro_sub( &r[2], &r[2], &r[5] ); 2410 2411 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2412 STORE( &r[2], 0, CHAN_X ); 2413 } 2414 2415 FETCH(&r[2], 1, CHAN_X); 2416 2417 micro_mul( &r[3], &r[3], &r[2] ); 2418 2419 FETCH(&r[5], 0, CHAN_X); 2420 2421 micro_mul( &r[1], &r[1], &r[5] ); 2422 micro_sub( &r[3], &r[3], &r[1] ); 2423 2424 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2425 STORE( &r[3], 0, CHAN_Y ); 2426 } 2427 2428 micro_mul( &r[5], &r[5], &r[4] ); 2429 micro_mul( &r[0], &r[0], &r[2] ); 2430 micro_sub( &r[5], &r[5], &r[0] ); 2431 2432 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2433 STORE( &r[5], 0, CHAN_Z ); 2434 } 2435 2436 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2437 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2438 } 2439 break; 2440 2441 case TGSI_OPCODE_ABS: 2442 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2443 FETCH(&r[0], 0, chan_index); 2444 2445 micro_abs( &r[0], &r[0] ); 2446 2447 STORE(&r[0], 0, chan_index); 2448 } 2449 break; 2450 2451 case TGSI_OPCODE_RCC: 2452 FETCH(&r[0], 0, CHAN_X); 2453 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2454 
micro_float_clamp(&r[0], &r[0]); 2455 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2456 STORE(&r[0], 0, chan_index); 2457 } 2458 break; 2459 2460 case TGSI_OPCODE_DPH: 2461 FETCH(&r[0], 0, CHAN_X); 2462 FETCH(&r[1], 1, CHAN_X); 2463 2464 micro_mul( &r[0], &r[0], &r[1] ); 2465 2466 FETCH(&r[1], 0, CHAN_Y); 2467 FETCH(&r[2], 1, CHAN_Y); 2468 2469 micro_mul( &r[1], &r[1], &r[2] ); 2470 micro_add( &r[0], &r[0], &r[1] ); 2471 2472 FETCH(&r[1], 0, CHAN_Z); 2473 FETCH(&r[2], 1, CHAN_Z); 2474 2475 micro_mul( &r[1], &r[1], &r[2] ); 2476 micro_add( &r[0], &r[0], &r[1] ); 2477 2478 FETCH(&r[1], 1, CHAN_W); 2479 2480 micro_add( &r[0], &r[0], &r[1] ); 2481 2482 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2483 STORE( &r[0], 0, chan_index ); 2484 } 2485 break; 2486 2487 case TGSI_OPCODE_COS: 2488 FETCH(&r[0], 0, CHAN_X); 2489 2490 micro_cos( &r[0], &r[0] ); 2491 2492 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2493 STORE( &r[0], 0, chan_index ); 2494 } 2495 break; 2496 2497 case TGSI_OPCODE_DDX: 2498 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2499 FETCH( &r[0], 0, chan_index ); 2500 micro_ddx( &r[0], &r[0] ); 2501 STORE( &r[0], 0, chan_index ); 2502 } 2503 break; 2504 2505 case TGSI_OPCODE_DDY: 2506 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2507 FETCH( &r[0], 0, chan_index ); 2508 micro_ddy( &r[0], &r[0] ); 2509 STORE( &r[0], 0, chan_index ); 2510 } 2511 break; 2512 2513 case TGSI_OPCODE_KILP: 2514 exec_kilp (mach, inst); 2515 break; 2516 2517 case TGSI_OPCODE_KIL: 2518 exec_kil (mach, inst); 2519 break; 2520 2521 case TGSI_OPCODE_PK2H: 2522 assert (0); 2523 break; 2524 2525 case TGSI_OPCODE_PK2US: 2526 assert (0); 2527 break; 2528 2529 case TGSI_OPCODE_PK4B: 2530 assert (0); 2531 break; 2532 2533 case TGSI_OPCODE_PK4UB: 2534 assert (0); 2535 break; 2536 2537 case TGSI_OPCODE_RFL: 2538 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2539 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2540 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2541 /* r0 = dp3(src0, src0) */ 2542 FETCH(&r[2], 0, 
CHAN_X); 2543 micro_mul(&r[0], &r[2], &r[2]); 2544 FETCH(&r[4], 0, CHAN_Y); 2545 micro_mul(&r[8], &r[4], &r[4]); 2546 micro_add(&r[0], &r[0], &r[8]); 2547 FETCH(&r[6], 0, CHAN_Z); 2548 micro_mul(&r[8], &r[6], &r[6]); 2549 micro_add(&r[0], &r[0], &r[8]); 2550 2551 /* r1 = dp3(src0, src1) */ 2552 FETCH(&r[3], 1, CHAN_X); 2553 micro_mul(&r[1], &r[2], &r[3]); 2554 FETCH(&r[5], 1, CHAN_Y); 2555 micro_mul(&r[8], &r[4], &r[5]); 2556 micro_add(&r[1], &r[1], &r[8]); 2557 FETCH(&r[7], 1, CHAN_Z); 2558 micro_mul(&r[8], &r[6], &r[7]); 2559 micro_add(&r[1], &r[1], &r[8]); 2560 2561 /* r1 = 2 * r1 / r0 */ 2562 micro_add(&r[1], &r[1], &r[1]); 2563 micro_div(&r[1], &r[1], &r[0]); 2564 2565 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2566 micro_mul(&r[2], &r[2], &r[1]); 2567 micro_sub(&r[2], &r[2], &r[3]); 2568 STORE(&r[2], 0, CHAN_X); 2569 } 2570 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2571 micro_mul(&r[4], &r[4], &r[1]); 2572 micro_sub(&r[4], &r[4], &r[5]); 2573 STORE(&r[4], 0, CHAN_Y); 2574 } 2575 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2576 micro_mul(&r[6], &r[6], &r[1]); 2577 micro_sub(&r[6], &r[6], &r[7]); 2578 STORE(&r[6], 0, CHAN_Z); 2579 } 2580 } 2581 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2582 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2583 } 2584 break; 2585 2586 case TGSI_OPCODE_SEQ: 2587 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2588 FETCH( &r[0], 0, chan_index ); 2589 FETCH( &r[1], 1, chan_index ); 2590 micro_eq( &r[0], &r[0], &r[1], 2591 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2592 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2593 STORE( &r[0], 0, chan_index ); 2594 } 2595 break; 2596 2597 case TGSI_OPCODE_SFL: 2598 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2599 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2600 } 2601 break; 2602 2603 case TGSI_OPCODE_SGT: 2604 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2605 FETCH( &r[0], 0, chan_index ); 2606 FETCH( &r[1], 1, chan_index ); 2607 micro_le( &r[0], &r[0], &r[1], 
&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2608 STORE( &r[0], 0, chan_index ); 2609 } 2610 break; 2611 2612 case TGSI_OPCODE_SIN: 2613 FETCH( &r[0], 0, CHAN_X ); 2614 micro_sin( &r[0], &r[0] ); 2615 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2616 STORE( &r[0], 0, chan_index ); 2617 } 2618 break; 2619 2620 case TGSI_OPCODE_SLE: 2621 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2622 FETCH( &r[0], 0, chan_index ); 2623 FETCH( &r[1], 1, chan_index ); 2624 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2625 STORE( &r[0], 0, chan_index ); 2626 } 2627 break; 2628 2629 case TGSI_OPCODE_SNE: 2630 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2631 FETCH( &r[0], 0, chan_index ); 2632 FETCH( &r[1], 1, chan_index ); 2633 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2634 STORE( &r[0], 0, chan_index ); 2635 } 2636 break; 2637 2638 case TGSI_OPCODE_STR: 2639 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2640 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2641 } 2642 break; 2643 2644 case TGSI_OPCODE_TEX: 2645 /* simple texture lookup */ 2646 /* src[0] = texcoord */ 2647 /* src[1] = sampler unit */ 2648 exec_tex(mach, inst, FALSE, FALSE); 2649 break; 2650 2651 case TGSI_OPCODE_TXB: 2652 /* Texture lookup with lod bias */ 2653 /* src[0] = texcoord (src[0].w = LOD bias) */ 2654 /* src[1] = sampler unit */ 2655 exec_tex(mach, inst, TRUE, FALSE); 2656 break; 2657 2658 case TGSI_OPCODE_TXD: 2659 /* Texture lookup with explict partial derivatives */ 2660 /* src[0] = texcoord */ 2661 /* src[1] = d[strq]/dx */ 2662 /* src[2] = d[strq]/dy */ 2663 /* src[3] = sampler unit */ 2664 assert (0); 2665 break; 2666 2667 case TGSI_OPCODE_TXL: 2668 /* Texture lookup with explit LOD */ 2669 /* src[0] = texcoord (src[0].w = LOD) */ 2670 /* src[1] = sampler unit */ 2671 exec_tex(mach, inst, TRUE, FALSE); 2672 break; 
2673 2674 case TGSI_OPCODE_TXP: 2675 /* Texture lookup with projection */ 2676 /* src[0] = texcoord (src[0].w = projection) */ 2677 /* src[1] = sampler unit */ 2678 exec_tex(mach, inst, FALSE, TRUE); 2679 break; 2680 2681 case TGSI_OPCODE_UP2H: 2682 assert (0); 2683 break; 2684 2685 case TGSI_OPCODE_UP2US: 2686 assert (0); 2687 break; 2688 2689 case TGSI_OPCODE_UP4B: 2690 assert (0); 2691 break; 2692 2693 case TGSI_OPCODE_UP4UB: 2694 assert (0); 2695 break; 2696 2697 case TGSI_OPCODE_X2D: 2698 FETCH(&r[0], 1, CHAN_X); 2699 FETCH(&r[1], 1, CHAN_Y); 2700 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2701 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2702 FETCH(&r[2], 2, CHAN_X); 2703 micro_mul(&r[2], &r[2], &r[0]); 2704 FETCH(&r[3], 2, CHAN_Y); 2705 micro_mul(&r[3], &r[3], &r[1]); 2706 micro_add(&r[2], &r[2], &r[3]); 2707 FETCH(&r[3], 0, CHAN_X); 2708 micro_add(&r[2], &r[2], &r[3]); 2709 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2710 STORE(&r[2], 0, CHAN_X); 2711 } 2712 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2713 STORE(&r[2], 0, CHAN_Z); 2714 } 2715 } 2716 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2717 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2718 FETCH(&r[2], 2, CHAN_Z); 2719 micro_mul(&r[2], &r[2], &r[0]); 2720 FETCH(&r[3], 2, CHAN_W); 2721 micro_mul(&r[3], &r[3], &r[1]); 2722 micro_add(&r[2], &r[2], &r[3]); 2723 FETCH(&r[3], 0, CHAN_Y); 2724 micro_add(&r[2], &r[2], &r[3]); 2725 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2726 STORE(&r[2], 0, CHAN_Y); 2727 } 2728 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2729 STORE(&r[2], 0, CHAN_W); 2730 } 2731 } 2732 break; 2733 2734 case TGSI_OPCODE_ARA: 2735 assert (0); 2736 break; 2737 2738 case TGSI_OPCODE_BRA: 2739 assert (0); 2740 break; 2741 2742 case TGSI_OPCODE_CAL: 2743 /* skip the call if no execution channels are enabled */ 2744 if (mach->ExecMask) { 2745 /* do the call */ 2746 2747 /* First, record the depths of the execution stacks. 2748 * This is important for deeply nested/looped return statements. 
2749 * We have to unwind the stacks by the correct amount. For a 2750 * real code generator, we could determine the number of entries 2751 * to pop off each stack with simple static analysis and avoid 2752 * implementing this data structure at run time. 2753 */ 2754 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 2755 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 2756 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 2757 /* note that PC was already incremented above */ 2758 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 2759 2760 mach->CallStackTop++; 2761 2762 /* Second, push the Cond, Loop, Cont, Func stacks */ 2763 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2764 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2765 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2766 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2767 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2768 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2769 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2770 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2771 2772 /* Finally, jump to the subroutine */ 2773 *pc = inst->InstructionExtLabel.Label; 2774 } 2775 break; 2776 2777 case TGSI_OPCODE_RET: 2778 mach->FuncMask &= ~mach->ExecMask; 2779 UPDATE_EXEC_MASK(mach); 2780 2781 if (mach->FuncMask == 0x0) { 2782 /* really return now (otherwise, keep executing */ 2783 2784 if (mach->CallStackTop == 0) { 2785 /* returning from main() */ 2786 *pc = -1; 2787 return; 2788 } 2789 2790 assert(mach->CallStackTop > 0); 2791 mach->CallStackTop--; 2792 2793 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 2794 mach->CondMask = mach->CondStack[mach->CondStackTop]; 2795 2796 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 2797 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 2798 2799 mach->ContStackTop = 
mach->CallStack[mach->CallStackTop].ContStackTop; 2800 mach->ContMask = mach->ContStack[mach->ContStackTop]; 2801 2802 assert(mach->FuncStackTop > 0); 2803 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2804 2805 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 2806 2807 UPDATE_EXEC_MASK(mach); 2808 } 2809 break; 2810 2811 case TGSI_OPCODE_SSG: 2812 /* TGSI_OPCODE_SGN */ 2813 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2814 FETCH( &r[0], 0, chan_index ); 2815 micro_sgn( &r[0], &r[0] ); 2816 STORE( &r[0], 0, chan_index ); 2817 } 2818 break; 2819 2820 case TGSI_OPCODE_CMP: 2821 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2822 FETCH(&r[0], 0, chan_index); 2823 FETCH(&r[1], 1, chan_index); 2824 FETCH(&r[2], 2, chan_index); 2825 2826 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2827 2828 STORE(&r[0], 0, chan_index); 2829 } 2830 break; 2831 2832 case TGSI_OPCODE_SCS: 2833 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2834 FETCH( &r[0], 0, CHAN_X ); 2835 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2836 micro_cos(&r[1], &r[0]); 2837 STORE(&r[1], 0, CHAN_X); 2838 } 2839 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2840 micro_sin(&r[1], &r[0]); 2841 STORE(&r[1], 0, CHAN_Y); 2842 } 2843 } 2844 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2845 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2846 } 2847 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2848 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2849 } 2850 break; 2851 2852 case TGSI_OPCODE_NRM: 2853 /* 3-component vector normalize */ 2854 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2855 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2856 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2857 /* r3 = sqrt(dp3(src0, src0)) */ 2858 FETCH(&r[0], 0, CHAN_X); 2859 micro_mul(&r[3], &r[0], &r[0]); 2860 FETCH(&r[1], 0, CHAN_Y); 2861 micro_mul(&r[4], &r[1], &r[1]); 2862 micro_add(&r[3], &r[3], &r[4]); 2863 FETCH(&r[2], 0, CHAN_Z); 2864 micro_mul(&r[4], 
&r[2], &r[2]); 2865 micro_add(&r[3], &r[3], &r[4]); 2866 micro_sqrt(&r[3], &r[3]); 2867 2868 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2869 micro_div(&r[0], &r[0], &r[3]); 2870 STORE(&r[0], 0, CHAN_X); 2871 } 2872 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2873 micro_div(&r[1], &r[1], &r[3]); 2874 STORE(&r[1], 0, CHAN_Y); 2875 } 2876 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2877 micro_div(&r[2], &r[2], &r[3]); 2878 STORE(&r[2], 0, CHAN_Z); 2879 } 2880 } 2881 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2882 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2883 } 2884 break; 2885 2886 case TGSI_OPCODE_NRM4: 2887 /* 4-component vector normalize */ 2888 { 2889 union tgsi_exec_channel tmp, dot; 2890 2891 /* tmp = dp4(src0, src0): */ 2892 FETCH( &r[0], 0, CHAN_X ); 2893 micro_mul( &tmp, &r[0], &r[0] ); 2894 2895 FETCH( &r[1], 0, CHAN_Y ); 2896 micro_mul( &dot, &r[1], &r[1] ); 2897 micro_add( &tmp, &tmp, &dot ); 2898 2899 FETCH( &r[2], 0, CHAN_Z ); 2900 micro_mul( &dot, &r[2], &r[2] ); 2901 micro_add( &tmp, &tmp, &dot ); 2902 2903 FETCH( &r[3], 0, CHAN_W ); 2904 micro_mul( &dot, &r[3], &r[3] ); 2905 micro_add( &tmp, &tmp, &dot ); 2906 2907 /* tmp = 1 / sqrt(tmp) */ 2908 micro_sqrt( &tmp, &tmp ); 2909 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2910 2911 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2912 /* chan = chan * tmp */ 2913 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2914 STORE( &r[chan_index], 0, chan_index ); 2915 } 2916 } 2917 break; 2918 2919 case TGSI_OPCODE_DIV: 2920 assert( 0 ); 2921 break; 2922 2923 case TGSI_OPCODE_DP2: 2924 FETCH( &r[0], 0, CHAN_X ); 2925 FETCH( &r[1], 1, CHAN_X ); 2926 micro_mul( &r[0], &r[0], &r[1] ); 2927 2928 FETCH( &r[1], 0, CHAN_Y ); 2929 FETCH( &r[2], 1, CHAN_Y ); 2930 micro_mul( &r[1], &r[1], &r[2] ); 2931 micro_add( &r[0], &r[0], &r[1] ); 2932 2933 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2934 STORE( &r[0], 0, chan_index ); 2935 } 2936 break; 2937 2938 case TGSI_OPCODE_IF: 2939 /* push 
CondMask */ 2940 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2941 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2942 FETCH( &r[0], 0, CHAN_X ); 2943 /* update CondMask */ 2944 if( ! r[0].u[0] ) { 2945 mach->CondMask &= ~0x1; 2946 } 2947 if( ! r[0].u[1] ) { 2948 mach->CondMask &= ~0x2; 2949 } 2950 if( ! r[0].u[2] ) { 2951 mach->CondMask &= ~0x4; 2952 } 2953 if( ! r[0].u[3] ) { 2954 mach->CondMask &= ~0x8; 2955 } 2956 UPDATE_EXEC_MASK(mach); 2957 /* Todo: If CondMask==0, jump to ELSE */ 2958 break; 2959 2960 case TGSI_OPCODE_ELSE: 2961 /* invert CondMask wrt previous mask */ 2962 { 2963 uint prevMask; 2964 assert(mach->CondStackTop > 0); 2965 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2966 mach->CondMask = ~mach->CondMask & prevMask; 2967 UPDATE_EXEC_MASK(mach); 2968 /* Todo: If CondMask==0, jump to ENDIF */ 2969 } 2970 break; 2971 2972 case TGSI_OPCODE_ENDIF: 2973 /* pop CondMask */ 2974 assert(mach->CondStackTop > 0); 2975 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2976 UPDATE_EXEC_MASK(mach); 2977 break; 2978 2979 case TGSI_OPCODE_END: 2980 /* halt execution */ 2981 *pc = -1; 2982 break; 2983 2984 case TGSI_OPCODE_REP: 2985 assert (0); 2986 break; 2987 2988 case TGSI_OPCODE_ENDREP: 2989 assert (0); 2990 break; 2991 2992 case TGSI_OPCODE_PUSHA: 2993 assert (0); 2994 break; 2995 2996 case TGSI_OPCODE_POPA: 2997 assert (0); 2998 break; 2999 3000 case TGSI_OPCODE_CEIL: 3001 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3002 FETCH( &r[0], 0, chan_index ); 3003 micro_ceil( &r[0], &r[0] ); 3004 STORE( &r[0], 0, chan_index ); 3005 } 3006 break; 3007 3008 case TGSI_OPCODE_I2F: 3009 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3010 FETCH( &r[0], 0, chan_index ); 3011 micro_i2f( &r[0], &r[0] ); 3012 STORE( &r[0], 0, chan_index ); 3013 } 3014 break; 3015 3016 case TGSI_OPCODE_NOT: 3017 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3018 FETCH( &r[0], 0, chan_index ); 3019 micro_not( &r[0], &r[0] ); 3020 STORE( &r[0], 0, 
chan_index ); 3021 } 3022 break; 3023 3024 case TGSI_OPCODE_TRUNC: 3025 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3026 FETCH( &r[0], 0, chan_index ); 3027 micro_trunc( &r[0], &r[0] ); 3028 STORE( &r[0], 0, chan_index ); 3029 } 3030 break; 3031 3032 case TGSI_OPCODE_SHL: 3033 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3034 FETCH( &r[0], 0, chan_index ); 3035 FETCH( &r[1], 1, chan_index ); 3036 micro_shl( &r[0], &r[0], &r[1] ); 3037 STORE( &r[0], 0, chan_index ); 3038 } 3039 break; 3040 3041 case TGSI_OPCODE_SHR: 3042 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3043 FETCH( &r[0], 0, chan_index ); 3044 FETCH( &r[1], 1, chan_index ); 3045 micro_ishr( &r[0], &r[0], &r[1] ); 3046 STORE( &r[0], 0, chan_index ); 3047 } 3048 break; 3049 3050 case TGSI_OPCODE_AND: 3051 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3052 FETCH( &r[0], 0, chan_index ); 3053 FETCH( &r[1], 1, chan_index ); 3054 micro_and( &r[0], &r[0], &r[1] ); 3055 STORE( &r[0], 0, chan_index ); 3056 } 3057 break; 3058 3059 case TGSI_OPCODE_OR: 3060 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3061 FETCH( &r[0], 0, chan_index ); 3062 FETCH( &r[1], 1, chan_index ); 3063 micro_or( &r[0], &r[0], &r[1] ); 3064 STORE( &r[0], 0, chan_index ); 3065 } 3066 break; 3067 3068 case TGSI_OPCODE_MOD: 3069 assert (0); 3070 break; 3071 3072 case TGSI_OPCODE_XOR: 3073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3074 FETCH( &r[0], 0, chan_index ); 3075 FETCH( &r[1], 1, chan_index ); 3076 micro_xor( &r[0], &r[0], &r[1] ); 3077 STORE( &r[0], 0, chan_index ); 3078 } 3079 break; 3080 3081 case TGSI_OPCODE_SAD: 3082 assert (0); 3083 break; 3084 3085 case TGSI_OPCODE_TXF: 3086 assert (0); 3087 break; 3088 3089 case TGSI_OPCODE_TXQ: 3090 assert (0); 3091 break; 3092 3093 case TGSI_OPCODE_EMIT: 3094 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 3095 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 3096 break; 3097 3098 case TGSI_OPCODE_ENDPRIM: 3099 
mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 3100 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 3101 break; 3102 3103 case TGSI_OPCODE_BGNFOR: 3104 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3105 for (chan_index = 0; chan_index < 3; chan_index++) { 3106 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3107 } 3108 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); 3109 ++mach->LoopCounterStackTop; 3110 /* fall-through (for now) */ 3111 case TGSI_OPCODE_BGNLOOP: 3112 /* push LoopMask and ContMasks */ 3113 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3114 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3115 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3116 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3117 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3118 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3119 break; 3120 3121 case TGSI_OPCODE_ENDFOR: 3122 assert(mach->LoopCounterStackTop > 0); 3123 micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3124 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3125 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 3126 /* update LoopMask */ 3127 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { 3128 mach->LoopMask &= ~0x1; 3129 } 3130 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { 3131 mach->LoopMask &= ~0x2; 3132 } 3133 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { 3134 mach->LoopMask &= ~0x4; 3135 } 3136 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { 3137 mach->LoopMask &= ~0x8; 3138 } 3139 micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3140 &mach->LoopCounterStack[mach->LoopCounterStackTop - 
1].xyzw[CHAN_Y], 3141 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3142 assert(mach->LoopLabelStackTop > 0); 3143 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3144 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); 3145 /* Restore ContMask, but don't pop */ 3146 assert(mach->ContStackTop > 0); 3147 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3148 UPDATE_EXEC_MASK(mach); 3149 if (mach->ExecMask) { 3150 /* repeat loop: jump to instruction just past BGNLOOP */ 3151 assert(mach->LoopLabelStackTop > 0); 3152 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3153 } 3154 else { 3155 /* exit loop: pop LoopMask */ 3156 assert(mach->LoopStackTop > 0); 3157 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3158 /* pop ContMask */ 3159 assert(mach->ContStackTop > 0); 3160 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3161 assert(mach->LoopLabelStackTop > 0); 3162 --mach->LoopLabelStackTop; 3163 assert(mach->LoopCounterStackTop > 0); 3164 --mach->LoopCounterStackTop; 3165 } 3166 UPDATE_EXEC_MASK(mach); 3167 break; 3168 3169 case TGSI_OPCODE_ENDLOOP: 3170 /* Restore ContMask, but don't pop */ 3171 assert(mach->ContStackTop > 0); 3172 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3173 UPDATE_EXEC_MASK(mach); 3174 if (mach->ExecMask) { 3175 /* repeat loop: jump to instruction just past BGNLOOP */ 3176 assert(mach->LoopLabelStackTop > 0); 3177 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3178 } 3179 else { 3180 /* exit loop: pop LoopMask */ 3181 assert(mach->LoopStackTop > 0); 3182 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3183 /* pop ContMask */ 3184 assert(mach->ContStackTop > 0); 3185 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3186 assert(mach->LoopLabelStackTop > 0); 3187 --mach->LoopLabelStackTop; 3188 } 3189 UPDATE_EXEC_MASK(mach); 3190 break; 3191 3192 case TGSI_OPCODE_BRK: 3193 /* turn 
off loop channels for each enabled exec channel */ 3194 mach->LoopMask &= ~mach->ExecMask; 3195 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3196 UPDATE_EXEC_MASK(mach); 3197 break; 3198 3199 case TGSI_OPCODE_CONT: 3200 /* turn off cont channels for each enabled exec channel */ 3201 mach->ContMask &= ~mach->ExecMask; 3202 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3203 UPDATE_EXEC_MASK(mach); 3204 break; 3205 3206 case TGSI_OPCODE_BGNSUB: 3207 /* no-op */ 3208 break; 3209 3210 case TGSI_OPCODE_ENDSUB: 3211 /* no-op */ 3212 break; 3213 3214 case TGSI_OPCODE_NOP: 3215 break; 3216 3217 default: 3218 assert( 0 ); 3219 } 3220}


/**
 * Run the TGSI interpreter over the program loaded in \p mach.
 *
 * The Cond/Loop/Cont/Func/Exec masks are all set to 0xf, the first word of
 * the kill-mask and output registers is zeroed, and every condition-code
 * slot is preset to EQ for X, Y, Z and W.  Geometry programs additionally
 * get their primitive register and Primitives[0] zeroed.  Declarations are
 * executed first; instructions are then executed one at a time until the
 * program counter becomes -1.
 *
 * The Cond/Loop/Cont/Call stacks are expected to be empty on entry; this is
 * checked with assert() only.
 *
 * \param mach  interpreter state to execute
 * \return bitmask of "alive" quad components, computed as the complement
 *         of the first word of the kill-mask register
 */
uint
tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
{
   /* condition-code word holding EQ in each of the X, Y, Z and W fields */
   const uint cc_all_eq =
      (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) |
      (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) |
      (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) |
      (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT);
   union tgsi_exec_channel *cc_reg = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C];
   uint i;
   int pc;

   /* start the run with all four bits set in every mask */
   mach->CondMask = 0xf;
   mach->LoopMask = 0xf;
   mach->ContMask = 0xf;
   mach->FuncMask = 0xf;
   mach->ExecMask = 0xf;

   /* a previous run must have left these stacks empty */
   assert(mach->CondStackTop == 0);
   assert(mach->LoopStackTop == 0);
   assert(mach->ContStackTop == 0);
   assert(mach->CallStackTop == 0);

   /* clear the kill-mask and output bookkeeping words */
   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;

   /* geometry programs also restart their primitive bookkeeping */
   if (mach->Processor == TGSI_PROCESSOR_GEOMETRY) {
      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
      mach->Primitives[0] = 0;
   }

   /* preset the condition codes of every quad component */
   for (i = 0; i < QUAD_SIZE; i++) {
      cc_reg->u[i] = cc_all_eq;
   }

   /* execute declarations (interpolants) */
   for (i = 0; i < mach->NumDeclarations; i++) {
      exec_declaration( mach, &mach->Declarations[i] );
   }

   /* step through instructions; exec_instruction() stores -1 in pc to halt */
   for (pc = 0; pc != -1; ) {
      assert(pc < (int) mach->NumInstructions);
      exec_instruction( mach, &mach->Instructions[pc], &pc );
   }

#if 0
   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
      /*
       * Scale back depth component.
       */
      for (i = 0; i < 4; i++)
         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
   }
#endif

   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
}