tgsi_exec.c revision ba1ca28cc62fed71c77902b95ae4ed36c6bf25f8
/**************************************************************************
 * 
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 **************************************************************************/

/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 * See store_dest().
42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65#define TILE_TOP_LEFT 0 66#define TILE_TOP_RIGHT 1 67#define TILE_BOTTOM_LEFT 2 68#define TILE_BOTTOM_RIGHT 3 69 70#define CHAN_X 0 71#define CHAN_Y 1 72#define CHAN_Z 2 73#define CHAN_W 3 74 75/* 76 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 77 */ 78#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 79#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 80#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 81#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 82#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 83#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 84#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 85#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 86#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 87#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 88#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 89#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 90#define TEMP_128_I TGSI_EXEC_TEMP_128_I 91#define TEMP_128_C TGSI_EXEC_TEMP_128_C 92#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 93#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 94#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 95#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 96#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 97#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 98#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 99#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 100#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 101#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 102#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 103#define 
TEMP_3_C TGSI_EXEC_TEMP_THREE_C 104#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 105#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 106#define TEMP_R0 TGSI_EXEC_TEMP_R0 107#define TEMP_P0 TGSI_EXEC_TEMP_P0 108 109#define IS_CHANNEL_ENABLED(INST, CHAN)\ 110 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 111 112#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 113 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 114 115#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 116 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 117 if (IS_CHANNEL_ENABLED( INST, CHAN )) 118 119#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 120 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 121 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 122 123 124/** The execution mask depends on the conditional mask and the loop mask */ 125#define UPDATE_EXEC_MASK(MACH) \ 126 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 127 128 129static const union tgsi_exec_channel ZeroVec = 130 { { 0.0, 0.0, 0.0, 0.0 } }; 131 132 133#ifdef DEBUG 134static void 135check_inf_or_nan(const union tgsi_exec_channel *chan) 136{ 137 assert(!util_is_inf_or_nan(chan->f[0])); 138 assert(!util_is_inf_or_nan(chan->f[1])); 139 assert(!util_is_inf_or_nan(chan->f[2])); 140 assert(!util_is_inf_or_nan(chan->f[3])); 141} 142#endif 143 144 145#ifdef DEBUG 146static void 147print_chan(const char *msg, const union tgsi_exec_channel *chan) 148{ 149 debug_printf("%s = {%f, %f, %f, %f}\n", 150 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 151} 152#endif 153 154 155#ifdef DEBUG 156static void 157print_temp(const struct tgsi_exec_machine *mach, uint index) 158{ 159 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 160 int i; 161 debug_printf("Temp[%u] =\n", index); 162 for (i = 0; i < 4; i++) { 163 debug_printf(" %c: { %f, %f, %f, %f }\n", 164 "XYZW"[i], 165 tmp->xyzw[i].f[0], 166 tmp->xyzw[i].f[1], 167 tmp->xyzw[i].f[2], 168 tmp->xyzw[i].f[3]); 169 } 170} 171#endif 172 173 174/** 175 * 
Check if there's a potential src/dst register data dependency when 176 * using SOA execution. 177 * Example: 178 * MOV T, T.yxwz; 179 * This would expand into: 180 * MOV t0, t1; 181 * MOV t1, t0; 182 * MOV t2, t3; 183 * MOV t3, t2; 184 * The second instruction will have the wrong value for t0 if executed as-is. 185 */ 186boolean 187tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 188{ 189 uint i, chan; 190 191 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 192 if (writemask == TGSI_WRITEMASK_X || 193 writemask == TGSI_WRITEMASK_Y || 194 writemask == TGSI_WRITEMASK_Z || 195 writemask == TGSI_WRITEMASK_W || 196 writemask == TGSI_WRITEMASK_NONE) { 197 /* no chance of data dependency */ 198 return FALSE; 199 } 200 201 /* loop over src regs */ 202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 203 if ((inst->FullSrcRegisters[i].SrcRegister.File == 204 inst->FullDstRegisters[0].DstRegister.File) && 205 (inst->FullSrcRegisters[i].SrcRegister.Index == 206 inst->FullDstRegisters[0].DstRegister.Index)) { 207 /* loop over dest channels */ 208 uint channelsWritten = 0x0; 209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 210 /* check if we're reading a channel that's been written */ 211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); 212 if (channelsWritten & (1 << swizzle)) { 213 return TRUE; 214 } 215 216 channelsWritten |= (1 << chan); 217 } 218 } 219 } 220 return FALSE; 221} 222 223 224/** 225 * Initialize machine state by expanding tokens to full instructions, 226 * allocating temporary storage, setting up constants, etc. 227 * After this, we can call tgsi_exec_machine_run() many times. 
228 */ 229void 230tgsi_exec_machine_bind_shader( 231 struct tgsi_exec_machine *mach, 232 const struct tgsi_token *tokens, 233 uint numSamplers, 234 struct tgsi_sampler **samplers) 235{ 236 uint k; 237 struct tgsi_parse_context parse; 238 struct tgsi_exec_labels *labels = &mach->Labels; 239 struct tgsi_full_instruction *instructions; 240 struct tgsi_full_declaration *declarations; 241 uint maxInstructions = 10, numInstructions = 0; 242 uint maxDeclarations = 10, numDeclarations = 0; 243 uint instno = 0; 244 245#if 0 246 tgsi_dump(tokens, 0); 247#endif 248 249 util_init_math(); 250 251 mach->Tokens = tokens; 252 mach->Samplers = samplers; 253 254 k = tgsi_parse_init (&parse, mach->Tokens); 255 if (k != TGSI_PARSE_OK) { 256 debug_printf( "Problem parsing!\n" ); 257 return; 258 } 259 260 mach->Processor = parse.FullHeader.Processor.Processor; 261 mach->ImmLimit = 0; 262 labels->count = 0; 263 264 declarations = (struct tgsi_full_declaration *) 265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 266 267 if (!declarations) { 268 return; 269 } 270 271 instructions = (struct tgsi_full_instruction *) 272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 273 274 if (!instructions) { 275 FREE( declarations ); 276 return; 277 } 278 279 while( !tgsi_parse_end_of_tokens( &parse ) ) { 280 uint pointer = parse.Position; 281 uint i; 282 283 tgsi_parse_token( &parse ); 284 switch( parse.FullToken.Token.Type ) { 285 case TGSI_TOKEN_TYPE_DECLARATION: 286 /* save expanded declaration */ 287 if (numDeclarations == maxDeclarations) { 288 declarations = REALLOC(declarations, 289 maxDeclarations 290 * sizeof(struct tgsi_full_declaration), 291 (maxDeclarations + 10) 292 * sizeof(struct tgsi_full_declaration)); 293 maxDeclarations += 10; 294 } 295 memcpy(declarations + numDeclarations, 296 &parse.FullToken.FullDeclaration, 297 sizeof(declarations[0])); 298 numDeclarations++; 299 break; 300 301 case TGSI_TOKEN_TYPE_IMMEDIATE: 302 { 303 uint size = 
parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 304 assert( size <= 4 ); 305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 306 307 for( i = 0; i < size; i++ ) { 308 mach->Imms[mach->ImmLimit][i] = 309 parse.FullToken.FullImmediate.u[i].Float; 310 } 311 mach->ImmLimit += 1; 312 } 313 break; 314 315 case TGSI_TOKEN_TYPE_INSTRUCTION: 316 assert( labels->count < MAX_LABELS ); 317 318 labels->labels[labels->count][0] = instno; 319 labels->labels[labels->count][1] = pointer; 320 labels->count++; 321 322 /* save expanded instruction */ 323 if (numInstructions == maxInstructions) { 324 instructions = REALLOC(instructions, 325 maxInstructions 326 * sizeof(struct tgsi_full_instruction), 327 (maxInstructions + 10) 328 * sizeof(struct tgsi_full_instruction)); 329 maxInstructions += 10; 330 } 331 332 memcpy(instructions + numInstructions, 333 &parse.FullToken.FullInstruction, 334 sizeof(instructions[0])); 335 336 numInstructions++; 337 break; 338 339 default: 340 assert( 0 ); 341 } 342 } 343 tgsi_parse_free (&parse); 344 345 if (mach->Declarations) { 346 FREE( mach->Declarations ); 347 } 348 mach->Declarations = declarations; 349 mach->NumDeclarations = numDeclarations; 350 351 if (mach->Instructions) { 352 FREE( mach->Instructions ); 353 } 354 mach->Instructions = instructions; 355 mach->NumInstructions = numInstructions; 356} 357 358 359struct tgsi_exec_machine * 360tgsi_exec_machine_create( void ) 361{ 362 struct tgsi_exec_machine *mach; 363 uint i; 364 365 mach = align_malloc( sizeof *mach, 16 ); 366 if (!mach) 367 goto fail; 368 369 memset(mach, 0, sizeof(*mach)); 370 371 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 372 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 373 374 /* Setup constants. 
*/ 375 for( i = 0; i < 4; i++ ) { 376 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 377 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 378 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 379 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 380 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 381 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 382 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 383 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 384 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 385 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 386 } 387 388#ifdef DEBUG 389 /* silence warnings */ 390 (void) print_chan; 391 (void) print_temp; 392#endif 393 394 return mach; 395 396fail: 397 align_free(mach); 398 return NULL; 399} 400 401 402void 403tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 404{ 405 if (mach) { 406 FREE(mach->Instructions); 407 FREE(mach->Declarations); 408 } 409 410 align_free(mach); 411} 412 413 414static void 415micro_abs( 416 union tgsi_exec_channel *dst, 417 const union tgsi_exec_channel *src ) 418{ 419 dst->f[0] = fabsf( src->f[0] ); 420 dst->f[1] = fabsf( src->f[1] ); 421 dst->f[2] = fabsf( src->f[2] ); 422 dst->f[3] = fabsf( src->f[3] ); 423} 424 425static void 426micro_add( 427 union tgsi_exec_channel *dst, 428 const union tgsi_exec_channel *src0, 429 const union tgsi_exec_channel *src1 ) 430{ 431 dst->f[0] = src0->f[0] + src1->f[0]; 432 dst->f[1] = src0->f[1] + src1->f[1]; 433 dst->f[2] = src0->f[2] + src1->f[2]; 434 dst->f[3] = src0->f[3] + src1->f[3]; 435} 436 437#if 0 438static void 439micro_iadd( 440 union tgsi_exec_channel *dst, 441 const union tgsi_exec_channel *src0, 442 const union tgsi_exec_channel *src1 ) 443{ 444 dst->i[0] = src0->i[0] + src1->i[0]; 445 dst->i[1] = src0->i[1] + src1->i[1]; 446 dst->i[2] = src0->i[2] + src1->i[2]; 447 dst->i[3] = src0->i[3] + src1->i[3]; 448} 449#endif 450 451static void 452micro_and( 453 union 
tgsi_exec_channel *dst, 454 const union tgsi_exec_channel *src0, 455 const union tgsi_exec_channel *src1 ) 456{ 457 dst->u[0] = src0->u[0] & src1->u[0]; 458 dst->u[1] = src0->u[1] & src1->u[1]; 459 dst->u[2] = src0->u[2] & src1->u[2]; 460 dst->u[3] = src0->u[3] & src1->u[3]; 461} 462 463static void 464micro_ceil( 465 union tgsi_exec_channel *dst, 466 const union tgsi_exec_channel *src ) 467{ 468 dst->f[0] = ceilf( src->f[0] ); 469 dst->f[1] = ceilf( src->f[1] ); 470 dst->f[2] = ceilf( src->f[2] ); 471 dst->f[3] = ceilf( src->f[3] ); 472} 473 474static void 475micro_cos( 476 union tgsi_exec_channel *dst, 477 const union tgsi_exec_channel *src ) 478{ 479 dst->f[0] = cosf( src->f[0] ); 480 dst->f[1] = cosf( src->f[1] ); 481 dst->f[2] = cosf( src->f[2] ); 482 dst->f[3] = cosf( src->f[3] ); 483} 484 485static void 486micro_ddx( 487 union tgsi_exec_channel *dst, 488 const union tgsi_exec_channel *src ) 489{ 490 dst->f[0] = 491 dst->f[1] = 492 dst->f[2] = 493 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 494} 495 496static void 497micro_ddy( 498 union tgsi_exec_channel *dst, 499 const union tgsi_exec_channel *src ) 500{ 501 dst->f[0] = 502 dst->f[1] = 503 dst->f[2] = 504 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 505} 506 507static void 508micro_div( 509 union tgsi_exec_channel *dst, 510 const union tgsi_exec_channel *src0, 511 const union tgsi_exec_channel *src1 ) 512{ 513 if (src1->f[0] != 0) { 514 dst->f[0] = src0->f[0] / src1->f[0]; 515 } 516 if (src1->f[1] != 0) { 517 dst->f[1] = src0->f[1] / src1->f[1]; 518 } 519 if (src1->f[2] != 0) { 520 dst->f[2] = src0->f[2] / src1->f[2]; 521 } 522 if (src1->f[3] != 0) { 523 dst->f[3] = src0->f[3] / src1->f[3]; 524 } 525} 526 527#if 0 528static void 529micro_udiv( 530 union tgsi_exec_channel *dst, 531 const union tgsi_exec_channel *src0, 532 const union tgsi_exec_channel *src1 ) 533{ 534 dst->u[0] = src0->u[0] / src1->u[0]; 535 dst->u[1] = src0->u[1] / src1->u[1]; 536 dst->u[2] = src0->u[2] 
/ src1->u[2]; 537 dst->u[3] = src0->u[3] / src1->u[3]; 538} 539#endif 540 541static void 542micro_eq( 543 union tgsi_exec_channel *dst, 544 const union tgsi_exec_channel *src0, 545 const union tgsi_exec_channel *src1, 546 const union tgsi_exec_channel *src2, 547 const union tgsi_exec_channel *src3 ) 548{ 549 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 550 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 551 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 552 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 553} 554 555#if 0 556static void 557micro_ieq( 558 union tgsi_exec_channel *dst, 559 const union tgsi_exec_channel *src0, 560 const union tgsi_exec_channel *src1, 561 const union tgsi_exec_channel *src2, 562 const union tgsi_exec_channel *src3 ) 563{ 564 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 565 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 566 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 567 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 568} 569#endif 570 571static void 572micro_exp2( 573 union tgsi_exec_channel *dst, 574 const union tgsi_exec_channel *src) 575{ 576#if FAST_MATH 577 dst->f[0] = util_fast_exp2( src->f[0] ); 578 dst->f[1] = util_fast_exp2( src->f[1] ); 579 dst->f[2] = util_fast_exp2( src->f[2] ); 580 dst->f[3] = util_fast_exp2( src->f[3] ); 581#else 582 583#if DEBUG 584 /* Inf is okay for this instruction, so clamp it to silence assertions. 
*/ 585 uint i; 586 union tgsi_exec_channel clamped; 587 588 for (i = 0; i < 4; i++) { 589 if (src->f[i] > 127.99999f) { 590 clamped.f[i] = 127.99999f; 591 } else if (src->f[i] < -126.99999f) { 592 clamped.f[i] = -126.99999f; 593 } else { 594 clamped.f[i] = src->f[i]; 595 } 596 } 597 src = &clamped; 598#endif 599 600 dst->f[0] = powf( 2.0f, src->f[0] ); 601 dst->f[1] = powf( 2.0f, src->f[1] ); 602 dst->f[2] = powf( 2.0f, src->f[2] ); 603 dst->f[3] = powf( 2.0f, src->f[3] ); 604#endif 605} 606 607#if 0 608static void 609micro_f2ut( 610 union tgsi_exec_channel *dst, 611 const union tgsi_exec_channel *src ) 612{ 613 dst->u[0] = (uint) src->f[0]; 614 dst->u[1] = (uint) src->f[1]; 615 dst->u[2] = (uint) src->f[2]; 616 dst->u[3] = (uint) src->f[3]; 617} 618#endif 619 620static void 621micro_float_clamp(union tgsi_exec_channel *dst, 622 const union tgsi_exec_channel *src) 623{ 624 uint i; 625 626 for (i = 0; i < 4; i++) { 627 if (src->f[i] > 0.0f) { 628 if (src->f[i] > 1.884467e+019f) 629 dst->f[i] = 1.884467e+019f; 630 else if (src->f[i] < 5.42101e-020f) 631 dst->f[i] = 5.42101e-020f; 632 else 633 dst->f[i] = src->f[i]; 634 } 635 else { 636 if (src->f[i] < -1.884467e+019f) 637 dst->f[i] = -1.884467e+019f; 638 else if (src->f[i] > -5.42101e-020f) 639 dst->f[i] = -5.42101e-020f; 640 else 641 dst->f[i] = src->f[i]; 642 } 643 } 644} 645 646static void 647micro_flr( 648 union tgsi_exec_channel *dst, 649 const union tgsi_exec_channel *src ) 650{ 651 dst->f[0] = floorf( src->f[0] ); 652 dst->f[1] = floorf( src->f[1] ); 653 dst->f[2] = floorf( src->f[2] ); 654 dst->f[3] = floorf( src->f[3] ); 655} 656 657static void 658micro_frc( 659 union tgsi_exec_channel *dst, 660 const union tgsi_exec_channel *src ) 661{ 662 dst->f[0] = src->f[0] - floorf( src->f[0] ); 663 dst->f[1] = src->f[1] - floorf( src->f[1] ); 664 dst->f[2] = src->f[2] - floorf( src->f[2] ); 665 dst->f[3] = src->f[3] - floorf( src->f[3] ); 666} 667 668static void 669micro_i2f( 670 union tgsi_exec_channel *dst, 671 
const union tgsi_exec_channel *src ) 672{ 673 dst->f[0] = (float) src->i[0]; 674 dst->f[1] = (float) src->i[1]; 675 dst->f[2] = (float) src->i[2]; 676 dst->f[3] = (float) src->i[3]; 677} 678 679static void 680micro_lg2( 681 union tgsi_exec_channel *dst, 682 const union tgsi_exec_channel *src ) 683{ 684#if FAST_MATH 685 dst->f[0] = util_fast_log2( src->f[0] ); 686 dst->f[1] = util_fast_log2( src->f[1] ); 687 dst->f[2] = util_fast_log2( src->f[2] ); 688 dst->f[3] = util_fast_log2( src->f[3] ); 689#else 690 dst->f[0] = logf( src->f[0] ) * 1.442695f; 691 dst->f[1] = logf( src->f[1] ) * 1.442695f; 692 dst->f[2] = logf( src->f[2] ) * 1.442695f; 693 dst->f[3] = logf( src->f[3] ) * 1.442695f; 694#endif 695} 696 697static void 698micro_le( 699 union tgsi_exec_channel *dst, 700 const union tgsi_exec_channel *src0, 701 const union tgsi_exec_channel *src1, 702 const union tgsi_exec_channel *src2, 703 const union tgsi_exec_channel *src3 ) 704{ 705 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 706 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 707 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 708 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 709} 710 711static void 712micro_lt( 713 union tgsi_exec_channel *dst, 714 const union tgsi_exec_channel *src0, 715 const union tgsi_exec_channel *src1, 716 const union tgsi_exec_channel *src2, 717 const union tgsi_exec_channel *src3 ) 718{ 719 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 720 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 721 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 722 dst->f[3] = src0->f[3] < src1->f[3] ? 
src2->f[3] : src3->f[3]; 723} 724 725#if 0 726static void 727micro_ilt( 728 union tgsi_exec_channel *dst, 729 const union tgsi_exec_channel *src0, 730 const union tgsi_exec_channel *src1, 731 const union tgsi_exec_channel *src2, 732 const union tgsi_exec_channel *src3 ) 733{ 734 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 735 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 736 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 737 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 738} 739#endif 740 741#if 0 742static void 743micro_ult( 744 union tgsi_exec_channel *dst, 745 const union tgsi_exec_channel *src0, 746 const union tgsi_exec_channel *src1, 747 const union tgsi_exec_channel *src2, 748 const union tgsi_exec_channel *src3 ) 749{ 750 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 751 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 752 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 753 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 754} 755#endif 756 757static void 758micro_max( 759 union tgsi_exec_channel *dst, 760 const union tgsi_exec_channel *src0, 761 const union tgsi_exec_channel *src1 ) 762{ 763 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 764 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 765 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 766 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 767} 768 769#if 0 770static void 771micro_imax( 772 union tgsi_exec_channel *dst, 773 const union tgsi_exec_channel *src0, 774 const union tgsi_exec_channel *src1 ) 775{ 776 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 777 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 778 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 779 dst->i[3] = src0->i[3] > src1->i[3] ? 
src0->i[3] : src1->i[3]; 780} 781#endif 782 783#if 0 784static void 785micro_umax( 786 union tgsi_exec_channel *dst, 787 const union tgsi_exec_channel *src0, 788 const union tgsi_exec_channel *src1 ) 789{ 790 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 791 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 792 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 793 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 794} 795#endif 796 797static void 798micro_min( 799 union tgsi_exec_channel *dst, 800 const union tgsi_exec_channel *src0, 801 const union tgsi_exec_channel *src1 ) 802{ 803 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 804 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 805 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 806 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 807} 808 809#if 0 810static void 811micro_imin( 812 union tgsi_exec_channel *dst, 813 const union tgsi_exec_channel *src0, 814 const union tgsi_exec_channel *src1 ) 815{ 816 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 817 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 818 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 819 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 820} 821#endif 822 823#if 0 824static void 825micro_umin( 826 union tgsi_exec_channel *dst, 827 const union tgsi_exec_channel *src0, 828 const union tgsi_exec_channel *src1 ) 829{ 830 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 831 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 832 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 833 dst->u[3] = src0->u[3] < src1->u[3] ? 
src0->u[3] : src1->u[3]; 834} 835#endif 836 837#if 0 838static void 839micro_umod( 840 union tgsi_exec_channel *dst, 841 const union tgsi_exec_channel *src0, 842 const union tgsi_exec_channel *src1 ) 843{ 844 dst->u[0] = src0->u[0] % src1->u[0]; 845 dst->u[1] = src0->u[1] % src1->u[1]; 846 dst->u[2] = src0->u[2] % src1->u[2]; 847 dst->u[3] = src0->u[3] % src1->u[3]; 848} 849#endif 850 851static void 852micro_mul( 853 union tgsi_exec_channel *dst, 854 const union tgsi_exec_channel *src0, 855 const union tgsi_exec_channel *src1 ) 856{ 857 dst->f[0] = src0->f[0] * src1->f[0]; 858 dst->f[1] = src0->f[1] * src1->f[1]; 859 dst->f[2] = src0->f[2] * src1->f[2]; 860 dst->f[3] = src0->f[3] * src1->f[3]; 861} 862 863#if 0 864static void 865micro_imul( 866 union tgsi_exec_channel *dst, 867 const union tgsi_exec_channel *src0, 868 const union tgsi_exec_channel *src1 ) 869{ 870 dst->i[0] = src0->i[0] * src1->i[0]; 871 dst->i[1] = src0->i[1] * src1->i[1]; 872 dst->i[2] = src0->i[2] * src1->i[2]; 873 dst->i[3] = src0->i[3] * src1->i[3]; 874} 875#endif 876 877#if 0 878static void 879micro_imul64( 880 union tgsi_exec_channel *dst0, 881 union tgsi_exec_channel *dst1, 882 const union tgsi_exec_channel *src0, 883 const union tgsi_exec_channel *src1 ) 884{ 885 dst1->i[0] = src0->i[0] * src1->i[0]; 886 dst1->i[1] = src0->i[1] * src1->i[1]; 887 dst1->i[2] = src0->i[2] * src1->i[2]; 888 dst1->i[3] = src0->i[3] * src1->i[3]; 889 dst0->i[0] = 0; 890 dst0->i[1] = 0; 891 dst0->i[2] = 0; 892 dst0->i[3] = 0; 893} 894#endif 895 896#if 0 897static void 898micro_umul64( 899 union tgsi_exec_channel *dst0, 900 union tgsi_exec_channel *dst1, 901 const union tgsi_exec_channel *src0, 902 const union tgsi_exec_channel *src1 ) 903{ 904 dst1->u[0] = src0->u[0] * src1->u[0]; 905 dst1->u[1] = src0->u[1] * src1->u[1]; 906 dst1->u[2] = src0->u[2] * src1->u[2]; 907 dst1->u[3] = src0->u[3] * src1->u[3]; 908 dst0->u[0] = 0; 909 dst0->u[1] = 0; 910 dst0->u[2] = 0; 911 dst0->u[3] = 0; 912} 913#endif 914 915 916#if 
0 917static void 918micro_movc( 919 union tgsi_exec_channel *dst, 920 const union tgsi_exec_channel *src0, 921 const union tgsi_exec_channel *src1, 922 const union tgsi_exec_channel *src2 ) 923{ 924 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 925 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 926 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 927 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 928} 929#endif 930 931static void 932micro_neg( 933 union tgsi_exec_channel *dst, 934 const union tgsi_exec_channel *src ) 935{ 936 dst->f[0] = -src->f[0]; 937 dst->f[1] = -src->f[1]; 938 dst->f[2] = -src->f[2]; 939 dst->f[3] = -src->f[3]; 940} 941 942#if 0 943static void 944micro_ineg( 945 union tgsi_exec_channel *dst, 946 const union tgsi_exec_channel *src ) 947{ 948 dst->i[0] = -src->i[0]; 949 dst->i[1] = -src->i[1]; 950 dst->i[2] = -src->i[2]; 951 dst->i[3] = -src->i[3]; 952} 953#endif 954 955static void 956micro_not( 957 union tgsi_exec_channel *dst, 958 const union tgsi_exec_channel *src ) 959{ 960 dst->u[0] = ~src->u[0]; 961 dst->u[1] = ~src->u[1]; 962 dst->u[2] = ~src->u[2]; 963 dst->u[3] = ~src->u[3]; 964} 965 966static void 967micro_or( 968 union tgsi_exec_channel *dst, 969 const union tgsi_exec_channel *src0, 970 const union tgsi_exec_channel *src1 ) 971{ 972 dst->u[0] = src0->u[0] | src1->u[0]; 973 dst->u[1] = src0->u[1] | src1->u[1]; 974 dst->u[2] = src0->u[2] | src1->u[2]; 975 dst->u[3] = src0->u[3] | src1->u[3]; 976} 977 978static void 979micro_pow( 980 union tgsi_exec_channel *dst, 981 const union tgsi_exec_channel *src0, 982 const union tgsi_exec_channel *src1 ) 983{ 984#if FAST_MATH 985 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 986 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 987 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 988 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 989#else 990 dst->f[0] = powf( src0->f[0], src1->f[0] ); 991 dst->f[1] = powf( src0->f[1], src1->f[1] ); 992 dst->f[2] = powf( src0->f[2], 
src1->f[2] ); 993 dst->f[3] = powf( src0->f[3], src1->f[3] ); 994#endif 995} 996 997static void 998micro_rnd( 999 union tgsi_exec_channel *dst, 1000 const union tgsi_exec_channel *src ) 1001{ 1002 dst->f[0] = floorf( src->f[0] + 0.5f ); 1003 dst->f[1] = floorf( src->f[1] + 0.5f ); 1004 dst->f[2] = floorf( src->f[2] + 0.5f ); 1005 dst->f[3] = floorf( src->f[3] + 0.5f ); 1006} 1007 1008static void 1009micro_sgn( 1010 union tgsi_exec_channel *dst, 1011 const union tgsi_exec_channel *src ) 1012{ 1013 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 1014 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 1015 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 1016 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1017} 1018 1019static void 1020micro_shl( 1021 union tgsi_exec_channel *dst, 1022 const union tgsi_exec_channel *src0, 1023 const union tgsi_exec_channel *src1 ) 1024{ 1025 dst->i[0] = src0->i[0] << src1->i[0]; 1026 dst->i[1] = src0->i[1] << src1->i[1]; 1027 dst->i[2] = src0->i[2] << src1->i[2]; 1028 dst->i[3] = src0->i[3] << src1->i[3]; 1029} 1030 1031static void 1032micro_ishr( 1033 union tgsi_exec_channel *dst, 1034 const union tgsi_exec_channel *src0, 1035 const union tgsi_exec_channel *src1 ) 1036{ 1037 dst->i[0] = src0->i[0] >> src1->i[0]; 1038 dst->i[1] = src0->i[1] >> src1->i[1]; 1039 dst->i[2] = src0->i[2] >> src1->i[2]; 1040 dst->i[3] = src0->i[3] >> src1->i[3]; 1041} 1042 1043static void 1044micro_trunc( 1045 union tgsi_exec_channel *dst, 1046 const union tgsi_exec_channel *src0 ) 1047{ 1048 dst->f[0] = (float) (int) src0->f[0]; 1049 dst->f[1] = (float) (int) src0->f[1]; 1050 dst->f[2] = (float) (int) src0->f[2]; 1051 dst->f[3] = (float) (int) src0->f[3]; 1052} 1053 1054#if 0 1055static void 1056micro_ushr( 1057 union tgsi_exec_channel *dst, 1058 const union tgsi_exec_channel *src0, 1059 const union tgsi_exec_channel *src1 ) 1060{ 1061 dst->u[0] = src0->u[0] >> 
src1->u[0]; 1062 dst->u[1] = src0->u[1] >> src1->u[1]; 1063 dst->u[2] = src0->u[2] >> src1->u[2]; 1064 dst->u[3] = src0->u[3] >> src1->u[3]; 1065} 1066#endif 1067 1068static void 1069micro_sin( 1070 union tgsi_exec_channel *dst, 1071 const union tgsi_exec_channel *src ) 1072{ 1073 dst->f[0] = sinf( src->f[0] ); 1074 dst->f[1] = sinf( src->f[1] ); 1075 dst->f[2] = sinf( src->f[2] ); 1076 dst->f[3] = sinf( src->f[3] ); 1077} 1078 1079static void 1080micro_sqrt( union tgsi_exec_channel *dst, 1081 const union tgsi_exec_channel *src ) 1082{ 1083 dst->f[0] = sqrtf( src->f[0] ); 1084 dst->f[1] = sqrtf( src->f[1] ); 1085 dst->f[2] = sqrtf( src->f[2] ); 1086 dst->f[3] = sqrtf( src->f[3] ); 1087} 1088 1089static void 1090micro_sub( 1091 union tgsi_exec_channel *dst, 1092 const union tgsi_exec_channel *src0, 1093 const union tgsi_exec_channel *src1 ) 1094{ 1095 dst->f[0] = src0->f[0] - src1->f[0]; 1096 dst->f[1] = src0->f[1] - src1->f[1]; 1097 dst->f[2] = src0->f[2] - src1->f[2]; 1098 dst->f[3] = src0->f[3] - src1->f[3]; 1099} 1100 1101#if 0 1102static void 1103micro_u2f( 1104 union tgsi_exec_channel *dst, 1105 const union tgsi_exec_channel *src ) 1106{ 1107 dst->f[0] = (float) src->u[0]; 1108 dst->f[1] = (float) src->u[1]; 1109 dst->f[2] = (float) src->u[2]; 1110 dst->f[3] = (float) src->u[3]; 1111} 1112#endif 1113 1114static void 1115micro_xor( 1116 union tgsi_exec_channel *dst, 1117 const union tgsi_exec_channel *src0, 1118 const union tgsi_exec_channel *src1 ) 1119{ 1120 dst->u[0] = src0->u[0] ^ src1->u[0]; 1121 dst->u[1] = src0->u[1] ^ src1->u[1]; 1122 dst->u[2] = src0->u[2] ^ src1->u[2]; 1123 dst->u[3] = src0->u[3] ^ src1->u[3]; 1124} 1125 1126static void 1127fetch_src_file_channel( 1128 const struct tgsi_exec_machine *mach, 1129 const uint file, 1130 const uint swizzle, 1131 const union tgsi_exec_channel *index, 1132 union tgsi_exec_channel *chan ) 1133{ 1134 switch( swizzle ) { 1135 case TGSI_SWIZZLE_X: 1136 case TGSI_SWIZZLE_Y: 1137 case TGSI_SWIZZLE_Z: 1138 case 
TGSI_SWIZZLE_W: 1139 switch( file ) { 1140 case TGSI_FILE_CONSTANT: 1141 assert(mach->Consts); 1142 if (index->i[0] < 0) 1143 chan->f[0] = 0.0f; 1144 else 1145 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1146 if (index->i[1] < 0) 1147 chan->f[1] = 0.0f; 1148 else 1149 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1150 if (index->i[2] < 0) 1151 chan->f[2] = 0.0f; 1152 else 1153 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1154 if (index->i[3] < 0) 1155 chan->f[3] = 0.0f; 1156 else 1157 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1158 break; 1159 1160 case TGSI_FILE_INPUT: 1161 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1162 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1163 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1164 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1165 break; 1166 1167 case TGSI_FILE_TEMPORARY: 1168 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1169 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1170 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1171 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1172 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1173 break; 1174 1175 case TGSI_FILE_IMMEDIATE: 1176 assert( index->i[0] < (int) mach->ImmLimit ); 1177 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1178 assert( index->i[1] < (int) mach->ImmLimit ); 1179 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1180 assert( index->i[2] < (int) mach->ImmLimit ); 1181 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1182 assert( index->i[3] < (int) mach->ImmLimit ); 1183 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1184 break; 1185 1186 case TGSI_FILE_ADDRESS: 1187 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1188 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1189 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1190 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1191 break; 1192 1193 case 
TGSI_FILE_PREDICATE: 1194 assert(index->i[0] < TGSI_EXEC_NUM_PREDS); 1195 assert(index->i[1] < TGSI_EXEC_NUM_PREDS); 1196 assert(index->i[2] < TGSI_EXEC_NUM_PREDS); 1197 assert(index->i[3] < TGSI_EXEC_NUM_PREDS); 1198 chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0]; 1199 chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1]; 1200 chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2]; 1201 chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3]; 1202 break; 1203 1204 case TGSI_FILE_OUTPUT: 1205 /* vertex/fragment output vars can be read too */ 1206 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1207 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1208 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1209 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1210 break; 1211 1212 default: 1213 assert( 0 ); 1214 } 1215 break; 1216 1217 default: 1218 assert( 0 ); 1219 } 1220} 1221 1222static void 1223fetch_source( 1224 const struct tgsi_exec_machine *mach, 1225 union tgsi_exec_channel *chan, 1226 const struct tgsi_full_src_register *reg, 1227 const uint chan_index ) 1228{ 1229 union tgsi_exec_channel index; 1230 uint swizzle; 1231 1232 /* We start with a direct index into a register file. 1233 * 1234 * file[1], 1235 * where: 1236 * file = SrcRegister.File 1237 * [1] = SrcRegister.Index 1238 */ 1239 index.i[0] = 1240 index.i[1] = 1241 index.i[2] = 1242 index.i[3] = reg->SrcRegister.Index; 1243 1244 /* There is an extra source register that indirectly subscripts 1245 * a register file. The direct index now becomes an offset 1246 * that is being added to the indirect register. 
1247 * 1248 * file[ind[2].x+1], 1249 * where: 1250 * ind = SrcRegisterInd.File 1251 * [2] = SrcRegisterInd.Index 1252 * .x = SrcRegisterInd.SwizzleX 1253 */ 1254 if (reg->SrcRegister.Indirect) { 1255 union tgsi_exec_channel index2; 1256 union tgsi_exec_channel indir_index; 1257 const uint execmask = mach->ExecMask; 1258 uint i; 1259 1260 /* which address register (always zero now) */ 1261 index2.i[0] = 1262 index2.i[1] = 1263 index2.i[2] = 1264 index2.i[3] = reg->SrcRegisterInd.Index; 1265 1266 /* get current value of address register[swizzle] */ 1267 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1268 fetch_src_file_channel( 1269 mach, 1270 reg->SrcRegisterInd.File, 1271 swizzle, 1272 &index2, 1273 &indir_index ); 1274 1275 /* add value of address register to the offset */ 1276 index.i[0] += (int) indir_index.f[0]; 1277 index.i[1] += (int) indir_index.f[1]; 1278 index.i[2] += (int) indir_index.f[2]; 1279 index.i[3] += (int) indir_index.f[3]; 1280 1281 /* for disabled execution channels, zero-out the index to 1282 * avoid using a potential garbage value. 1283 */ 1284 for (i = 0; i < QUAD_SIZE; i++) { 1285 if ((execmask & (1 << i)) == 0) 1286 index.i[i] = 0; 1287 } 1288 } 1289 1290 /* There is an extra source register that is a second 1291 * subscript to a register file. Effectively it means that 1292 * the register file is actually a 2D array of registers. 1293 * 1294 * file[1][3] == file[1*sizeof(file[1])+3], 1295 * where: 1296 * [3] = SrcRegisterDim.Index 1297 */ 1298 if (reg->SrcRegister.Dimension) { 1299 /* The size of the first-order array depends on the register file type. 1300 * We need to multiply the index to the first array to get an effective, 1301 * "flat" index that points to the beginning of the second-order array. 
1302 */ 1303 switch (reg->SrcRegister.File) { 1304 case TGSI_FILE_INPUT: 1305 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1306 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1307 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1308 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1309 break; 1310 case TGSI_FILE_CONSTANT: 1311 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1312 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1313 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1314 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1315 break; 1316 default: 1317 assert( 0 ); 1318 } 1319 1320 index.i[0] += reg->SrcRegisterDim.Index; 1321 index.i[1] += reg->SrcRegisterDim.Index; 1322 index.i[2] += reg->SrcRegisterDim.Index; 1323 index.i[3] += reg->SrcRegisterDim.Index; 1324 1325 /* Again, the second subscript index can be addressed indirectly 1326 * identically to the first one. 1327 * Nothing stops us from indirectly addressing the indirect register, 1328 * but there is no need for that, so we won't exercise it. 1329 * 1330 * file[1][ind[4].y+3], 1331 * where: 1332 * ind = SrcRegisterDimInd.File 1333 * [4] = SrcRegisterDimInd.Index 1334 * .y = SrcRegisterDimInd.SwizzleX 1335 */ 1336 if (reg->SrcRegisterDim.Indirect) { 1337 union tgsi_exec_channel index2; 1338 union tgsi_exec_channel indir_index; 1339 const uint execmask = mach->ExecMask; 1340 uint i; 1341 1342 index2.i[0] = 1343 index2.i[1] = 1344 index2.i[2] = 1345 index2.i[3] = reg->SrcRegisterDimInd.Index; 1346 1347 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1348 fetch_src_file_channel( 1349 mach, 1350 reg->SrcRegisterDimInd.File, 1351 swizzle, 1352 &index2, 1353 &indir_index ); 1354 1355 index.i[0] += (int) indir_index.f[0]; 1356 index.i[1] += (int) indir_index.f[1]; 1357 index.i[2] += (int) indir_index.f[2]; 1358 index.i[3] += (int) indir_index.f[3]; 1359 1360 /* for disabled execution channels, zero-out the index to 1361 * avoid using a potential garbage value. 
1362 */ 1363 for (i = 0; i < QUAD_SIZE; i++) { 1364 if ((execmask & (1 << i)) == 0) 1365 index.i[i] = 0; 1366 } 1367 } 1368 1369 /* If by any chance there was a need for a 3D array of register 1370 * files, we would have to check whether SrcRegisterDim is followed 1371 * by a dimension register and continue the saga. 1372 */ 1373 } 1374 1375 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1376 fetch_src_file_channel( 1377 mach, 1378 reg->SrcRegister.File, 1379 swizzle, 1380 &index, 1381 chan ); 1382 1383 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1384 case TGSI_UTIL_SIGN_CLEAR: 1385 micro_abs( chan, chan ); 1386 break; 1387 1388 case TGSI_UTIL_SIGN_SET: 1389 micro_abs( chan, chan ); 1390 micro_neg( chan, chan ); 1391 break; 1392 1393 case TGSI_UTIL_SIGN_TOGGLE: 1394 micro_neg( chan, chan ); 1395 break; 1396 1397 case TGSI_UTIL_SIGN_KEEP: 1398 break; 1399 } 1400} 1401 1402static void 1403store_dest( 1404 struct tgsi_exec_machine *mach, 1405 const union tgsi_exec_channel *chan, 1406 const struct tgsi_full_dst_register *reg, 1407 const struct tgsi_full_instruction *inst, 1408 uint chan_index ) 1409{ 1410 uint i; 1411 union tgsi_exec_channel null; 1412 union tgsi_exec_channel *dst; 1413 uint execmask = mach->ExecMask; 1414 int offset = 0; /* indirection offset */ 1415 int index; 1416 1417#ifdef DEBUG 1418 check_inf_or_nan(chan); 1419#endif 1420 1421 /* There is an extra source register that indirectly subscripts 1422 * a register file. The direct index now becomes an offset 1423 * that is being added to the indirect register. 
1424 * 1425 * file[ind[2].x+1], 1426 * where: 1427 * ind = DstRegisterInd.File 1428 * [2] = DstRegisterInd.Index 1429 * .x = DstRegisterInd.SwizzleX 1430 */ 1431 if (reg->DstRegister.Indirect) { 1432 union tgsi_exec_channel index; 1433 union tgsi_exec_channel indir_index; 1434 uint swizzle; 1435 1436 /* which address register (always zero for now) */ 1437 index.i[0] = 1438 index.i[1] = 1439 index.i[2] = 1440 index.i[3] = reg->DstRegisterInd.Index; 1441 1442 /* get current value of address register[swizzle] */ 1443 swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); 1444 1445 /* fetch values from the address/indirection register */ 1446 fetch_src_file_channel( 1447 mach, 1448 reg->DstRegisterInd.File, 1449 swizzle, 1450 &index, 1451 &indir_index ); 1452 1453 /* save indirection offset */ 1454 offset = (int) indir_index.f[0]; 1455 } 1456 1457 switch (reg->DstRegister.File) { 1458 case TGSI_FILE_NULL: 1459 dst = &null; 1460 break; 1461 1462 case TGSI_FILE_OUTPUT: 1463 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1464 + reg->DstRegister.Index; 1465 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1466 break; 1467 1468 case TGSI_FILE_TEMPORARY: 1469 index = reg->DstRegister.Index; 1470 assert( index < TGSI_EXEC_NUM_TEMPS ); 1471 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1472 break; 1473 1474 case TGSI_FILE_ADDRESS: 1475 index = reg->DstRegister.Index; 1476 dst = &mach->Addrs[index].xyzw[chan_index]; 1477 break; 1478 1479 case TGSI_FILE_LOOP: 1480 assert(reg->DstRegister.Index == 0); 1481 assert(mach->LoopCounterStackTop > 0); 1482 assert(chan_index == CHAN_X); 1483 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; 1484 break; 1485 1486 case TGSI_FILE_PREDICATE: 1487 index = reg->DstRegister.Index; 1488 assert(index < TGSI_EXEC_NUM_PREDS); 1489 dst = &mach->Predicates[index].xyzw[chan_index]; 1490 break; 1491 1492 default: 1493 assert( 0 ); 1494 return; 1495 } 1496 1497 if 
(inst->Instruction.Predicate) { 1498 uint swizzle; 1499 union tgsi_exec_channel *pred; 1500 1501 switch (chan_index) { 1502 case CHAN_X: 1503 swizzle = inst->InstructionPredicate.SwizzleX; 1504 break; 1505 case CHAN_Y: 1506 swizzle = inst->InstructionPredicate.SwizzleY; 1507 break; 1508 case CHAN_Z: 1509 swizzle = inst->InstructionPredicate.SwizzleZ; 1510 break; 1511 case CHAN_W: 1512 swizzle = inst->InstructionPredicate.SwizzleW; 1513 break; 1514 default: 1515 assert(0); 1516 return; 1517 } 1518 1519 assert(inst->InstructionPredicate.Index == 0); 1520 1521 pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle]; 1522 1523 if (inst->InstructionPredicate.Negate) { 1524 for (i = 0; i < QUAD_SIZE; i++) { 1525 if (pred->u[i]) { 1526 execmask &= ~(1 << i); 1527 } 1528 } 1529 } else { 1530 for (i = 0; i < QUAD_SIZE; i++) { 1531 if (!pred->u[i]) { 1532 execmask &= ~(1 << i); 1533 } 1534 } 1535 } 1536 } 1537 1538 switch (inst->Instruction.Saturate) { 1539 case TGSI_SAT_NONE: 1540 for (i = 0; i < QUAD_SIZE; i++) 1541 if (execmask & (1 << i)) 1542 dst->i[i] = chan->i[i]; 1543 break; 1544 1545 case TGSI_SAT_ZERO_ONE: 1546 for (i = 0; i < QUAD_SIZE; i++) 1547 if (execmask & (1 << i)) { 1548 if (chan->f[i] < 0.0f) 1549 dst->f[i] = 0.0f; 1550 else if (chan->f[i] > 1.0f) 1551 dst->f[i] = 1.0f; 1552 else 1553 dst->i[i] = chan->i[i]; 1554 } 1555 break; 1556 1557 case TGSI_SAT_MINUS_PLUS_ONE: 1558 for (i = 0; i < QUAD_SIZE; i++) 1559 if (execmask & (1 << i)) { 1560 if (chan->f[i] < -1.0f) 1561 dst->f[i] = -1.0f; 1562 else if (chan->f[i] > 1.0f) 1563 dst->f[i] = 1.0f; 1564 else 1565 dst->i[i] = chan->i[i]; 1566 } 1567 break; 1568 1569 default: 1570 assert( 0 ); 1571 } 1572} 1573 1574#define FETCH(VAL,INDEX,CHAN)\ 1575 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1576 1577#define STORE(VAL,INDEX,CHAN)\ 1578 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1579 1580 1581/** 1582 * Execute ARB-style KIL which is predicated by a src 
register. 1583 * Kill fragment if any of the four values is less than zero. 1584 */ 1585static void 1586exec_kil(struct tgsi_exec_machine *mach, 1587 const struct tgsi_full_instruction *inst) 1588{ 1589 uint uniquemask; 1590 uint chan_index; 1591 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1592 union tgsi_exec_channel r[1]; 1593 1594 /* This mask stores component bits that were already tested. */ 1595 uniquemask = 0; 1596 1597 for (chan_index = 0; chan_index < 4; chan_index++) 1598 { 1599 uint swizzle; 1600 uint i; 1601 1602 /* unswizzle channel */ 1603 swizzle = tgsi_util_get_full_src_register_swizzle ( 1604 &inst->FullSrcRegisters[0], 1605 chan_index); 1606 1607 /* check if the component has not been already tested */ 1608 if (uniquemask & (1 << swizzle)) 1609 continue; 1610 uniquemask |= 1 << swizzle; 1611 1612 FETCH(&r[0], 0, chan_index); 1613 for (i = 0; i < 4; i++) 1614 if (r[0].f[i] < 0.0f) 1615 kilmask |= 1 << i; 1616 } 1617 1618 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1619} 1620 1621/** 1622 * Execute NVIDIA-style KIL which is predicated by a condition code. 1623 * Kill fragment if the condition code is TRUE. 1624 */ 1625static void 1626exec_kilp(struct tgsi_exec_machine *mach, 1627 const struct tgsi_full_instruction *inst) 1628{ 1629 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1630 1631 /* "unconditional" kil */ 1632 kilmask = mach->ExecMask; 1633 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1634} 1635 1636 1637/* 1638 * Fetch a four texture samples using STR texture coordinates. 
1639 */ 1640static void 1641fetch_texel( struct tgsi_sampler *sampler, 1642 const union tgsi_exec_channel *s, 1643 const union tgsi_exec_channel *t, 1644 const union tgsi_exec_channel *p, 1645 float lodbias, /* XXX should be float[4] */ 1646 union tgsi_exec_channel *r, 1647 union tgsi_exec_channel *g, 1648 union tgsi_exec_channel *b, 1649 union tgsi_exec_channel *a ) 1650{ 1651 uint j; 1652 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1653 1654 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1655 1656 for (j = 0; j < 4; j++) { 1657 r->f[j] = rgba[0][j]; 1658 g->f[j] = rgba[1][j]; 1659 b->f[j] = rgba[2][j]; 1660 a->f[j] = rgba[3][j]; 1661 } 1662} 1663 1664 1665static void 1666exec_tex(struct tgsi_exec_machine *mach, 1667 const struct tgsi_full_instruction *inst, 1668 boolean biasLod, 1669 boolean projected) 1670{ 1671 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1672 union tgsi_exec_channel r[4]; 1673 uint chan_index; 1674 float lodBias; 1675 1676 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1677 1678 switch (inst->InstructionTexture.Texture) { 1679 case TGSI_TEXTURE_1D: 1680 case TGSI_TEXTURE_SHADOW1D: 1681 1682 FETCH(&r[0], 0, CHAN_X); 1683 1684 if (projected) { 1685 FETCH(&r[1], 0, CHAN_W); 1686 micro_div( &r[0], &r[0], &r[1] ); 1687 } 1688 1689 if (biasLod) { 1690 FETCH(&r[1], 0, CHAN_W); 1691 lodBias = r[2].f[0]; 1692 } 1693 else 1694 lodBias = 0.0; 1695 1696 fetch_texel(mach->Samplers[unit], 1697 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1698 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1699 break; 1700 1701 case TGSI_TEXTURE_2D: 1702 case TGSI_TEXTURE_RECT: 1703 case TGSI_TEXTURE_SHADOW2D: 1704 case TGSI_TEXTURE_SHADOWRECT: 1705 1706 FETCH(&r[0], 0, CHAN_X); 1707 FETCH(&r[1], 0, CHAN_Y); 1708 FETCH(&r[2], 0, CHAN_Z); 1709 1710 if (projected) { 1711 FETCH(&r[3], 0, CHAN_W); 1712 micro_div( &r[0], &r[0], &r[3] ); 1713 micro_div( &r[1], &r[1], &r[3] ); 1714 micro_div( &r[2], &r[2], &r[3] ); 1715 } 1716 1717 
if (biasLod) { 1718 FETCH(&r[3], 0, CHAN_W); 1719 lodBias = r[3].f[0]; 1720 } 1721 else 1722 lodBias = 0.0; 1723 1724 fetch_texel(mach->Samplers[unit], 1725 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1726 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1727 break; 1728 1729 case TGSI_TEXTURE_3D: 1730 case TGSI_TEXTURE_CUBE: 1731 1732 FETCH(&r[0], 0, CHAN_X); 1733 FETCH(&r[1], 0, CHAN_Y); 1734 FETCH(&r[2], 0, CHAN_Z); 1735 1736 if (projected) { 1737 FETCH(&r[3], 0, CHAN_W); 1738 micro_div( &r[0], &r[0], &r[3] ); 1739 micro_div( &r[1], &r[1], &r[3] ); 1740 micro_div( &r[2], &r[2], &r[3] ); 1741 } 1742 1743 if (biasLod) { 1744 FETCH(&r[3], 0, CHAN_W); 1745 lodBias = r[3].f[0]; 1746 } 1747 else 1748 lodBias = 0.0; 1749 1750 fetch_texel(mach->Samplers[unit], 1751 &r[0], &r[1], &r[2], lodBias, 1752 &r[0], &r[1], &r[2], &r[3]); 1753 break; 1754 1755 default: 1756 assert (0); 1757 } 1758 1759 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1760 STORE( &r[chan_index], 0, chan_index ); 1761 } 1762} 1763 1764static void 1765exec_txd(struct tgsi_exec_machine *mach, 1766 const struct tgsi_full_instruction *inst) 1767{ 1768 const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index; 1769 union tgsi_exec_channel r[4]; 1770 uint chan_index; 1771 1772 /* 1773 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 
1774 */ 1775 1776 switch (inst->InstructionTexture.Texture) { 1777 case TGSI_TEXTURE_1D: 1778 case TGSI_TEXTURE_SHADOW1D: 1779 1780 FETCH(&r[0], 0, CHAN_X); 1781 1782 fetch_texel(mach->Samplers[unit], 1783 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ 1784 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1785 break; 1786 1787 case TGSI_TEXTURE_2D: 1788 case TGSI_TEXTURE_RECT: 1789 case TGSI_TEXTURE_SHADOW2D: 1790 case TGSI_TEXTURE_SHADOWRECT: 1791 1792 FETCH(&r[0], 0, CHAN_X); 1793 FETCH(&r[1], 0, CHAN_Y); 1794 FETCH(&r[2], 0, CHAN_Z); 1795 1796 fetch_texel(mach->Samplers[unit], 1797 &r[0], &r[1], &r[2], 0.0f, /* inputs */ 1798 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1799 break; 1800 1801 case TGSI_TEXTURE_3D: 1802 case TGSI_TEXTURE_CUBE: 1803 1804 FETCH(&r[0], 0, CHAN_X); 1805 FETCH(&r[1], 0, CHAN_Y); 1806 FETCH(&r[2], 0, CHAN_Z); 1807 1808 fetch_texel(mach->Samplers[unit], 1809 &r[0], &r[1], &r[2], 0.0f, 1810 &r[0], &r[1], &r[2], &r[3]); 1811 break; 1812 1813 default: 1814 assert(0); 1815 } 1816 1817 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1818 STORE(&r[chan_index], 0, chan_index); 1819 } 1820} 1821 1822 1823/** 1824 * Evaluate a constant-valued coefficient at the position of the 1825 * current quad. 1826 */ 1827static void 1828eval_constant_coef( 1829 struct tgsi_exec_machine *mach, 1830 unsigned attrib, 1831 unsigned chan ) 1832{ 1833 unsigned i; 1834 1835 for( i = 0; i < QUAD_SIZE; i++ ) { 1836 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1837 } 1838} 1839 1840/** 1841 * Evaluate a linear-valued coefficient at the position of the 1842 * current quad. 
1843 */ 1844static void 1845eval_linear_coef( 1846 struct tgsi_exec_machine *mach, 1847 unsigned attrib, 1848 unsigned chan ) 1849{ 1850 const float x = mach->QuadPos.xyzw[0].f[0]; 1851 const float y = mach->QuadPos.xyzw[1].f[0]; 1852 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1853 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1854 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1855 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1856 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1857 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1858 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1859} 1860 1861/** 1862 * Evaluate a perspective-valued coefficient at the position of the 1863 * current quad. 1864 */ 1865static void 1866eval_perspective_coef( 1867 struct tgsi_exec_machine *mach, 1868 unsigned attrib, 1869 unsigned chan ) 1870{ 1871 const float x = mach->QuadPos.xyzw[0].f[0]; 1872 const float y = mach->QuadPos.xyzw[1].f[0]; 1873 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1874 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1875 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1876 const float *w = mach->QuadPos.xyzw[3].f; 1877 /* divide by W here */ 1878 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1879 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1880 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1881 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1882} 1883 1884 1885typedef void (* eval_coef_func)( 1886 struct tgsi_exec_machine *mach, 1887 unsigned attrib, 1888 unsigned chan ); 1889 1890static void 1891exec_declaration(struct tgsi_exec_machine *mach, 1892 const struct tgsi_full_declaration *decl) 1893{ 1894 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 1895 if (decl->Declaration.File == TGSI_FILE_INPUT) { 1896 uint first, last, mask; 1897 1898 first = decl->DeclarationRange.First; 1899 last = 
decl->DeclarationRange.Last; 1900 mask = decl->Declaration.UsageMask; 1901 1902 if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { 1903 assert(decl->Semantic.SemanticIndex == 0); 1904 assert(first == last); 1905 assert(mask = TGSI_WRITEMASK_XYZW); 1906 1907 mach->Inputs[first] = mach->QuadPos; 1908 } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) { 1909 uint i; 1910 1911 assert(decl->Semantic.SemanticIndex == 0); 1912 assert(first == last); 1913 1914 for (i = 0; i < QUAD_SIZE; i++) { 1915 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 1916 } 1917 } else { 1918 eval_coef_func eval; 1919 uint i, j; 1920 1921 switch (decl->Declaration.Interpolate) { 1922 case TGSI_INTERPOLATE_CONSTANT: 1923 eval = eval_constant_coef; 1924 break; 1925 1926 case TGSI_INTERPOLATE_LINEAR: 1927 eval = eval_linear_coef; 1928 break; 1929 1930 case TGSI_INTERPOLATE_PERSPECTIVE: 1931 eval = eval_perspective_coef; 1932 break; 1933 1934 default: 1935 assert(0); 1936 return; 1937 } 1938 1939 for (j = 0; j < NUM_CHANNELS; j++) { 1940 if (mask & (1 << j)) { 1941 for (i = first; i <= last; i++) { 1942 eval(mach, i, j); 1943 } 1944 } 1945 } 1946 } 1947 } 1948 } 1949} 1950 1951static void 1952exec_instruction( 1953 struct tgsi_exec_machine *mach, 1954 const struct tgsi_full_instruction *inst, 1955 int *pc ) 1956{ 1957 uint chan_index; 1958 union tgsi_exec_channel r[10]; 1959 union tgsi_exec_channel d[8]; 1960 1961 (*pc)++; 1962 1963 switch (inst->Instruction.Opcode) { 1964 case TGSI_OPCODE_ARL: 1965 case TGSI_OPCODE_FLR: 1966 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1967 FETCH( &r[0], 0, chan_index ); 1968 micro_flr(&d[chan_index], &r[0]); 1969 } 1970 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1971 STORE(&d[chan_index], 0, chan_index); 1972 } 1973 break; 1974 1975 case TGSI_OPCODE_MOV: 1976 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1977 FETCH(&d[chan_index], 0, chan_index); 1978 } 1979 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1980 STORE(&d[chan_index], 0, 
chan_index); 1981 } 1982 break; 1983 1984 case TGSI_OPCODE_LIT: 1985 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1986 FETCH( &r[0], 0, CHAN_X ); 1987 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1988 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 1989 } 1990 1991 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1992 FETCH( &r[1], 0, CHAN_Y ); 1993 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1994 1995 FETCH( &r[2], 0, CHAN_W ); 1996 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1997 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1998 micro_pow( &r[1], &r[1], &r[2] ); 1999 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2000 } 2001 2002 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2003 STORE(&d[CHAN_Y], 0, CHAN_Y); 2004 } 2005 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2006 STORE(&d[CHAN_Z], 0, CHAN_Z); 2007 } 2008 } 2009 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2010 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2011 } 2012 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2013 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2014 } 2015 break; 2016 2017 case TGSI_OPCODE_RCP: 2018 /* TGSI_OPCODE_RECIP */ 2019 FETCH( &r[0], 0, CHAN_X ); 2020 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2021 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2022 STORE( &r[0], 0, chan_index ); 2023 } 2024 break; 2025 2026 case TGSI_OPCODE_RSQ: 2027 /* TGSI_OPCODE_RECIPSQRT */ 2028 FETCH( &r[0], 0, CHAN_X ); 2029 micro_abs( &r[0], &r[0] ); 2030 micro_sqrt( &r[0], &r[0] ); 2031 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2032 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2033 STORE( &r[0], 0, chan_index ); 2034 } 2035 break; 2036 2037 case TGSI_OPCODE_EXP: 2038 FETCH( &r[0], 0, CHAN_X ); 2039 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2040 if 
(IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2041 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2042 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2043 } 2044 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2045 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2046 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2047 } 2048 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2049 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2050 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2051 } 2052 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2053 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2054 } 2055 break; 2056 2057 case TGSI_OPCODE_LOG: 2058 FETCH( &r[0], 0, CHAN_X ); 2059 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2060 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2061 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2062 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2063 STORE( &r[0], 0, CHAN_X ); 2064 } 2065 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2066 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2067 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2068 STORE( &r[0], 0, CHAN_Y ); 2069 } 2070 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2071 STORE( &r[1], 0, CHAN_Z ); 2072 } 2073 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2074 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2075 } 2076 break; 2077 2078 case TGSI_OPCODE_MUL: 2079 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2080 FETCH(&r[0], 0, chan_index); 2081 FETCH(&r[1], 1, chan_index); 2082 micro_mul(&d[chan_index], &r[0], &r[1]); 2083 } 2084 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2085 STORE(&d[chan_index], 0, chan_index); 2086 } 2087 break; 2088 2089 case TGSI_OPCODE_ADD: 2090 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2091 FETCH( &r[0], 0, chan_index ); 2092 FETCH( &r[1], 1, chan_index ); 2093 micro_add(&d[chan_index], &r[0], &r[1]); 2094 } 2095 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2096 STORE(&d[chan_index], 0, chan_index); 2097 } 2098 break; 2099 2100 case TGSI_OPCODE_DP3: 2101 /* 
TGSI_OPCODE_DOT3 */ 2102 FETCH( &r[0], 0, CHAN_X ); 2103 FETCH( &r[1], 1, CHAN_X ); 2104 micro_mul( &r[0], &r[0], &r[1] ); 2105 2106 FETCH( &r[1], 0, CHAN_Y ); 2107 FETCH( &r[2], 1, CHAN_Y ); 2108 micro_mul( &r[1], &r[1], &r[2] ); 2109 micro_add( &r[0], &r[0], &r[1] ); 2110 2111 FETCH( &r[1], 0, CHAN_Z ); 2112 FETCH( &r[2], 1, CHAN_Z ); 2113 micro_mul( &r[1], &r[1], &r[2] ); 2114 micro_add( &r[0], &r[0], &r[1] ); 2115 2116 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2117 STORE( &r[0], 0, chan_index ); 2118 } 2119 break; 2120 2121 case TGSI_OPCODE_DP4: 2122 /* TGSI_OPCODE_DOT4 */ 2123 FETCH(&r[0], 0, CHAN_X); 2124 FETCH(&r[1], 1, CHAN_X); 2125 2126 micro_mul( &r[0], &r[0], &r[1] ); 2127 2128 FETCH(&r[1], 0, CHAN_Y); 2129 FETCH(&r[2], 1, CHAN_Y); 2130 2131 micro_mul( &r[1], &r[1], &r[2] ); 2132 micro_add( &r[0], &r[0], &r[1] ); 2133 2134 FETCH(&r[1], 0, CHAN_Z); 2135 FETCH(&r[2], 1, CHAN_Z); 2136 2137 micro_mul( &r[1], &r[1], &r[2] ); 2138 micro_add( &r[0], &r[0], &r[1] ); 2139 2140 FETCH(&r[1], 0, CHAN_W); 2141 FETCH(&r[2], 1, CHAN_W); 2142 2143 micro_mul( &r[1], &r[1], &r[2] ); 2144 micro_add( &r[0], &r[0], &r[1] ); 2145 2146 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2147 STORE( &r[0], 0, chan_index ); 2148 } 2149 break; 2150 2151 case TGSI_OPCODE_DST: 2152 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2153 FETCH( &r[0], 0, CHAN_Y ); 2154 FETCH( &r[1], 1, CHAN_Y); 2155 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2156 } 2157 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2158 FETCH(&d[CHAN_Z], 0, CHAN_Z); 2159 } 2160 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2161 FETCH(&d[CHAN_W], 1, CHAN_W); 2162 } 2163 2164 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2165 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); 2166 } 2167 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2168 STORE(&d[CHAN_Y], 0, CHAN_Y); 2169 } 2170 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2171 STORE(&d[CHAN_Z], 0, CHAN_Z); 2172 } 2173 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2174 STORE(&d[CHAN_W], 0, CHAN_W); 
2175 } 2176 break; 2177 2178 case TGSI_OPCODE_MIN: 2179 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2180 FETCH(&r[0], 0, chan_index); 2181 FETCH(&r[1], 1, chan_index); 2182 2183 /* XXX use micro_min()?? */ 2184 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); 2185 } 2186 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2187 STORE(&d[chan_index], 0, chan_index); 2188 } 2189 break; 2190 2191 case TGSI_OPCODE_MAX: 2192 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2193 FETCH(&r[0], 0, chan_index); 2194 FETCH(&r[1], 1, chan_index); 2195 2196 /* XXX use micro_max()?? */ 2197 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); 2198 } 2199 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2200 STORE(&d[chan_index], 0, chan_index); 2201 } 2202 break; 2203 2204 case TGSI_OPCODE_SLT: 2205 /* TGSI_OPCODE_SETLT */ 2206 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2207 FETCH( &r[0], 0, chan_index ); 2208 FETCH( &r[1], 1, chan_index ); 2209 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2210 } 2211 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2212 STORE(&d[chan_index], 0, chan_index); 2213 } 2214 break; 2215 2216 case TGSI_OPCODE_SGE: 2217 /* TGSI_OPCODE_SETGE */ 2218 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2219 FETCH( &r[0], 0, chan_index ); 2220 FETCH( &r[1], 1, chan_index ); 2221 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2222 } 2223 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2224 STORE(&d[chan_index], 0, chan_index); 2225 } 2226 break; 2227 2228 case TGSI_OPCODE_MAD: 2229 /* TGSI_OPCODE_MADD */ 2230 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2231 FETCH( &r[0], 0, chan_index ); 2232 FETCH( &r[1], 1, chan_index ); 2233 micro_mul( &r[0], &r[0], &r[1] ); 2234 FETCH( &r[1], 2, chan_index ); 2235 micro_add(&d[chan_index], &r[0], &r[1]); 2236 } 2237 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2238 
STORE(&d[chan_index], 0, chan_index); 2239 } 2240 break; 2241 2242 case TGSI_OPCODE_SUB: 2243 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2244 FETCH(&r[0], 0, chan_index); 2245 FETCH(&r[1], 1, chan_index); 2246 micro_sub(&d[chan_index], &r[0], &r[1]); 2247 } 2248 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2249 STORE(&d[chan_index], 0, chan_index); 2250 } 2251 break; 2252 2253 case TGSI_OPCODE_LRP: 2254 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2255 FETCH(&r[0], 0, chan_index); 2256 FETCH(&r[1], 1, chan_index); 2257 FETCH(&r[2], 2, chan_index); 2258 micro_sub( &r[1], &r[1], &r[2] ); 2259 micro_mul( &r[0], &r[0], &r[1] ); 2260 micro_add(&d[chan_index], &r[0], &r[2]); 2261 } 2262 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2263 STORE(&d[chan_index], 0, chan_index); 2264 } 2265 break; 2266 2267 case TGSI_OPCODE_CND: 2268 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2269 FETCH(&r[0], 0, chan_index); 2270 FETCH(&r[1], 1, chan_index); 2271 FETCH(&r[2], 2, chan_index); 2272 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2273 } 2274 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2275 STORE(&d[chan_index], 0, chan_index); 2276 } 2277 break; 2278 2279 case TGSI_OPCODE_DP2A: 2280 FETCH( &r[0], 0, CHAN_X ); 2281 FETCH( &r[1], 1, CHAN_X ); 2282 micro_mul( &r[0], &r[0], &r[1] ); 2283 2284 FETCH( &r[1], 0, CHAN_Y ); 2285 FETCH( &r[2], 1, CHAN_Y ); 2286 micro_mul( &r[1], &r[1], &r[2] ); 2287 micro_add( &r[0], &r[0], &r[1] ); 2288 2289 FETCH( &r[2], 2, CHAN_X ); 2290 micro_add( &r[0], &r[0], &r[2] ); 2291 2292 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2293 STORE( &r[0], 0, chan_index ); 2294 } 2295 break; 2296 2297 case TGSI_OPCODE_FRC: 2298 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2299 FETCH( &r[0], 0, chan_index ); 2300 micro_frc(&d[chan_index], &r[0]); 2301 } 2302 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2303 STORE(&d[chan_index], 0, chan_index); 2304 } 2305 break; 2306 2307 case TGSI_OPCODE_CLAMP: 2308 
FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2309 FETCH(&r[0], 0, chan_index); 2310 FETCH(&r[1], 1, chan_index); 2311 micro_max(&r[0], &r[0], &r[1]); 2312 FETCH(&r[1], 2, chan_index); 2313 micro_min(&d[chan_index], &r[0], &r[1]); 2314 } 2315 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2316 STORE(&d[chan_index], 0, chan_index); 2317 } 2318 break; 2319 2320 case TGSI_OPCODE_ROUND: 2321 case TGSI_OPCODE_ARR: 2322 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2323 FETCH( &r[0], 0, chan_index ); 2324 micro_rnd(&d[chan_index], &r[0]); 2325 } 2326 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2327 STORE(&d[chan_index], 0, chan_index); 2328 } 2329 break; 2330 2331 case TGSI_OPCODE_EX2: 2332 FETCH(&r[0], 0, CHAN_X); 2333 2334 micro_exp2( &r[0], &r[0] ); 2335 2336 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2337 STORE( &r[0], 0, chan_index ); 2338 } 2339 break; 2340 2341 case TGSI_OPCODE_LG2: 2342 FETCH( &r[0], 0, CHAN_X ); 2343 micro_lg2( &r[0], &r[0] ); 2344 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2345 STORE( &r[0], 0, chan_index ); 2346 } 2347 break; 2348 2349 case TGSI_OPCODE_POW: 2350 FETCH(&r[0], 0, CHAN_X); 2351 FETCH(&r[1], 1, CHAN_X); 2352 2353 micro_pow( &r[0], &r[0], &r[1] ); 2354 2355 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2356 STORE( &r[0], 0, chan_index ); 2357 } 2358 break; 2359 2360 case TGSI_OPCODE_XPD: 2361 FETCH(&r[0], 0, CHAN_Y); 2362 FETCH(&r[1], 1, CHAN_Z); 2363 2364 micro_mul( &r[2], &r[0], &r[1] ); 2365 2366 FETCH(&r[3], 0, CHAN_Z); 2367 FETCH(&r[4], 1, CHAN_Y); 2368 2369 micro_mul( &r[5], &r[3], &r[4] ); 2370 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2371 2372 FETCH(&r[2], 1, CHAN_X); 2373 2374 micro_mul( &r[3], &r[3], &r[2] ); 2375 2376 FETCH(&r[5], 0, CHAN_X); 2377 2378 micro_mul( &r[1], &r[1], &r[5] ); 2379 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2380 2381 micro_mul( &r[5], &r[5], &r[4] ); 2382 micro_mul( &r[0], &r[0], &r[2] ); 2383 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2384 2385 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2386 
STORE(&d[CHAN_X], 0, CHAN_X); 2387 } 2388 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2389 STORE(&d[CHAN_Y], 0, CHAN_Y); 2390 } 2391 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2392 STORE(&d[CHAN_Z], 0, CHAN_Z); 2393 } 2394 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2395 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2396 } 2397 break; 2398 2399 case TGSI_OPCODE_ABS: 2400 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2401 FETCH(&r[0], 0, chan_index); 2402 micro_abs(&d[chan_index], &r[0]); 2403 } 2404 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2405 STORE(&d[chan_index], 0, chan_index); 2406 } 2407 break; 2408 2409 case TGSI_OPCODE_RCC: 2410 FETCH(&r[0], 0, CHAN_X); 2411 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2412 micro_float_clamp(&r[0], &r[0]); 2413 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2414 STORE(&r[0], 0, chan_index); 2415 } 2416 break; 2417 2418 case TGSI_OPCODE_DPH: 2419 FETCH(&r[0], 0, CHAN_X); 2420 FETCH(&r[1], 1, CHAN_X); 2421 2422 micro_mul( &r[0], &r[0], &r[1] ); 2423 2424 FETCH(&r[1], 0, CHAN_Y); 2425 FETCH(&r[2], 1, CHAN_Y); 2426 2427 micro_mul( &r[1], &r[1], &r[2] ); 2428 micro_add( &r[0], &r[0], &r[1] ); 2429 2430 FETCH(&r[1], 0, CHAN_Z); 2431 FETCH(&r[2], 1, CHAN_Z); 2432 2433 micro_mul( &r[1], &r[1], &r[2] ); 2434 micro_add( &r[0], &r[0], &r[1] ); 2435 2436 FETCH(&r[1], 1, CHAN_W); 2437 2438 micro_add( &r[0], &r[0], &r[1] ); 2439 2440 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2441 STORE( &r[0], 0, chan_index ); 2442 } 2443 break; 2444 2445 case TGSI_OPCODE_COS: 2446 FETCH(&r[0], 0, CHAN_X); 2447 2448 micro_cos( &r[0], &r[0] ); 2449 2450 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2451 STORE( &r[0], 0, chan_index ); 2452 } 2453 break; 2454 2455 case TGSI_OPCODE_DDX: 2456 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2457 FETCH( &r[0], 0, chan_index ); 2458 micro_ddx(&d[chan_index], &r[0]); 2459 } 2460 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2461 STORE(&d[chan_index], 0, chan_index); 2462 } 
2463 break; 2464 2465 case TGSI_OPCODE_DDY: 2466 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2467 FETCH( &r[0], 0, chan_index ); 2468 micro_ddy(&d[chan_index], &r[0]); 2469 } 2470 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2471 STORE(&d[chan_index], 0, chan_index); 2472 } 2473 break; 2474 2475 case TGSI_OPCODE_KILP: 2476 exec_kilp (mach, inst); 2477 break; 2478 2479 case TGSI_OPCODE_KIL: 2480 exec_kil (mach, inst); 2481 break; 2482 2483 case TGSI_OPCODE_PK2H: 2484 assert (0); 2485 break; 2486 2487 case TGSI_OPCODE_PK2US: 2488 assert (0); 2489 break; 2490 2491 case TGSI_OPCODE_PK4B: 2492 assert (0); 2493 break; 2494 2495 case TGSI_OPCODE_PK4UB: 2496 assert (0); 2497 break; 2498 2499 case TGSI_OPCODE_RFL: 2500 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2501 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2502 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2503 /* r0 = dp3(src0, src0) */ 2504 FETCH(&r[2], 0, CHAN_X); 2505 micro_mul(&r[0], &r[2], &r[2]); 2506 FETCH(&r[4], 0, CHAN_Y); 2507 micro_mul(&r[8], &r[4], &r[4]); 2508 micro_add(&r[0], &r[0], &r[8]); 2509 FETCH(&r[6], 0, CHAN_Z); 2510 micro_mul(&r[8], &r[6], &r[6]); 2511 micro_add(&r[0], &r[0], &r[8]); 2512 2513 /* r1 = dp3(src0, src1) */ 2514 FETCH(&r[3], 1, CHAN_X); 2515 micro_mul(&r[1], &r[2], &r[3]); 2516 FETCH(&r[5], 1, CHAN_Y); 2517 micro_mul(&r[8], &r[4], &r[5]); 2518 micro_add(&r[1], &r[1], &r[8]); 2519 FETCH(&r[7], 1, CHAN_Z); 2520 micro_mul(&r[8], &r[6], &r[7]); 2521 micro_add(&r[1], &r[1], &r[8]); 2522 2523 /* r1 = 2 * r1 / r0 */ 2524 micro_add(&r[1], &r[1], &r[1]); 2525 micro_div(&r[1], &r[1], &r[0]); 2526 2527 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2528 micro_mul(&r[2], &r[2], &r[1]); 2529 micro_sub(&r[2], &r[2], &r[3]); 2530 STORE(&r[2], 0, CHAN_X); 2531 } 2532 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2533 micro_mul(&r[4], &r[4], &r[1]); 2534 micro_sub(&r[4], &r[4], &r[5]); 2535 STORE(&r[4], 0, CHAN_Y); 2536 } 2537 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2538 micro_mul(&r[6], &r[6], &r[1]); 2539 micro_sub(&r[6], 
&r[6], &r[7]); 2540 STORE(&r[6], 0, CHAN_Z); 2541 } 2542 } 2543 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2544 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2545 } 2546 break; 2547 2548 case TGSI_OPCODE_SEQ: 2549 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2550 FETCH( &r[0], 0, chan_index ); 2551 FETCH( &r[1], 1, chan_index ); 2552 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2553 } 2554 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2555 STORE(&d[chan_index], 0, chan_index); 2556 } 2557 break; 2558 2559 case TGSI_OPCODE_SFL: 2560 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2561 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2562 } 2563 break; 2564 2565 case TGSI_OPCODE_SGT: 2566 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2567 FETCH( &r[0], 0, chan_index ); 2568 FETCH( &r[1], 1, chan_index ); 2569 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2570 } 2571 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2572 STORE(&d[chan_index], 0, chan_index); 2573 } 2574 break; 2575 2576 case TGSI_OPCODE_SIN: 2577 FETCH( &r[0], 0, CHAN_X ); 2578 micro_sin( &r[0], &r[0] ); 2579 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2580 STORE( &r[0], 0, chan_index ); 2581 } 2582 break; 2583 2584 case TGSI_OPCODE_SLE: 2585 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2586 FETCH( &r[0], 0, chan_index ); 2587 FETCH( &r[1], 1, chan_index ); 2588 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2589 } 2590 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2591 STORE(&d[chan_index], 0, chan_index); 2592 } 2593 break; 2594 2595 case TGSI_OPCODE_SNE: 2596 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2597 FETCH( &r[0], 0, chan_index ); 2598 FETCH( &r[1], 1, chan_index ); 2599 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 
&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2600 } 2601 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2602 STORE(&d[chan_index], 0, chan_index); 2603 } 2604 break; 2605 2606 case TGSI_OPCODE_STR: 2607 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2608 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2609 } 2610 break; 2611 2612 case TGSI_OPCODE_TEX: 2613 /* simple texture lookup */ 2614 /* src[0] = texcoord */ 2615 /* src[1] = sampler unit */ 2616 exec_tex(mach, inst, FALSE, FALSE); 2617 break; 2618 2619 case TGSI_OPCODE_TXB: 2620 /* Texture lookup with lod bias */ 2621 /* src[0] = texcoord (src[0].w = LOD bias) */ 2622 /* src[1] = sampler unit */ 2623 exec_tex(mach, inst, TRUE, FALSE); 2624 break; 2625 2626 case TGSI_OPCODE_TXD: 2627 /* Texture lookup with explict partial derivatives */ 2628 /* src[0] = texcoord */ 2629 /* src[1] = d[strq]/dx */ 2630 /* src[2] = d[strq]/dy */ 2631 /* src[3] = sampler unit */ 2632 exec_txd(mach, inst); 2633 break; 2634 2635 case TGSI_OPCODE_TXL: 2636 /* Texture lookup with explit LOD */ 2637 /* src[0] = texcoord (src[0].w = LOD) */ 2638 /* src[1] = sampler unit */ 2639 exec_tex(mach, inst, TRUE, FALSE); 2640 break; 2641 2642 case TGSI_OPCODE_TXP: 2643 /* Texture lookup with projection */ 2644 /* src[0] = texcoord (src[0].w = projection) */ 2645 /* src[1] = sampler unit */ 2646 exec_tex(mach, inst, FALSE, TRUE); 2647 break; 2648 2649 case TGSI_OPCODE_UP2H: 2650 assert (0); 2651 break; 2652 2653 case TGSI_OPCODE_UP2US: 2654 assert (0); 2655 break; 2656 2657 case TGSI_OPCODE_UP4B: 2658 assert (0); 2659 break; 2660 2661 case TGSI_OPCODE_UP4UB: 2662 assert (0); 2663 break; 2664 2665 case TGSI_OPCODE_X2D: 2666 FETCH(&r[0], 1, CHAN_X); 2667 FETCH(&r[1], 1, CHAN_Y); 2668 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2669 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2670 FETCH(&r[2], 2, CHAN_X); 2671 micro_mul(&r[2], &r[2], &r[0]); 2672 FETCH(&r[3], 2, CHAN_Y); 2673 micro_mul(&r[3], &r[3], &r[1]); 2674 micro_add(&r[2], &r[2], &r[3]); 2675 
FETCH(&r[3], 0, CHAN_X); 2676 micro_add(&d[CHAN_X], &r[2], &r[3]); 2677 2678 } 2679 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2680 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2681 FETCH(&r[2], 2, CHAN_Z); 2682 micro_mul(&r[2], &r[2], &r[0]); 2683 FETCH(&r[3], 2, CHAN_W); 2684 micro_mul(&r[3], &r[3], &r[1]); 2685 micro_add(&r[2], &r[2], &r[3]); 2686 FETCH(&r[3], 0, CHAN_Y); 2687 micro_add(&d[CHAN_Y], &r[2], &r[3]); 2688 2689 } 2690 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2691 STORE(&d[CHAN_X], 0, CHAN_X); 2692 } 2693 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2694 STORE(&d[CHAN_Y], 0, CHAN_Y); 2695 } 2696 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2697 STORE(&d[CHAN_X], 0, CHAN_Z); 2698 } 2699 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2700 STORE(&d[CHAN_Y], 0, CHAN_W); 2701 } 2702 break; 2703 2704 case TGSI_OPCODE_ARA: 2705 assert (0); 2706 break; 2707 2708 case TGSI_OPCODE_BRA: 2709 assert (0); 2710 break; 2711 2712 case TGSI_OPCODE_CAL: 2713 /* skip the call if no execution channels are enabled */ 2714 if (mach->ExecMask) { 2715 /* do the call */ 2716 2717 /* First, record the depths of the execution stacks. 2718 * This is important for deeply nested/looped return statements. 2719 * We have to unwind the stacks by the correct amount. For a 2720 * real code generator, we could determine the number of entries 2721 * to pop off each stack with simple static analysis and avoid 2722 * implementing this data structure at run time. 
2723 */ 2724 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 2725 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 2726 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 2727 /* note that PC was already incremented above */ 2728 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 2729 2730 mach->CallStackTop++; 2731 2732 /* Second, push the Cond, Loop, Cont, Func stacks */ 2733 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2734 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2735 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2736 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2737 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2738 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2739 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2740 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2741 2742 /* Finally, jump to the subroutine */ 2743 *pc = inst->InstructionLabel.Label; 2744 } 2745 break; 2746 2747 case TGSI_OPCODE_RET: 2748 mach->FuncMask &= ~mach->ExecMask; 2749 UPDATE_EXEC_MASK(mach); 2750 2751 if (mach->FuncMask == 0x0) { 2752 /* really return now (otherwise, keep executing */ 2753 2754 if (mach->CallStackTop == 0) { 2755 /* returning from main() */ 2756 *pc = -1; 2757 return; 2758 } 2759 2760 assert(mach->CallStackTop > 0); 2761 mach->CallStackTop--; 2762 2763 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 2764 mach->CondMask = mach->CondStack[mach->CondStackTop]; 2765 2766 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 2767 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 2768 2769 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 2770 mach->ContMask = mach->ContStack[mach->ContStackTop]; 2771 2772 assert(mach->FuncStackTop > 0); 2773 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2774 2775 *pc = 
mach->CallStack[mach->CallStackTop].ReturnAddr; 2776 2777 UPDATE_EXEC_MASK(mach); 2778 } 2779 break; 2780 2781 case TGSI_OPCODE_SSG: 2782 /* TGSI_OPCODE_SGN */ 2783 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2784 FETCH( &r[0], 0, chan_index ); 2785 micro_sgn(&d[chan_index], &r[0]); 2786 } 2787 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2788 STORE(&d[chan_index], 0, chan_index); 2789 } 2790 break; 2791 2792 case TGSI_OPCODE_CMP: 2793 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2794 FETCH(&r[0], 0, chan_index); 2795 FETCH(&r[1], 1, chan_index); 2796 FETCH(&r[2], 2, chan_index); 2797 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); 2798 } 2799 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2800 STORE(&d[chan_index], 0, chan_index); 2801 } 2802 break; 2803 2804 case TGSI_OPCODE_SCS: 2805 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2806 FETCH( &r[0], 0, CHAN_X ); 2807 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2808 micro_cos(&r[1], &r[0]); 2809 STORE(&r[1], 0, CHAN_X); 2810 } 2811 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2812 micro_sin(&r[1], &r[0]); 2813 STORE(&r[1], 0, CHAN_Y); 2814 } 2815 } 2816 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2817 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2818 } 2819 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2820 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2821 } 2822 break; 2823 2824 case TGSI_OPCODE_NRM: 2825 /* 3-component vector normalize */ 2826 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2827 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2828 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2829 /* r3 = sqrt(dp3(src0, src0)) */ 2830 FETCH(&r[0], 0, CHAN_X); 2831 micro_mul(&r[3], &r[0], &r[0]); 2832 FETCH(&r[1], 0, CHAN_Y); 2833 micro_mul(&r[4], &r[1], &r[1]); 2834 micro_add(&r[3], &r[3], &r[4]); 2835 FETCH(&r[2], 0, CHAN_Z); 2836 micro_mul(&r[4], &r[2], &r[2]); 2837 micro_add(&r[3], &r[3], &r[4]); 2838 micro_sqrt(&r[3], &r[3]); 2839 2840 
if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2841 micro_div(&r[0], &r[0], &r[3]); 2842 STORE(&r[0], 0, CHAN_X); 2843 } 2844 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2845 micro_div(&r[1], &r[1], &r[3]); 2846 STORE(&r[1], 0, CHAN_Y); 2847 } 2848 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2849 micro_div(&r[2], &r[2], &r[3]); 2850 STORE(&r[2], 0, CHAN_Z); 2851 } 2852 } 2853 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2854 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2855 } 2856 break; 2857 2858 case TGSI_OPCODE_NRM4: 2859 /* 4-component vector normalize */ 2860 { 2861 union tgsi_exec_channel tmp, dot; 2862 2863 /* tmp = dp4(src0, src0): */ 2864 FETCH( &r[0], 0, CHAN_X ); 2865 micro_mul( &tmp, &r[0], &r[0] ); 2866 2867 FETCH( &r[1], 0, CHAN_Y ); 2868 micro_mul( &dot, &r[1], &r[1] ); 2869 micro_add( &tmp, &tmp, &dot ); 2870 2871 FETCH( &r[2], 0, CHAN_Z ); 2872 micro_mul( &dot, &r[2], &r[2] ); 2873 micro_add( &tmp, &tmp, &dot ); 2874 2875 FETCH( &r[3], 0, CHAN_W ); 2876 micro_mul( &dot, &r[3], &r[3] ); 2877 micro_add( &tmp, &tmp, &dot ); 2878 2879 /* tmp = 1 / sqrt(tmp) */ 2880 micro_sqrt( &tmp, &tmp ); 2881 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2882 2883 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2884 /* chan = chan * tmp */ 2885 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2886 STORE( &r[chan_index], 0, chan_index ); 2887 } 2888 } 2889 break; 2890 2891 case TGSI_OPCODE_DIV: 2892 assert( 0 ); 2893 break; 2894 2895 case TGSI_OPCODE_DP2: 2896 FETCH( &r[0], 0, CHAN_X ); 2897 FETCH( &r[1], 1, CHAN_X ); 2898 micro_mul( &r[0], &r[0], &r[1] ); 2899 2900 FETCH( &r[1], 0, CHAN_Y ); 2901 FETCH( &r[2], 1, CHAN_Y ); 2902 micro_mul( &r[1], &r[1], &r[2] ); 2903 micro_add( &r[0], &r[0], &r[1] ); 2904 2905 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2906 STORE( &r[0], 0, chan_index ); 2907 } 2908 break; 2909 2910 case TGSI_OPCODE_IF: 2911 /* push CondMask */ 2912 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2913 
mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2914 FETCH( &r[0], 0, CHAN_X ); 2915 /* update CondMask */ 2916 if( ! r[0].u[0] ) { 2917 mach->CondMask &= ~0x1; 2918 } 2919 if( ! r[0].u[1] ) { 2920 mach->CondMask &= ~0x2; 2921 } 2922 if( ! r[0].u[2] ) { 2923 mach->CondMask &= ~0x4; 2924 } 2925 if( ! r[0].u[3] ) { 2926 mach->CondMask &= ~0x8; 2927 } 2928 UPDATE_EXEC_MASK(mach); 2929 /* Todo: If CondMask==0, jump to ELSE */ 2930 break; 2931 2932 case TGSI_OPCODE_ELSE: 2933 /* invert CondMask wrt previous mask */ 2934 { 2935 uint prevMask; 2936 assert(mach->CondStackTop > 0); 2937 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2938 mach->CondMask = ~mach->CondMask & prevMask; 2939 UPDATE_EXEC_MASK(mach); 2940 /* Todo: If CondMask==0, jump to ENDIF */ 2941 } 2942 break; 2943 2944 case TGSI_OPCODE_ENDIF: 2945 /* pop CondMask */ 2946 assert(mach->CondStackTop > 0); 2947 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2948 UPDATE_EXEC_MASK(mach); 2949 break; 2950 2951 case TGSI_OPCODE_END: 2952 /* halt execution */ 2953 *pc = -1; 2954 break; 2955 2956 case TGSI_OPCODE_REP: 2957 assert (0); 2958 break; 2959 2960 case TGSI_OPCODE_ENDREP: 2961 assert (0); 2962 break; 2963 2964 case TGSI_OPCODE_PUSHA: 2965 assert (0); 2966 break; 2967 2968 case TGSI_OPCODE_POPA: 2969 assert (0); 2970 break; 2971 2972 case TGSI_OPCODE_CEIL: 2973 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2974 FETCH( &r[0], 0, chan_index ); 2975 micro_ceil(&d[chan_index], &r[0]); 2976 } 2977 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2978 STORE(&d[chan_index], 0, chan_index); 2979 } 2980 break; 2981 2982 case TGSI_OPCODE_I2F: 2983 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2984 FETCH( &r[0], 0, chan_index ); 2985 micro_i2f(&d[chan_index], &r[0]); 2986 } 2987 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2988 STORE(&d[chan_index], 0, chan_index); 2989 } 2990 break; 2991 2992 case TGSI_OPCODE_NOT: 2993 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2994 FETCH( &r[0], 0, 
chan_index ); 2995 micro_not(&d[chan_index], &r[0]); 2996 } 2997 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2998 STORE(&d[chan_index], 0, chan_index); 2999 } 3000 break; 3001 3002 case TGSI_OPCODE_TRUNC: 3003 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3004 FETCH( &r[0], 0, chan_index ); 3005 micro_trunc(&d[chan_index], &r[0]); 3006 } 3007 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3008 STORE(&d[chan_index], 0, chan_index); 3009 } 3010 break; 3011 3012 case TGSI_OPCODE_SHL: 3013 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3014 FETCH( &r[0], 0, chan_index ); 3015 FETCH( &r[1], 1, chan_index ); 3016 micro_shl(&d[chan_index], &r[0], &r[1]); 3017 } 3018 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3019 STORE(&d[chan_index], 0, chan_index); 3020 } 3021 break; 3022 3023 case TGSI_OPCODE_SHR: 3024 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3025 FETCH( &r[0], 0, chan_index ); 3026 FETCH( &r[1], 1, chan_index ); 3027 micro_ishr(&d[chan_index], &r[0], &r[1]); 3028 } 3029 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3030 STORE(&d[chan_index], 0, chan_index); 3031 } 3032 break; 3033 3034 case TGSI_OPCODE_AND: 3035 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3036 FETCH( &r[0], 0, chan_index ); 3037 FETCH( &r[1], 1, chan_index ); 3038 micro_and(&d[chan_index], &r[0], &r[1]); 3039 } 3040 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3041 STORE(&d[chan_index], 0, chan_index); 3042 } 3043 break; 3044 3045 case TGSI_OPCODE_OR: 3046 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3047 FETCH( &r[0], 0, chan_index ); 3048 FETCH( &r[1], 1, chan_index ); 3049 micro_or(&d[chan_index], &r[0], &r[1]); 3050 } 3051 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3052 STORE(&d[chan_index], 0, chan_index); 3053 } 3054 break; 3055 3056 case TGSI_OPCODE_MOD: 3057 assert (0); 3058 break; 3059 3060 case TGSI_OPCODE_XOR: 3061 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3062 FETCH( &r[0], 0, chan_index ); 3063 FETCH( &r[1], 1, chan_index ); 3064 micro_xor(&d[chan_index], 
&r[0], &r[1]); 3065 } 3066 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3067 STORE(&d[chan_index], 0, chan_index); 3068 } 3069 break; 3070 3071 case TGSI_OPCODE_SAD: 3072 assert (0); 3073 break; 3074 3075 case TGSI_OPCODE_TXF: 3076 assert (0); 3077 break; 3078 3079 case TGSI_OPCODE_TXQ: 3080 assert (0); 3081 break; 3082 3083 case TGSI_OPCODE_EMIT: 3084 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 3085 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 3086 break; 3087 3088 case TGSI_OPCODE_ENDPRIM: 3089 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 3090 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 3091 break; 3092 3093 case TGSI_OPCODE_BGNFOR: 3094 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3095 for (chan_index = 0; chan_index < 3; chan_index++) { 3096 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3097 } 3098 ++mach->LoopCounterStackTop; 3099 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); 3100 /* update LoopMask */ 3101 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3102 mach->LoopMask &= ~0x1; 3103 } 3104 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3105 mach->LoopMask &= ~0x2; 3106 } 3107 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3108 mach->LoopMask &= ~0x4; 3109 } 3110 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3111 mach->LoopMask &= ~0x8; 3112 } 3113 /* TODO: if mach->LoopMask == 0, jump to end of loop */ 3114 UPDATE_EXEC_MASK(mach); 3115 /* fall-through (for now) */ 3116 case TGSI_OPCODE_BGNLOOP: 3117 /* push LoopMask and ContMasks */ 3118 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3119 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3120 assert(mach->ContStackTop < 
TGSI_EXEC_MAX_LOOP_NESTING); 3121 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3122 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3123 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3124 break; 3125 3126 case TGSI_OPCODE_ENDFOR: 3127 assert(mach->LoopCounterStackTop > 0); 3128 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3129 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3130 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 3131 /* update LoopMask */ 3132 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3133 mach->LoopMask &= ~0x1; 3134 } 3135 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3136 mach->LoopMask &= ~0x2; 3137 } 3138 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3139 mach->LoopMask &= ~0x4; 3140 } 3141 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3142 mach->LoopMask &= ~0x8; 3143 } 3144 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3145 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3146 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3147 assert(mach->LoopLabelStackTop > 0); 3148 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3149 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); 3150 /* Restore ContMask, but don't pop */ 3151 assert(mach->ContStackTop > 0); 3152 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3153 UPDATE_EXEC_MASK(mach); 3154 if (mach->ExecMask) { 3155 /* repeat loop: jump to instruction just past BGNLOOP */ 3156 assert(mach->LoopLabelStackTop > 0); 3157 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3158 } 3159 else { 3160 /* exit loop: pop LoopMask */ 3161 assert(mach->LoopStackTop > 0); 3162 mach->LoopMask = 
mach->LoopStack[--mach->LoopStackTop]; 3163 /* pop ContMask */ 3164 assert(mach->ContStackTop > 0); 3165 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3166 assert(mach->LoopLabelStackTop > 0); 3167 --mach->LoopLabelStackTop; 3168 assert(mach->LoopCounterStackTop > 0); 3169 --mach->LoopCounterStackTop; 3170 } 3171 UPDATE_EXEC_MASK(mach); 3172 break; 3173 3174 case TGSI_OPCODE_ENDLOOP: 3175 /* Restore ContMask, but don't pop */ 3176 assert(mach->ContStackTop > 0); 3177 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3178 UPDATE_EXEC_MASK(mach); 3179 if (mach->ExecMask) { 3180 /* repeat loop: jump to instruction just past BGNLOOP */ 3181 assert(mach->LoopLabelStackTop > 0); 3182 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3183 } 3184 else { 3185 /* exit loop: pop LoopMask */ 3186 assert(mach->LoopStackTop > 0); 3187 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3188 /* pop ContMask */ 3189 assert(mach->ContStackTop > 0); 3190 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3191 assert(mach->LoopLabelStackTop > 0); 3192 --mach->LoopLabelStackTop; 3193 } 3194 UPDATE_EXEC_MASK(mach); 3195 break; 3196 3197 case TGSI_OPCODE_BRK: 3198 /* turn off loop channels for each enabled exec channel */ 3199 mach->LoopMask &= ~mach->ExecMask; 3200 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3201 UPDATE_EXEC_MASK(mach); 3202 break; 3203 3204 case TGSI_OPCODE_CONT: 3205 /* turn off cont channels for each enabled exec channel */ 3206 mach->ContMask &= ~mach->ExecMask; 3207 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3208 UPDATE_EXEC_MASK(mach); 3209 break; 3210 3211 case TGSI_OPCODE_BGNSUB: 3212 /* no-op */ 3213 break; 3214 3215 case TGSI_OPCODE_ENDSUB: 3216 /* no-op */ 3217 break; 3218 3219 case TGSI_OPCODE_NOP: 3220 break; 3221 3222 default: 3223 assert( 0 ); 3224 } 3225} 3226 3227#define DEBUG_EXECUTION 0 3228 3229 3230/** 3231 * Run TGSI interpreter. 
3232 * \return bitmask of "alive" quad components 3233 */ 3234uint 3235tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3236{ 3237 uint i; 3238 int pc = 0; 3239 3240 mach->CondMask = 0xf; 3241 mach->LoopMask = 0xf; 3242 mach->ContMask = 0xf; 3243 mach->FuncMask = 0xf; 3244 mach->ExecMask = 0xf; 3245 3246 assert(mach->CondStackTop == 0); 3247 assert(mach->LoopStackTop == 0); 3248 assert(mach->ContStackTop == 0); 3249 assert(mach->CallStackTop == 0); 3250 3251 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3252 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3253 3254 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3255 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3256 mach->Primitives[0] = 0; 3257 } 3258 3259 for (i = 0; i < QUAD_SIZE; i++) { 3260 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3261 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3262 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3263 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3264 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3265 } 3266 3267 /* execute declarations (interpolants) */ 3268 for (i = 0; i < mach->NumDeclarations; i++) { 3269 exec_declaration( mach, mach->Declarations+i ); 3270 } 3271 3272 { 3273#if DEBUG_EXECUTION 3274 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 3275 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 3276 uint inst = 1; 3277 3278 memcpy(temps, mach->Temps, sizeof(temps)); 3279 memcpy(outputs, mach->Outputs, sizeof(outputs)); 3280#endif 3281 3282 /* execute instructions, until pc is set to -1 */ 3283 while (pc != -1) { 3284 3285#if DEBUG_EXECUTION 3286 uint i; 3287 3288 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 3289#endif 3290 3291 assert(pc < (int) mach->NumInstructions); 3292 exec_instruction(mach, mach->Instructions + pc, &pc); 3293 3294#if DEBUG_EXECUTION 3295 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 3296 if (memcmp(&temps[i], &mach->Temps[i], 
sizeof(temps[i]))) { 3297 uint j; 3298 3299 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 3300 debug_printf("TEMP[%2u] = ", i); 3301 for (j = 0; j < 4; j++) { 3302 if (j > 0) { 3303 debug_printf(" "); 3304 } 3305 debug_printf("(%6f, %6f, %6f, %6f)\n", 3306 temps[i].xyzw[0].f[j], 3307 temps[i].xyzw[1].f[j], 3308 temps[i].xyzw[2].f[j], 3309 temps[i].xyzw[3].f[j]); 3310 } 3311 } 3312 } 3313 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 3314 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 3315 uint j; 3316 3317 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 3318 debug_printf("OUT[%2u] = ", i); 3319 for (j = 0; j < 4; j++) { 3320 if (j > 0) { 3321 debug_printf(" "); 3322 } 3323 debug_printf("{%6f, %6f, %6f, %6f}\n", 3324 outputs[i].xyzw[0].f[j], 3325 outputs[i].xyzw[1].f[j], 3326 outputs[i].xyzw[2].f[j], 3327 outputs[i].xyzw[3].f[j]); 3328 } 3329 } 3330 } 3331#endif 3332 } 3333 } 3334 3335#if 0 3336 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3337 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3338 /* 3339 * Scale back depth component. 3340 */ 3341 for (i = 0; i < 4; i++) 3342 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3343 } 3344#endif 3345 3346 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3347} 3348