tgsi_exec.c revision ede9f3b52ecb27ada81fee06a943bb595c60eaee
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65#define TILE_TOP_LEFT 0 66#define TILE_TOP_RIGHT 1 67#define TILE_BOTTOM_LEFT 2 68#define TILE_BOTTOM_RIGHT 3 69 70#define CHAN_X 0 71#define CHAN_Y 1 72#define CHAN_Z 2 73#define CHAN_W 3 74 75/* 76 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 77 */ 78#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 79#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 80#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 81#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 82#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 83#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 84#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 85#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 86#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 87#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 88#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 89#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 90#define TEMP_128_I TGSI_EXEC_TEMP_128_I 91#define TEMP_128_C TGSI_EXEC_TEMP_128_C 92#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 93#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 94#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 95#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 96#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 97#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 98#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 99#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 100#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 101#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 102#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 103#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 104#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 105#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 106#define TEMP_R0 TGSI_EXEC_TEMP_R0 107 108#define IS_CHANNEL_ENABLED(INST, CHAN)\ 109 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 110 111#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 112 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 113 114#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 115 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 116 if (IS_CHANNEL_ENABLED( INST, CHAN )) 117 118#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 119 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 120 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 121 122 123/** The execution mask depends on the conditional mask and the loop mask */ 124#define UPDATE_EXEC_MASK(MACH) \ 125 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 126 127 128static const union tgsi_exec_channel ZeroVec = 129 { { 0.0, 0.0, 0.0, 0.0 } }; 130 131 132#ifdef DEBUG 133static void 134check_inf_or_nan(const union tgsi_exec_channel *chan) 135{ 136 assert(!util_is_inf_or_nan(chan->f[0])); 137 assert(!util_is_inf_or_nan(chan->f[1])); 138 assert(!util_is_inf_or_nan(chan->f[2])); 139 assert(!util_is_inf_or_nan(chan->f[3])); 140} 141#endif 142 143 144#ifdef DEBUG 145static void 146print_chan(const char *msg, const union tgsi_exec_channel *chan) 147{ 148 debug_printf("%s = {%f, %f, %f, %f}\n", 149 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 150} 151#endif 152 153 154#ifdef DEBUG 155static void 156print_temp(const struct tgsi_exec_machine *mach, uint index) 157{ 158 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 159 int i; 160 debug_printf("Temp[%u] =\n", index); 161 for (i = 0; i < 4; i++) { 162 debug_printf(" %c: { %f, %f, %f, %f }\n", 163 "XYZW"[i], 164 tmp->xyzw[i].f[0], 165 tmp->xyzw[i].f[1], 166 tmp->xyzw[i].f[2], 167 tmp->xyzw[i].f[3]); 168 } 169} 170#endif 171 172 173/** 174 * Check if there's a potential src/dst register data dependency when 175 * using SOA execution. 176 * Example: 177 * MOV T, T.yxwz; 178 * This would expand into: 179 * MOV t0, t1; 180 * MOV t1, t0; 181 * MOV t2, t3; 182 * MOV t3, t2; 183 * The second instruction will have the wrong value for t0 if executed as-is. 184 */ 185static boolean 186tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 187{ 188 uint i, chan; 189 190 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 191 if (writemask == TGSI_WRITEMASK_X || 192 writemask == TGSI_WRITEMASK_Y || 193 writemask == TGSI_WRITEMASK_Z || 194 writemask == TGSI_WRITEMASK_W || 195 writemask == TGSI_WRITEMASK_NONE) { 196 /* no chance of data dependency */ 197 return FALSE; 198 } 199 200 /* loop over src regs */ 201 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 202 if ((inst->FullSrcRegisters[i].SrcRegister.File == 203 inst->FullDstRegisters[0].DstRegister.File) && 204 (inst->FullSrcRegisters[i].SrcRegister.Index == 205 inst->FullDstRegisters[0].DstRegister.Index)) { 206 /* loop over dest channels */ 207 uint channelsWritten = 0x0; 208 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 209 /* check if we're reading a channel that's been written */ 210 uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan); 211 if (swizzle <= TGSI_SWIZZLE_W && 212 (channelsWritten & (1 << swizzle))) { 213 return TRUE; 214 } 215 216 channelsWritten |= (1 << chan); 217 } 218 } 219 } 220 return FALSE; 221} 222 223 224/** 225 * Initialize machine state by expanding tokens to full instructions, 226 * allocating temporary storage, setting up constants, etc. 227 * After this, we can call tgsi_exec_machine_run() many times. 228 */ 229void 230tgsi_exec_machine_bind_shader( 231 struct tgsi_exec_machine *mach, 232 const struct tgsi_token *tokens, 233 uint numSamplers, 234 struct tgsi_sampler **samplers) 235{ 236 uint k; 237 struct tgsi_parse_context parse; 238 struct tgsi_exec_labels *labels = &mach->Labels; 239 struct tgsi_full_instruction *instructions; 240 struct tgsi_full_declaration *declarations; 241 uint maxInstructions = 10, numInstructions = 0; 242 uint maxDeclarations = 10, numDeclarations = 0; 243 uint instno = 0; 244 245#if 0 246 tgsi_dump(tokens, 0); 247#endif 248 249 util_init_math(); 250 251 mach->Tokens = tokens; 252 mach->Samplers = samplers; 253 254 k = tgsi_parse_init (&parse, mach->Tokens); 255 if (k != TGSI_PARSE_OK) { 256 debug_printf( "Problem parsing!\n" ); 257 return; 258 } 259 260 mach->Processor = parse.FullHeader.Processor.Processor; 261 mach->ImmLimit = 0; 262 labels->count = 0; 263 264 declarations = (struct tgsi_full_declaration *) 265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 266 267 if (!declarations) { 268 return; 269 } 270 271 instructions = (struct tgsi_full_instruction *) 272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 273 274 if (!instructions) { 275 FREE( declarations ); 276 return; 277 } 278 279 while( !tgsi_parse_end_of_tokens( &parse ) ) { 280 uint pointer = parse.Position; 281 uint i; 282 283 tgsi_parse_token( &parse ); 284 switch( parse.FullToken.Token.Type ) { 285 case TGSI_TOKEN_TYPE_DECLARATION: 286 /* save expanded declaration */ 287 if (numDeclarations == maxDeclarations) { 288 declarations = REALLOC(declarations, 289 maxDeclarations 290 * sizeof(struct tgsi_full_declaration), 291 (maxDeclarations + 10) 292 * sizeof(struct tgsi_full_declaration)); 293 maxDeclarations += 10; 294 } 295 memcpy(declarations + numDeclarations, 296 &parse.FullToken.FullDeclaration, 297 sizeof(declarations[0])); 298 numDeclarations++; 299 break; 300 301 case TGSI_TOKEN_TYPE_IMMEDIATE: 302 { 303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 304 assert( size <= 4 ); 305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 306 307 for( i = 0; i < size; i++ ) { 308 mach->Imms[mach->ImmLimit][i] = 309 parse.FullToken.FullImmediate.u[i].Float; 310 } 311 mach->ImmLimit += 1; 312 } 313 break; 314 315 case TGSI_TOKEN_TYPE_INSTRUCTION: 316 assert( labels->count < MAX_LABELS ); 317 318 labels->labels[labels->count][0] = instno; 319 labels->labels[labels->count][1] = pointer; 320 labels->count++; 321 322 /* save expanded instruction */ 323 if (numInstructions == maxInstructions) { 324 instructions = REALLOC(instructions, 325 maxInstructions 326 * sizeof(struct tgsi_full_instruction), 327 (maxInstructions + 10) 328 * sizeof(struct tgsi_full_instruction)); 329 maxInstructions += 10; 330 } 331 memcpy(instructions + numInstructions, 332 &parse.FullToken.FullInstruction, 333 sizeof(instructions[0])); 334 335#if 0 336 if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { 337 debug_printf("SOA dependency in instruction:\n"); 338 tgsi_dump_instruction(&parse.FullToken.FullInstruction, 339 numInstructions); 340 } 341#else 342 (void) tgsi_check_soa_dependencies; 343#endif 344 345 numInstructions++; 346 break; 347 348 default: 349 assert( 0 ); 350 } 351 } 352 tgsi_parse_free (&parse); 353 354 if (mach->Declarations) { 355 FREE( mach->Declarations ); 356 } 357 mach->Declarations = declarations; 358 mach->NumDeclarations = numDeclarations; 359 360 if (mach->Instructions) { 361 FREE( mach->Instructions ); 362 } 363 mach->Instructions = instructions; 364 mach->NumInstructions = numInstructions; 365} 366 367 368struct tgsi_exec_machine * 369tgsi_exec_machine_create( void ) 370{ 371 struct tgsi_exec_machine *mach; 372 uint i; 373 374 mach = align_malloc( sizeof *mach, 16 ); 375 if (!mach) 376 goto fail; 377 378 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 379 380 mach->Samplers = NULL; 381 mach->Consts = NULL; 382 mach->Tokens = NULL; 383 mach->Primitives = NULL; 384 mach->InterpCoefs = NULL; 385 mach->Instructions = NULL; 386 mach->Declarations = NULL; 387 388 /* Setup constants. */ 389 for( i = 0; i < 4; i++ ) { 390 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 391 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 392 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 393 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 394 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 395 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 396 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 397 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 398 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 399 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 400 } 401 402#ifdef DEBUG 403 /* silence warnings */ 404 (void) print_chan; 405 (void) print_temp; 406#endif 407 408 return mach; 409 410fail: 411 align_free(mach); 412 return NULL; 413} 414 415 416void 417tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 418{ 419 if (mach) { 420 FREE(mach->Instructions); 421 FREE(mach->Declarations); 422 } 423 424 align_free(mach); 425} 426 427 428static void 429micro_abs( 430 union tgsi_exec_channel *dst, 431 const union tgsi_exec_channel *src ) 432{ 433 dst->f[0] = fabsf( src->f[0] ); 434 dst->f[1] = fabsf( src->f[1] ); 435 dst->f[2] = fabsf( src->f[2] ); 436 dst->f[3] = fabsf( src->f[3] ); 437} 438 439static void 440micro_add( 441 union tgsi_exec_channel *dst, 442 const union tgsi_exec_channel *src0, 443 const union tgsi_exec_channel *src1 ) 444{ 445 dst->f[0] = src0->f[0] + src1->f[0]; 446 dst->f[1] = src0->f[1] + src1->f[1]; 447 dst->f[2] = src0->f[2] + src1->f[2]; 448 dst->f[3] = src0->f[3] + src1->f[3]; 449} 450 451#if 0 452static void 453micro_iadd( 454 union tgsi_exec_channel *dst, 455 const union tgsi_exec_channel *src0, 456 const union tgsi_exec_channel *src1 ) 457{ 458 dst->i[0] = src0->i[0] + src1->i[0]; 459 dst->i[1] = src0->i[1] + src1->i[1]; 460 dst->i[2] = src0->i[2] + src1->i[2]; 461 dst->i[3] = src0->i[3] + src1->i[3]; 462} 463#endif 464 465static void 466micro_and( 467 union tgsi_exec_channel *dst, 468 const union tgsi_exec_channel *src0, 469 const union tgsi_exec_channel *src1 ) 470{ 471 dst->u[0] = src0->u[0] & src1->u[0]; 472 dst->u[1] = src0->u[1] & src1->u[1]; 473 dst->u[2] = src0->u[2] & src1->u[2]; 474 dst->u[3] = src0->u[3] & src1->u[3]; 475} 476 477static void 478micro_ceil( 479 union tgsi_exec_channel *dst, 480 const union tgsi_exec_channel *src ) 481{ 482 dst->f[0] = ceilf( src->f[0] ); 483 dst->f[1] = ceilf( src->f[1] ); 484 dst->f[2] = ceilf( src->f[2] ); 485 dst->f[3] = ceilf( src->f[3] ); 486} 487 488static void 489micro_cos( 490 union tgsi_exec_channel *dst, 491 const union tgsi_exec_channel *src ) 492{ 493 dst->f[0] = cosf( src->f[0] ); 494 dst->f[1] = cosf( src->f[1] ); 495 dst->f[2] = cosf( src->f[2] ); 496 dst->f[3] = cosf( src->f[3] ); 497} 498 499static void 500micro_ddx( 501 union tgsi_exec_channel *dst, 502 const union tgsi_exec_channel *src ) 503{ 504 dst->f[0] = 505 dst->f[1] = 506 dst->f[2] = 507 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 508} 509 510static void 511micro_ddy( 512 union tgsi_exec_channel *dst, 513 const union tgsi_exec_channel *src ) 514{ 515 dst->f[0] = 516 dst->f[1] = 517 dst->f[2] = 518 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 519} 520 521static void 522micro_div( 523 union tgsi_exec_channel *dst, 524 const union tgsi_exec_channel *src0, 525 const union tgsi_exec_channel *src1 ) 526{ 527 if (src1->f[0] != 0) { 528 dst->f[0] = src0->f[0] / src1->f[0]; 529 } 530 if (src1->f[1] != 0) { 531 dst->f[1] = src0->f[1] / src1->f[1]; 532 } 533 if (src1->f[2] != 0) { 534 dst->f[2] = src0->f[2] / src1->f[2]; 535 } 536 if (src1->f[3] != 0) { 537 dst->f[3] = src0->f[3] / src1->f[3]; 538 } 539} 540 541#if 0 542static void 543micro_udiv( 544 union tgsi_exec_channel *dst, 545 const union tgsi_exec_channel *src0, 546 const union tgsi_exec_channel *src1 ) 547{ 548 dst->u[0] = src0->u[0] / src1->u[0]; 549 dst->u[1] = src0->u[1] / src1->u[1]; 550 dst->u[2] = src0->u[2] / src1->u[2]; 551 dst->u[3] = src0->u[3] / src1->u[3]; 552} 553#endif 554 555static void 556micro_eq( 557 union tgsi_exec_channel *dst, 558 const union tgsi_exec_channel *src0, 559 const union tgsi_exec_channel *src1, 560 const union tgsi_exec_channel *src2, 561 const union tgsi_exec_channel *src3 ) 562{ 563 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 564 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 565 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 566 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 567} 568 569#if 0 570static void 571micro_ieq( 572 union tgsi_exec_channel *dst, 573 const union tgsi_exec_channel *src0, 574 const union tgsi_exec_channel *src1, 575 const union tgsi_exec_channel *src2, 576 const union tgsi_exec_channel *src3 ) 577{ 578 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 579 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 580 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 581 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 582} 583#endif 584 585static void 586micro_exp2( 587 union tgsi_exec_channel *dst, 588 const union tgsi_exec_channel *src) 589{ 590#if FAST_MATH 591 dst->f[0] = util_fast_exp2( src->f[0] ); 592 dst->f[1] = util_fast_exp2( src->f[1] ); 593 dst->f[2] = util_fast_exp2( src->f[2] ); 594 dst->f[3] = util_fast_exp2( src->f[3] ); 595#else 596 dst->f[0] = powf( 2.0f, src->f[0] ); 597 dst->f[1] = powf( 2.0f, src->f[1] ); 598 dst->f[2] = powf( 2.0f, src->f[2] ); 599 dst->f[3] = powf( 2.0f, src->f[3] ); 600#endif 601} 602 603#if 0 604static void 605micro_f2ut( 606 union tgsi_exec_channel *dst, 607 const union tgsi_exec_channel *src ) 608{ 609 dst->u[0] = (uint) src->f[0]; 610 dst->u[1] = (uint) src->f[1]; 611 dst->u[2] = (uint) src->f[2]; 612 dst->u[3] = (uint) src->f[3]; 613} 614#endif 615 616static void 617micro_float_clamp(union tgsi_exec_channel *dst, 618 const union tgsi_exec_channel *src) 619{ 620 uint i; 621 622 for (i = 0; i < 4; i++) { 623 if (src->f[i] > 0.0f) { 624 if (src->f[i] > 1.884467e+019f) 625 dst->f[i] = 1.884467e+019f; 626 else if (src->f[i] < 5.42101e-020f) 627 dst->f[i] = 5.42101e-020f; 628 else 629 dst->f[i] = src->f[i]; 630 } 631 else { 632 if (src->f[i] < -1.884467e+019f) 633 dst->f[i] = -1.884467e+019f; 634 else if (src->f[i] > -5.42101e-020f) 635 dst->f[i] = -5.42101e-020f; 636 else 637 dst->f[i] = src->f[i]; 638 } 639 } 640} 641 642static void 643micro_flr( 644 union tgsi_exec_channel *dst, 645 const union tgsi_exec_channel *src ) 646{ 647 dst->f[0] = floorf( src->f[0] ); 648 dst->f[1] = floorf( src->f[1] ); 649 dst->f[2] = floorf( src->f[2] ); 650 dst->f[3] = floorf( src->f[3] ); 651} 652 653static void 654micro_frc( 655 union tgsi_exec_channel *dst, 656 const union tgsi_exec_channel *src ) 657{ 658 dst->f[0] = src->f[0] - floorf( src->f[0] ); 659 dst->f[1] = src->f[1] - floorf( src->f[1] ); 660 dst->f[2] = src->f[2] - floorf( src->f[2] ); 661 dst->f[3] = src->f[3] - floorf( src->f[3] ); 662} 663 664static void 665micro_i2f( 666 union tgsi_exec_channel *dst, 667 const union tgsi_exec_channel *src ) 668{ 669 dst->f[0] = (float) src->i[0]; 670 dst->f[1] = (float) src->i[1]; 671 dst->f[2] = (float) src->i[2]; 672 dst->f[3] = (float) src->i[3]; 673} 674 675static void 676micro_lg2( 677 union tgsi_exec_channel *dst, 678 const union tgsi_exec_channel *src ) 679{ 680#if FAST_MATH 681 dst->f[0] = util_fast_log2( src->f[0] ); 682 dst->f[1] = util_fast_log2( src->f[1] ); 683 dst->f[2] = util_fast_log2( src->f[2] ); 684 dst->f[3] = util_fast_log2( src->f[3] ); 685#else 686 dst->f[0] = logf( src->f[0] ) * 1.442695f; 687 dst->f[1] = logf( src->f[1] ) * 1.442695f; 688 dst->f[2] = logf( src->f[2] ) * 1.442695f; 689 dst->f[3] = logf( src->f[3] ) * 1.442695f; 690#endif 691} 692 693static void 694micro_le( 695 union tgsi_exec_channel *dst, 696 const union tgsi_exec_channel *src0, 697 const union tgsi_exec_channel *src1, 698 const union tgsi_exec_channel *src2, 699 const union tgsi_exec_channel *src3 ) 700{ 701 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 702 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 703 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 704 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 705} 706 707static void 708micro_lt( 709 union tgsi_exec_channel *dst, 710 const union tgsi_exec_channel *src0, 711 const union tgsi_exec_channel *src1, 712 const union tgsi_exec_channel *src2, 713 const union tgsi_exec_channel *src3 ) 714{ 715 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 716 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 717 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 718 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 719} 720 721#if 0 722static void 723micro_ilt( 724 union tgsi_exec_channel *dst, 725 const union tgsi_exec_channel *src0, 726 const union tgsi_exec_channel *src1, 727 const union tgsi_exec_channel *src2, 728 const union tgsi_exec_channel *src3 ) 729{ 730 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 731 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 732 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 733 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 734} 735#endif 736 737#if 0 738static void 739micro_ult( 740 union tgsi_exec_channel *dst, 741 const union tgsi_exec_channel *src0, 742 const union tgsi_exec_channel *src1, 743 const union tgsi_exec_channel *src2, 744 const union tgsi_exec_channel *src3 ) 745{ 746 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 747 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 748 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 749 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 750} 751#endif 752 753static void 754micro_max( 755 union tgsi_exec_channel *dst, 756 const union tgsi_exec_channel *src0, 757 const union tgsi_exec_channel *src1 ) 758{ 759 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 760 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 761 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 762 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 763} 764 765#if 0 766static void 767micro_imax( 768 union tgsi_exec_channel *dst, 769 const union tgsi_exec_channel *src0, 770 const union tgsi_exec_channel *src1 ) 771{ 772 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 773 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 774 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 775 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 776} 777#endif 778 779#if 0 780static void 781micro_umax( 782 union tgsi_exec_channel *dst, 783 const union tgsi_exec_channel *src0, 784 const union tgsi_exec_channel *src1 ) 785{ 786 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 787 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 788 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 789 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 790} 791#endif 792 793static void 794micro_min( 795 union tgsi_exec_channel *dst, 796 const union tgsi_exec_channel *src0, 797 const union tgsi_exec_channel *src1 ) 798{ 799 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 800 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 801 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 802 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 803} 804 805#if 0 806static void 807micro_imin( 808 union tgsi_exec_channel *dst, 809 const union tgsi_exec_channel *src0, 810 const union tgsi_exec_channel *src1 ) 811{ 812 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 813 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 814 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 815 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 816} 817#endif 818 819#if 0 820static void 821micro_umin( 822 union tgsi_exec_channel *dst, 823 const union tgsi_exec_channel *src0, 824 const union tgsi_exec_channel *src1 ) 825{ 826 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 827 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 828 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 829 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 830} 831#endif 832 833#if 0 834static void 835micro_umod( 836 union tgsi_exec_channel *dst, 837 const union tgsi_exec_channel *src0, 838 const union tgsi_exec_channel *src1 ) 839{ 840 dst->u[0] = src0->u[0] % src1->u[0]; 841 dst->u[1] = src0->u[1] % src1->u[1]; 842 dst->u[2] = src0->u[2] % src1->u[2]; 843 dst->u[3] = src0->u[3] % src1->u[3]; 844} 845#endif 846 847static void 848micro_mul( 849 union tgsi_exec_channel *dst, 850 const union tgsi_exec_channel *src0, 851 const union tgsi_exec_channel *src1 ) 852{ 853 dst->f[0] = src0->f[0] * src1->f[0]; 854 dst->f[1] = src0->f[1] * src1->f[1]; 855 dst->f[2] = src0->f[2] * src1->f[2]; 856 dst->f[3] = src0->f[3] * src1->f[3]; 857} 858 859#if 0 860static void 861micro_imul( 862 union tgsi_exec_channel *dst, 863 const union tgsi_exec_channel *src0, 864 const union tgsi_exec_channel *src1 ) 865{ 866 dst->i[0] = src0->i[0] * src1->i[0]; 867 dst->i[1] = src0->i[1] * src1->i[1]; 868 dst->i[2] = src0->i[2] * src1->i[2]; 869 dst->i[3] = src0->i[3] * src1->i[3]; 870} 871#endif 872 873#if 0 874static void 875micro_imul64( 876 union tgsi_exec_channel *dst0, 877 union tgsi_exec_channel *dst1, 878 const union tgsi_exec_channel *src0, 879 const union tgsi_exec_channel *src1 ) 880{ 881 dst1->i[0] = src0->i[0] * src1->i[0]; 882 dst1->i[1] = src0->i[1] * src1->i[1]; 883 dst1->i[2] = src0->i[2] * src1->i[2]; 884 dst1->i[3] = src0->i[3] * src1->i[3]; 885 dst0->i[0] = 0; 886 dst0->i[1] = 0; 887 dst0->i[2] = 0; 888 dst0->i[3] = 0; 889} 890#endif 891 892#if 0 893static void 894micro_umul64( 895 union tgsi_exec_channel *dst0, 896 union tgsi_exec_channel *dst1, 897 const union tgsi_exec_channel *src0, 898 const union tgsi_exec_channel *src1 ) 899{ 900 dst1->u[0] = src0->u[0] * src1->u[0]; 901 dst1->u[1] = src0->u[1] * src1->u[1]; 902 dst1->u[2] = src0->u[2] * src1->u[2]; 903 dst1->u[3] = src0->u[3] * src1->u[3]; 904 dst0->u[0] = 0; 905 dst0->u[1] = 0; 906 dst0->u[2] = 0; 907 dst0->u[3] = 0; 908} 909#endif 910 911 912#if 0 913static void 914micro_movc( 915 union tgsi_exec_channel *dst, 916 const union tgsi_exec_channel *src0, 917 const union tgsi_exec_channel *src1, 918 const union tgsi_exec_channel *src2 ) 919{ 920 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 921 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 922 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 923 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 924} 925#endif 926 927static void 928micro_neg( 929 union tgsi_exec_channel *dst, 930 const union tgsi_exec_channel *src ) 931{ 932 dst->f[0] = -src->f[0]; 933 dst->f[1] = -src->f[1]; 934 dst->f[2] = -src->f[2]; 935 dst->f[3] = -src->f[3]; 936} 937 938#if 0 939static void 940micro_ineg( 941 union tgsi_exec_channel *dst, 942 const union tgsi_exec_channel *src ) 943{ 944 dst->i[0] = -src->i[0]; 945 dst->i[1] = -src->i[1]; 946 dst->i[2] = -src->i[2]; 947 dst->i[3] = -src->i[3]; 948} 949#endif 950 951static void 952micro_not( 953 union tgsi_exec_channel *dst, 954 const union tgsi_exec_channel *src ) 955{ 956 dst->u[0] = ~src->u[0]; 957 dst->u[1] = ~src->u[1]; 958 dst->u[2] = ~src->u[2]; 959 dst->u[3] = ~src->u[3]; 960} 961 962static void 963micro_or( 964 union tgsi_exec_channel *dst, 965 const union tgsi_exec_channel *src0, 966 const union tgsi_exec_channel *src1 ) 967{ 968 dst->u[0] = src0->u[0] | src1->u[0]; 969 dst->u[1] = src0->u[1] | src1->u[1]; 970 dst->u[2] = src0->u[2] | src1->u[2]; 971 dst->u[3] = src0->u[3] | src1->u[3]; 972} 973 974static void 975micro_pow( 976 union tgsi_exec_channel *dst, 977 const union tgsi_exec_channel *src0, 978 const union tgsi_exec_channel *src1 ) 979{ 980#if FAST_MATH 981 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 982 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 983 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 984 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 985#else 986 dst->f[0] = powf( src0->f[0], src1->f[0] ); 987 dst->f[1] = powf( src0->f[1], src1->f[1] ); 988 dst->f[2] = powf( src0->f[2], src1->f[2] ); 989 dst->f[3] = powf( src0->f[3], src1->f[3] ); 990#endif 991} 992 993static void 994micro_rnd( 995 union tgsi_exec_channel *dst, 996 const union tgsi_exec_channel *src ) 997{ 998 dst->f[0] = floorf( src->f[0] + 0.5f ); 999 dst->f[1] = floorf( src->f[1] + 0.5f ); 1000 dst->f[2] = floorf( src->f[2] + 0.5f ); 1001 dst->f[3] = floorf( src->f[3] + 0.5f ); 1002} 1003 1004static void 1005micro_sgn( 1006 union tgsi_exec_channel *dst, 1007 const union tgsi_exec_channel *src ) 1008{ 1009 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 1010 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 1011 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 1012 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1013} 1014 1015static void 1016micro_shl( 1017 union tgsi_exec_channel *dst, 1018 const union tgsi_exec_channel *src0, 1019 const union tgsi_exec_channel *src1 ) 1020{ 1021 dst->i[0] = src0->i[0] << src1->i[0]; 1022 dst->i[1] = src0->i[1] << src1->i[1]; 1023 dst->i[2] = src0->i[2] << src1->i[2]; 1024 dst->i[3] = src0->i[3] << src1->i[3]; 1025} 1026 1027static void 1028micro_ishr( 1029 union tgsi_exec_channel *dst, 1030 const union tgsi_exec_channel *src0, 1031 const union tgsi_exec_channel *src1 ) 1032{ 1033 dst->i[0] = src0->i[0] >> src1->i[0]; 1034 dst->i[1] = src0->i[1] >> src1->i[1]; 1035 dst->i[2] = src0->i[2] >> src1->i[2]; 1036 dst->i[3] = src0->i[3] >> src1->i[3]; 1037} 1038 1039static void 1040micro_trunc( 1041 union tgsi_exec_channel *dst, 1042 const union tgsi_exec_channel *src0 ) 1043{ 1044 dst->f[0] = (float) (int) src0->f[0]; 1045 dst->f[1] = (float) (int) src0->f[1]; 1046 dst->f[2] = (float) (int) src0->f[2]; 1047 dst->f[3] = (float) (int) src0->f[3]; 1048} 1049 1050#if 0 1051static void 1052micro_ushr( 1053 union tgsi_exec_channel *dst, 1054 const union tgsi_exec_channel *src0, 1055 const union tgsi_exec_channel *src1 ) 1056{ 1057 dst->u[0] = src0->u[0] >> src1->u[0]; 1058 dst->u[1] = src0->u[1] >> src1->u[1]; 1059 dst->u[2] = src0->u[2] >> src1->u[2]; 1060 dst->u[3] = src0->u[3] >> src1->u[3]; 1061} 1062#endif 1063 1064static void 1065micro_sin( 1066 union tgsi_exec_channel *dst, 1067 const union tgsi_exec_channel *src ) 1068{ 1069 dst->f[0] = sinf( src->f[0] ); 1070 dst->f[1] = sinf( src->f[1] ); 1071 dst->f[2] = sinf( src->f[2] ); 1072 dst->f[3] = sinf( src->f[3] ); 1073} 1074 1075static void 1076micro_sqrt( union tgsi_exec_channel *dst, 1077 const union tgsi_exec_channel *src ) 1078{ 1079 dst->f[0] = sqrtf( src->f[0] ); 1080 dst->f[1] = sqrtf( src->f[1] ); 1081 dst->f[2] = sqrtf( src->f[2] ); 1082 dst->f[3] = sqrtf( src->f[3] ); 1083} 1084 1085static void 1086micro_sub( 1087 union tgsi_exec_channel *dst, 1088 const union tgsi_exec_channel *src0, 1089 const union tgsi_exec_channel *src1 ) 1090{ 1091 dst->f[0] = src0->f[0] - src1->f[0]; 1092 dst->f[1] = src0->f[1] - src1->f[1]; 1093 dst->f[2] = src0->f[2] - src1->f[2]; 1094 dst->f[3] = src0->f[3] - src1->f[3]; 1095} 1096 1097#if 0 1098static void 1099micro_u2f( 1100 union tgsi_exec_channel *dst, 1101 const union tgsi_exec_channel *src ) 1102{ 1103 dst->f[0] = (float) src->u[0]; 1104 dst->f[1] = (float) src->u[1]; 1105 dst->f[2] = (float) src->u[2]; 1106 dst->f[3] = (float) src->u[3]; 1107} 1108#endif 1109 1110static void 1111micro_xor( 1112 union tgsi_exec_channel *dst, 1113 const union tgsi_exec_channel *src0, 1114 const union tgsi_exec_channel *src1 ) 1115{ 1116 dst->u[0] = src0->u[0] ^ src1->u[0]; 1117 dst->u[1] = src0->u[1] ^ src1->u[1]; 1118 dst->u[2] = src0->u[2] ^ src1->u[2]; 1119 dst->u[3] = src0->u[3] ^ src1->u[3]; 1120} 1121 1122static void 1123fetch_src_file_channel( 1124 const struct tgsi_exec_machine *mach, 1125 const uint file, 1126 const uint swizzle, 1127 const union tgsi_exec_channel *index, 1128 union tgsi_exec_channel *chan ) 1129{ 1130 switch( swizzle ) { 1131 case TGSI_EXTSWIZZLE_X: 1132 case TGSI_EXTSWIZZLE_Y: 1133 case TGSI_EXTSWIZZLE_Z: 1134 case TGSI_EXTSWIZZLE_W: 1135 switch( file ) { 1136 case TGSI_FILE_CONSTANT: 1137 assert(mach->Consts); 1138 if (index->i[0] < 0) 1139 chan->f[0] = 0.0f; 1140 else 1141 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1142 if (index->i[1] < 0) 1143 chan->f[1] = 0.0f; 1144 else 1145 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1146 if (index->i[2] < 0) 1147 chan->f[2] = 0.0f; 1148 else 1149 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1150 if (index->i[3] < 0) 1151 chan->f[3] = 0.0f; 1152 else 1153 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1154 break; 1155 1156 case TGSI_FILE_INPUT: 1157 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1158 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1159 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1160 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1161 break; 1162 1163 case TGSI_FILE_TEMPORARY: 1164 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1165 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1166 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1167 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1168 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1169 break; 1170 1171 case TGSI_FILE_IMMEDIATE: 1172 assert( index->i[0] < (int) mach->ImmLimit ); 1173 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1174 assert( index->i[1] < (int) mach->ImmLimit ); 1175 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1176 assert( index->i[2] < (int) mach->ImmLimit ); 1177 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1178 assert( index->i[3] < (int) mach->ImmLimit ); 1179 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1180 break; 1181 1182 case TGSI_FILE_ADDRESS: 1183 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1184 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1185 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1186 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1187 break; 1188 1189 case TGSI_FILE_OUTPUT: 1190 /* vertex/fragment output vars can be read too */ 1191 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1192 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1193 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1194 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1195 break; 1196 1197 default: 1198 assert( 0 ); 1199 } 1200 break; 1201 1202 case TGSI_EXTSWIZZLE_ZERO: 1203 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1204 break; 1205 1206 case TGSI_EXTSWIZZLE_ONE: 1207 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1208 break; 1209 1210 default: 1211 assert( 0 ); 1212 } 1213} 1214 1215static void 1216fetch_source( 1217 const struct tgsi_exec_machine *mach, 1218 union tgsi_exec_channel *chan, 1219 const struct tgsi_full_src_register *reg, 1220 const uint chan_index ) 1221{ 1222 union tgsi_exec_channel index; 1223 uint swizzle; 1224 1225 /* We start with a direct index into a register file. 1226 * 1227 * file[1], 1228 * where: 1229 * file = SrcRegister.File 1230 * [1] = SrcRegister.Index 1231 */ 1232 index.i[0] = 1233 index.i[1] = 1234 index.i[2] = 1235 index.i[3] = reg->SrcRegister.Index; 1236 1237 /* There is an extra source register that indirectly subscripts 1238 * a register file. The direct index now becomes an offset 1239 * that is being added to the indirect register. 1240 * 1241 * file[ind[2].x+1], 1242 * where: 1243 * ind = SrcRegisterInd.File 1244 * [2] = SrcRegisterInd.Index 1245 * .x = SrcRegisterInd.SwizzleX 1246 */ 1247 if (reg->SrcRegister.Indirect) { 1248 union tgsi_exec_channel index2; 1249 union tgsi_exec_channel indir_index; 1250 const uint execmask = mach->ExecMask; 1251 uint i; 1252 1253 /* which address register (always zero now) */ 1254 index2.i[0] = 1255 index2.i[1] = 1256 index2.i[2] = 1257 index2.i[3] = reg->SrcRegisterInd.Index; 1258 1259 /* get current value of address register[swizzle] */ 1260 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1261 fetch_src_file_channel( 1262 mach, 1263 reg->SrcRegisterInd.File, 1264 swizzle, 1265 &index2, 1266 &indir_index ); 1267 1268 /* add value of address register to the offset */ 1269 index.i[0] += (int) indir_index.f[0]; 1270 index.i[1] += (int) indir_index.f[1]; 1271 index.i[2] += (int) indir_index.f[2]; 1272 index.i[3] += (int) indir_index.f[3]; 1273 1274 /* for disabled execution channels, zero-out the index to 1275 * avoid using a potential garbage value. 1276 */ 1277 for (i = 0; i < QUAD_SIZE; i++) { 1278 if ((execmask & (1 << i)) == 0) 1279 index.i[i] = 0; 1280 } 1281 } 1282 1283 /* There is an extra source register that is a second 1284 * subscript to a register file. Effectively it means that 1285 * the register file is actually a 2D array of registers. 1286 * 1287 * file[1][3] == file[1*sizeof(file[1])+3], 1288 * where: 1289 * [3] = SrcRegisterDim.Index 1290 */ 1291 if (reg->SrcRegister.Dimension) { 1292 /* The size of the first-order array depends on the register file type. 1293 * We need to multiply the index to the first array to get an effective, 1294 * "flat" index that points to the beginning of the second-order array. 1295 */ 1296 switch (reg->SrcRegister.File) { 1297 case TGSI_FILE_INPUT: 1298 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1299 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1300 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1301 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1302 break; 1303 case TGSI_FILE_CONSTANT: 1304 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1305 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1306 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1307 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1308 break; 1309 default: 1310 assert( 0 ); 1311 } 1312 1313 index.i[0] += reg->SrcRegisterDim.Index; 1314 index.i[1] += reg->SrcRegisterDim.Index; 1315 index.i[2] += reg->SrcRegisterDim.Index; 1316 index.i[3] += reg->SrcRegisterDim.Index; 1317 1318 /* Again, the second subscript index can be addressed indirectly 1319 * identically to the first one. 1320 * Nothing stops us from indirectly addressing the indirect register, 1321 * but there is no need for that, so we won't exercise it. 1322 * 1323 * file[1][ind[4].y+3], 1324 * where: 1325 * ind = SrcRegisterDimInd.File 1326 * [4] = SrcRegisterDimInd.Index 1327 * .y = SrcRegisterDimInd.SwizzleX 1328 */ 1329 if (reg->SrcRegisterDim.Indirect) { 1330 union tgsi_exec_channel index2; 1331 union tgsi_exec_channel indir_index; 1332 const uint execmask = mach->ExecMask; 1333 uint i; 1334 1335 index2.i[0] = 1336 index2.i[1] = 1337 index2.i[2] = 1338 index2.i[3] = reg->SrcRegisterDimInd.Index; 1339 1340 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1341 fetch_src_file_channel( 1342 mach, 1343 reg->SrcRegisterDimInd.File, 1344 swizzle, 1345 &index2, 1346 &indir_index ); 1347 1348 index.i[0] += (int) indir_index.f[0]; 1349 index.i[1] += (int) indir_index.f[1]; 1350 index.i[2] += (int) indir_index.f[2]; 1351 index.i[3] += (int) indir_index.f[3]; 1352 1353 /* for disabled execution channels, zero-out the index to 1354 * avoid using a potential garbage value. 1355 */ 1356 for (i = 0; i < QUAD_SIZE; i++) { 1357 if ((execmask & (1 << i)) == 0) 1358 index.i[i] = 0; 1359 } 1360 } 1361 1362 /* If by any chance there was a need for a 3D array of register 1363 * files, we would have to check whether SrcRegisterDim is followed 1364 * by a dimension register and continue the saga. 1365 */ 1366 } 1367 1368 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1369 fetch_src_file_channel( 1370 mach, 1371 reg->SrcRegister.File, 1372 swizzle, 1373 &index, 1374 chan ); 1375 1376 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1377 case TGSI_UTIL_SIGN_CLEAR: 1378 micro_abs( chan, chan ); 1379 break; 1380 1381 case TGSI_UTIL_SIGN_SET: 1382 micro_abs( chan, chan ); 1383 micro_neg( chan, chan ); 1384 break; 1385 1386 case TGSI_UTIL_SIGN_TOGGLE: 1387 micro_neg( chan, chan ); 1388 break; 1389 1390 case TGSI_UTIL_SIGN_KEEP: 1391 break; 1392 } 1393 1394 if (reg->SrcRegisterExtMod.Complement) { 1395 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1396 } 1397} 1398 1399static void 1400store_dest( 1401 struct tgsi_exec_machine *mach, 1402 const union tgsi_exec_channel *chan, 1403 const struct tgsi_full_dst_register *reg, 1404 const struct tgsi_full_instruction *inst, 1405 uint chan_index ) 1406{ 1407 uint i; 1408 union tgsi_exec_channel null; 1409 union tgsi_exec_channel *dst; 1410 uint execmask = mach->ExecMask; 1411 int offset = 0; /* indirection offset */ 1412 int index; 1413 1414#ifdef DEBUG 1415 check_inf_or_nan(chan); 1416#endif 1417 1418 /* There is an extra source register that indirectly subscripts 1419 * a register file. The direct index now becomes an offset 1420 * that is being added to the indirect register. 1421 * 1422 * file[ind[2].x+1], 1423 * where: 1424 * ind = DstRegisterInd.File 1425 * [2] = DstRegisterInd.Index 1426 * .x = DstRegisterInd.SwizzleX 1427 */ 1428 if (reg->DstRegister.Indirect) { 1429 union tgsi_exec_channel index; 1430 union tgsi_exec_channel indir_index; 1431 uint swizzle; 1432 1433 /* which address register (always zero for now) */ 1434 index.i[0] = 1435 index.i[1] = 1436 index.i[2] = 1437 index.i[3] = reg->DstRegisterInd.Index; 1438 1439 /* get current value of address register[swizzle] */ 1440 swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); 1441 1442 /* fetch values from the address/indirection register */ 1443 fetch_src_file_channel( 1444 mach, 1445 reg->DstRegisterInd.File, 1446 swizzle, 1447 &index, 1448 &indir_index ); 1449 1450 /* save indirection offset */ 1451 offset = (int) indir_index.f[0]; 1452 } 1453 1454 switch (reg->DstRegister.File) { 1455 case TGSI_FILE_NULL: 1456 dst = &null; 1457 break; 1458 1459 case TGSI_FILE_OUTPUT: 1460 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1461 + reg->DstRegister.Index; 1462 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1463 break; 1464 1465 case TGSI_FILE_TEMPORARY: 1466 index = reg->DstRegister.Index; 1467 assert( index < TGSI_EXEC_NUM_TEMPS ); 1468 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1469 break; 1470 1471 case TGSI_FILE_ADDRESS: 1472 index = reg->DstRegister.Index; 1473 dst = &mach->Addrs[index].xyzw[chan_index]; 1474 break; 1475 1476 default: 1477 assert( 0 ); 1478 return; 1479 } 1480 1481 if (inst->InstructionExtNv.CondFlowEnable) { 1482 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1483 uint swizzle; 1484 uint shift; 1485 uint mask; 1486 uint test; 1487 1488 /* Only CC0 supported. 1489 */ 1490 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1491 1492 switch (chan_index) { 1493 case CHAN_X: 1494 swizzle = inst->InstructionExtNv.CondSwizzleX; 1495 break; 1496 case CHAN_Y: 1497 swizzle = inst->InstructionExtNv.CondSwizzleY; 1498 break; 1499 case CHAN_Z: 1500 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1501 break; 1502 case CHAN_W: 1503 swizzle = inst->InstructionExtNv.CondSwizzleW; 1504 break; 1505 default: 1506 assert( 0 ); 1507 return; 1508 } 1509 1510 switch (swizzle) { 1511 case TGSI_SWIZZLE_X: 1512 shift = TGSI_EXEC_CC_X_SHIFT; 1513 mask = TGSI_EXEC_CC_X_MASK; 1514 break; 1515 case TGSI_SWIZZLE_Y: 1516 shift = TGSI_EXEC_CC_Y_SHIFT; 1517 mask = TGSI_EXEC_CC_Y_MASK; 1518 break; 1519 case TGSI_SWIZZLE_Z: 1520 shift = TGSI_EXEC_CC_Z_SHIFT; 1521 mask = TGSI_EXEC_CC_Z_MASK; 1522 break; 1523 case TGSI_SWIZZLE_W: 1524 shift = TGSI_EXEC_CC_W_SHIFT; 1525 mask = TGSI_EXEC_CC_W_MASK; 1526 break; 1527 default: 1528 assert( 0 ); 1529 return; 1530 } 1531 1532 switch (inst->InstructionExtNv.CondMask) { 1533 case TGSI_CC_GT: 1534 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1535 for (i = 0; i < QUAD_SIZE; i++) 1536 if (cc->u[i] & test) 1537 execmask &= ~(1 << i); 1538 break; 1539 1540 case TGSI_CC_EQ: 1541 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1542 for (i = 0; i < QUAD_SIZE; i++) 1543 if (cc->u[i] & test) 1544 execmask &= ~(1 << i); 1545 break; 1546 1547 case TGSI_CC_LT: 1548 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1549 for (i = 0; i < QUAD_SIZE; i++) 1550 if (cc->u[i] & test) 1551 execmask &= ~(1 << i); 1552 break; 1553 1554 case TGSI_CC_GE: 1555 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1556 for (i = 0; i < QUAD_SIZE; i++) 1557 if (cc->u[i] & test) 1558 execmask &= ~(1 << i); 1559 break; 1560 1561 case TGSI_CC_LE: 1562 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1563 for (i = 0; i < QUAD_SIZE; i++) 1564 if (cc->u[i] & test) 1565 execmask &= ~(1 << i); 1566 break; 1567 1568 case TGSI_CC_NE: 1569 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1570 for (i = 0; i < QUAD_SIZE; i++) 1571 if (cc->u[i] & test) 1572 execmask &= ~(1 << i); 1573 break; 1574 1575 case TGSI_CC_TR: 1576 break; 1577 1578 case TGSI_CC_FL: 1579 for (i = 0; i < QUAD_SIZE; i++) 1580 execmask &= ~(1 << i); 1581 break; 1582 1583 default: 1584 assert( 0 ); 1585 return; 1586 } 1587 } 1588 1589 switch (inst->Instruction.Saturate) { 1590 case TGSI_SAT_NONE: 1591 for (i = 0; i < QUAD_SIZE; i++) 1592 if (execmask & (1 << i)) 1593 dst->i[i] = chan->i[i]; 1594 break; 1595 1596 case TGSI_SAT_ZERO_ONE: 1597 for (i = 0; i < QUAD_SIZE; i++) 1598 if (execmask & (1 << i)) { 1599 if (chan->f[i] < 0.0f) 1600 dst->f[i] = 0.0f; 1601 else if (chan->f[i] > 1.0f) 1602 dst->f[i] = 1.0f; 1603 else 1604 dst->i[i] = chan->i[i]; 1605 } 1606 break; 1607 1608 case TGSI_SAT_MINUS_PLUS_ONE: 1609 for (i = 0; i < QUAD_SIZE; i++) 1610 if (execmask & (1 << i)) { 1611 if (chan->f[i] < -1.0f) 1612 dst->f[i] = -1.0f; 1613 else if (chan->f[i] > 1.0f) 1614 dst->f[i] = 1.0f; 1615 else 1616 dst->i[i] = chan->i[i]; 1617 } 1618 break; 1619 1620 default: 1621 assert( 0 ); 1622 } 1623 1624 if (inst->InstructionExtNv.CondDstUpdate) { 1625 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1626 uint shift; 1627 uint mask; 1628 1629 /* Only CC0 supported. 1630 */ 1631 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1632 1633 switch (chan_index) { 1634 case CHAN_X: 1635 shift = TGSI_EXEC_CC_X_SHIFT; 1636 mask = ~TGSI_EXEC_CC_X_MASK; 1637 break; 1638 case CHAN_Y: 1639 shift = TGSI_EXEC_CC_Y_SHIFT; 1640 mask = ~TGSI_EXEC_CC_Y_MASK; 1641 break; 1642 case CHAN_Z: 1643 shift = TGSI_EXEC_CC_Z_SHIFT; 1644 mask = ~TGSI_EXEC_CC_Z_MASK; 1645 break; 1646 case CHAN_W: 1647 shift = TGSI_EXEC_CC_W_SHIFT; 1648 mask = ~TGSI_EXEC_CC_W_MASK; 1649 break; 1650 default: 1651 assert( 0 ); 1652 return; 1653 } 1654 1655 for (i = 0; i < QUAD_SIZE; i++) 1656 if (execmask & (1 << i)) { 1657 cc->u[i] &= mask; 1658 if (dst->f[i] < 0.0f) 1659 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1660 else if (dst->f[i] > 0.0f) 1661 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1662 else if (dst->f[i] == 0.0f) 1663 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1664 else 1665 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1666 } 1667 } 1668} 1669 1670#define FETCH(VAL,INDEX,CHAN)\ 1671 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1672 1673#define STORE(VAL,INDEX,CHAN)\ 1674 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1675 1676 1677/** 1678 * Execute ARB-style KIL which is predicated by a src register. 1679 * Kill fragment if any of the four values is less than zero. 1680 */ 1681static void 1682exec_kil(struct tgsi_exec_machine *mach, 1683 const struct tgsi_full_instruction *inst) 1684{ 1685 uint uniquemask; 1686 uint chan_index; 1687 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1688 union tgsi_exec_channel r[1]; 1689 1690 /* This mask stores component bits that were already tested. Note that 1691 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1692 * tested. */ 1693 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1694 1695 for (chan_index = 0; chan_index < 4; chan_index++) 1696 { 1697 uint swizzle; 1698 uint i; 1699 1700 /* unswizzle channel */ 1701 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1702 &inst->FullSrcRegisters[0], 1703 chan_index); 1704 1705 /* check if the component has not been already tested */ 1706 if (uniquemask & (1 << swizzle)) 1707 continue; 1708 uniquemask |= 1 << swizzle; 1709 1710 FETCH(&r[0], 0, chan_index); 1711 for (i = 0; i < 4; i++) 1712 if (r[0].f[i] < 0.0f) 1713 kilmask |= 1 << i; 1714 } 1715 1716 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1717} 1718 1719/** 1720 * Execute NVIDIA-style KIL which is predicated by a condition code. 1721 * Kill fragment if the condition code is TRUE. 1722 */ 1723static void 1724exec_kilp(struct tgsi_exec_machine *mach, 1725 const struct tgsi_full_instruction *inst) 1726{ 1727 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1728 1729 if (inst->InstructionExtNv.CondFlowEnable) { 1730 uint swizzle[4]; 1731 uint chan_index; 1732 1733 kilmask = 0x0; 1734 1735 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1736 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1737 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1738 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1739 1740 for (chan_index = 0; chan_index < 4; chan_index++) 1741 { 1742 uint i; 1743 1744 for (i = 0; i < 4; i++) { 1745 /* TODO: evaluate the condition code */ 1746 if (0) 1747 kilmask |= 1 << i; 1748 } 1749 } 1750 } 1751 else { 1752 /* "unconditional" kil */ 1753 kilmask = mach->ExecMask; 1754 } 1755 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1756} 1757 1758 1759/* 1760 * Fetch a four texture samples using STR texture coordinates. 1761 */ 1762static void 1763fetch_texel( struct tgsi_sampler *sampler, 1764 const union tgsi_exec_channel *s, 1765 const union tgsi_exec_channel *t, 1766 const union tgsi_exec_channel *p, 1767 float lodbias, /* XXX should be float[4] */ 1768 union tgsi_exec_channel *r, 1769 union tgsi_exec_channel *g, 1770 union tgsi_exec_channel *b, 1771 union tgsi_exec_channel *a ) 1772{ 1773 uint j; 1774 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1775 1776 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1777 1778 for (j = 0; j < 4; j++) { 1779 r->f[j] = rgba[0][j]; 1780 g->f[j] = rgba[1][j]; 1781 b->f[j] = rgba[2][j]; 1782 a->f[j] = rgba[3][j]; 1783 } 1784} 1785 1786 1787static void 1788exec_tex(struct tgsi_exec_machine *mach, 1789 const struct tgsi_full_instruction *inst, 1790 boolean biasLod, 1791 boolean projected) 1792{ 1793 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1794 union tgsi_exec_channel r[4]; 1795 uint chan_index; 1796 float lodBias; 1797 1798 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1799 1800 switch (inst->InstructionExtTexture.Texture) { 1801 case TGSI_TEXTURE_1D: 1802 case TGSI_TEXTURE_SHADOW1D: 1803 1804 FETCH(&r[0], 0, CHAN_X); 1805 1806 if (projected) { 1807 FETCH(&r[1], 0, CHAN_W); 1808 micro_div( &r[0], &r[0], &r[1] ); 1809 } 1810 1811 if (biasLod) { 1812 FETCH(&r[1], 0, CHAN_W); 1813 lodBias = r[2].f[0]; 1814 } 1815 else 1816 lodBias = 0.0; 1817 1818 fetch_texel(mach->Samplers[unit], 1819 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1820 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1821 break; 1822 1823 case TGSI_TEXTURE_2D: 1824 case TGSI_TEXTURE_RECT: 1825 case TGSI_TEXTURE_SHADOW2D: 1826 case TGSI_TEXTURE_SHADOWRECT: 1827 1828 FETCH(&r[0], 0, CHAN_X); 1829 FETCH(&r[1], 0, CHAN_Y); 1830 FETCH(&r[2], 0, CHAN_Z); 1831 1832 if (projected) { 1833 FETCH(&r[3], 0, CHAN_W); 1834 micro_div( &r[0], &r[0], &r[3] ); 1835 micro_div( &r[1], &r[1], &r[3] ); 1836 micro_div( &r[2], &r[2], &r[3] ); 1837 } 1838 1839 if (biasLod) { 1840 FETCH(&r[3], 0, CHAN_W); 1841 lodBias = r[3].f[0]; 1842 } 1843 else 1844 lodBias = 0.0; 1845 1846 fetch_texel(mach->Samplers[unit], 1847 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1848 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1849 break; 1850 1851 case TGSI_TEXTURE_3D: 1852 case TGSI_TEXTURE_CUBE: 1853 1854 FETCH(&r[0], 0, CHAN_X); 1855 FETCH(&r[1], 0, CHAN_Y); 1856 FETCH(&r[2], 0, CHAN_Z); 1857 1858 if (projected) { 1859 FETCH(&r[3], 0, CHAN_W); 1860 micro_div( &r[0], &r[0], &r[3] ); 1861 micro_div( &r[1], &r[1], &r[3] ); 1862 micro_div( &r[2], &r[2], &r[3] ); 1863 } 1864 1865 if (biasLod) { 1866 FETCH(&r[3], 0, CHAN_W); 1867 lodBias = r[3].f[0]; 1868 } 1869 else 1870 lodBias = 0.0; 1871 1872 fetch_texel(mach->Samplers[unit], 1873 &r[0], &r[1], &r[2], lodBias, 1874 &r[0], &r[1], &r[2], &r[3]); 1875 break; 1876 1877 default: 1878 assert (0); 1879 } 1880 1881 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1882 STORE( &r[chan_index], 0, chan_index ); 1883 } 1884} 1885 1886 1887/** 1888 * Evaluate a constant-valued coefficient at the position of the 1889 * current quad. 1890 */ 1891static void 1892eval_constant_coef( 1893 struct tgsi_exec_machine *mach, 1894 unsigned attrib, 1895 unsigned chan ) 1896{ 1897 unsigned i; 1898 1899 for( i = 0; i < QUAD_SIZE; i++ ) { 1900 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1901 } 1902} 1903 1904/** 1905 * Evaluate a linear-valued coefficient at the position of the 1906 * current quad. 1907 */ 1908static void 1909eval_linear_coef( 1910 struct tgsi_exec_machine *mach, 1911 unsigned attrib, 1912 unsigned chan ) 1913{ 1914 const float x = mach->QuadPos.xyzw[0].f[0]; 1915 const float y = mach->QuadPos.xyzw[1].f[0]; 1916 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1917 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1918 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1919 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1920 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1921 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1922 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1923} 1924 1925/** 1926 * Evaluate a perspective-valued coefficient at the position of the 1927 * current quad. 1928 */ 1929static void 1930eval_perspective_coef( 1931 struct tgsi_exec_machine *mach, 1932 unsigned attrib, 1933 unsigned chan ) 1934{ 1935 const float x = mach->QuadPos.xyzw[0].f[0]; 1936 const float y = mach->QuadPos.xyzw[1].f[0]; 1937 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1938 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1939 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1940 const float *w = mach->QuadPos.xyzw[3].f; 1941 /* divide by W here */ 1942 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1943 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1944 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1945 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1946} 1947 1948 1949typedef void (* eval_coef_func)( 1950 struct tgsi_exec_machine *mach, 1951 unsigned attrib, 1952 unsigned chan ); 1953 1954static void 1955exec_declaration( 1956 struct tgsi_exec_machine *mach, 1957 const struct tgsi_full_declaration *decl ) 1958{ 1959 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1960 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1961 unsigned first, last, mask; 1962 eval_coef_func eval; 1963 1964 first = decl->DeclarationRange.First; 1965 last = decl->DeclarationRange.Last; 1966 mask = decl->Declaration.UsageMask; 1967 1968 switch( decl->Declaration.Interpolate ) { 1969 case TGSI_INTERPOLATE_CONSTANT: 1970 eval = eval_constant_coef; 1971 break; 1972 1973 case TGSI_INTERPOLATE_LINEAR: 1974 eval = eval_linear_coef; 1975 break; 1976 1977 case TGSI_INTERPOLATE_PERSPECTIVE: 1978 eval = eval_perspective_coef; 1979 break; 1980 1981 default: 1982 eval = NULL; 1983 assert( 0 ); 1984 } 1985 1986 if( mask == TGSI_WRITEMASK_XYZW ) { 1987 unsigned i, j; 1988 1989 for( i = first; i <= last; i++ ) { 1990 for( j = 0; j < NUM_CHANNELS; j++ ) { 1991 eval( mach, i, j ); 1992 } 1993 } 1994 } 1995 else { 1996 unsigned i, j; 1997 1998 for( j = 0; j < NUM_CHANNELS; j++ ) { 1999 if( mask & (1 << j) ) { 2000 for( i = first; i <= last; i++ ) { 2001 eval( mach, i, j ); 2002 } 2003 } 2004 } 2005 } 2006 } 2007 } 2008} 2009 2010static void 2011exec_instruction( 2012 struct tgsi_exec_machine *mach, 2013 const struct tgsi_full_instruction *inst, 2014 int *pc ) 2015{ 2016 uint chan_index; 2017 union tgsi_exec_channel r[10]; 2018 2019 (*pc)++; 2020 2021 switch (inst->Instruction.Opcode) { 2022 case TGSI_OPCODE_ARL: 2023 case TGSI_OPCODE_FLR: 2024 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2025 FETCH( &r[0], 0, chan_index ); 2026 micro_flr( &r[0], &r[0] ); 2027 STORE( &r[0], 0, chan_index ); 2028 } 2029 break; 2030 2031 case TGSI_OPCODE_MOV: 2032 case TGSI_OPCODE_SWZ: 2033 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2034 FETCH( &r[0], 0, chan_index ); 2035 STORE( &r[0], 0, chan_index ); 2036 } 2037 break; 2038 2039 case TGSI_OPCODE_LIT: 2040 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2041 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2042 } 2043 2044 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2045 FETCH( &r[0], 0, CHAN_X ); 2046 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2047 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2048 STORE( &r[0], 0, CHAN_Y ); 2049 } 2050 2051 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2052 FETCH( &r[1], 0, CHAN_Y ); 2053 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2054 2055 FETCH( &r[2], 0, CHAN_W ); 2056 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2057 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2058 micro_pow( &r[1], &r[1], &r[2] ); 2059 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2060 STORE( &r[0], 0, CHAN_Z ); 2061 } 2062 } 2063 2064 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2065 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2066 } 2067 break; 2068 2069 case TGSI_OPCODE_RCP: 2070 /* TGSI_OPCODE_RECIP */ 2071 FETCH( &r[0], 0, CHAN_X ); 2072 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2074 STORE( &r[0], 0, chan_index ); 2075 } 2076 break; 2077 2078 case TGSI_OPCODE_RSQ: 2079 /* TGSI_OPCODE_RECIPSQRT */ 2080 FETCH( &r[0], 0, CHAN_X ); 2081 micro_abs( &r[0], &r[0] ); 2082 micro_sqrt( &r[0], &r[0] ); 2083 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 2084 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2085 STORE( &r[0], 0, chan_index ); 2086 } 2087 break; 2088 2089 case TGSI_OPCODE_EXP: 2090 FETCH( &r[0], 0, CHAN_X ); 2091 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2092 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2093 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2094 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2095 } 2096 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2097 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2098 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2099 } 2100 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2101 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2102 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2103 } 2104 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2105 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2106 } 2107 break; 2108 2109 case TGSI_OPCODE_LOG: 2110 FETCH( &r[0], 0, CHAN_X ); 2111 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2112 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2113 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2114 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2115 STORE( &r[0], 0, CHAN_X ); 2116 } 2117 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2118 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2119 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2120 STORE( &r[0], 0, CHAN_Y ); 2121 } 2122 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2123 STORE( &r[1], 0, CHAN_Z ); 2124 } 2125 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2126 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2127 } 2128 break; 2129 2130 case TGSI_OPCODE_MUL: 2131 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 2132 { 2133 FETCH(&r[0], 0, chan_index); 2134 FETCH(&r[1], 1, chan_index); 2135 2136 micro_mul( &r[0], &r[0], &r[1] ); 2137 2138 STORE(&r[0], 0, chan_index); 2139 } 2140 break; 2141 2142 case TGSI_OPCODE_ADD: 2143 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2144 FETCH( &r[0], 0, chan_index ); 2145 FETCH( &r[1], 1, chan_index ); 2146 micro_add( &r[0], &r[0], &r[1] ); 2147 STORE( &r[0], 0, chan_index ); 2148 } 2149 break; 2150 2151 case TGSI_OPCODE_DP3: 2152 /* TGSI_OPCODE_DOT3 */ 2153 FETCH( &r[0], 0, CHAN_X ); 2154 FETCH( &r[1], 1, CHAN_X ); 2155 micro_mul( &r[0], &r[0], &r[1] ); 2156 2157 FETCH( &r[1], 0, CHAN_Y ); 2158 FETCH( &r[2], 1, CHAN_Y ); 2159 micro_mul( &r[1], &r[1], &r[2] ); 2160 micro_add( &r[0], &r[0], &r[1] ); 2161 2162 FETCH( &r[1], 0, CHAN_Z ); 2163 FETCH( &r[2], 1, CHAN_Z ); 2164 micro_mul( &r[1], &r[1], &r[2] ); 2165 micro_add( &r[0], &r[0], &r[1] ); 2166 2167 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2168 STORE( &r[0], 0, chan_index ); 2169 } 2170 break; 2171 2172 case TGSI_OPCODE_DP4: 2173 /* TGSI_OPCODE_DOT4 */ 2174 FETCH(&r[0], 0, CHAN_X); 2175 FETCH(&r[1], 1, CHAN_X); 2176 2177 micro_mul( &r[0], &r[0], &r[1] ); 2178 2179 FETCH(&r[1], 0, CHAN_Y); 2180 FETCH(&r[2], 1, CHAN_Y); 2181 2182 micro_mul( &r[1], &r[1], &r[2] ); 2183 micro_add( &r[0], &r[0], &r[1] ); 2184 2185 FETCH(&r[1], 0, CHAN_Z); 2186 FETCH(&r[2], 1, CHAN_Z); 2187 2188 micro_mul( &r[1], &r[1], &r[2] ); 2189 micro_add( &r[0], &r[0], &r[1] ); 2190 2191 FETCH(&r[1], 0, CHAN_W); 2192 FETCH(&r[2], 1, CHAN_W); 2193 2194 micro_mul( &r[1], &r[1], &r[2] ); 2195 micro_add( &r[0], &r[0], &r[1] ); 2196 2197 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2198 STORE( &r[0], 0, chan_index ); 2199 } 2200 break; 2201 2202 case TGSI_OPCODE_DST: 2203 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2204 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2205 } 2206 2207 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2208 FETCH( &r[0], 0, CHAN_Y ); 2209 FETCH( &r[1], 1, CHAN_Y); 2210 micro_mul( &r[0], &r[0], &r[1] ); 2211 STORE( &r[0], 0, CHAN_Y ); 2212 } 2213 2214 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2215 FETCH( &r[0], 0, CHAN_Z ); 2216 STORE( &r[0], 0, CHAN_Z ); 2217 } 2218 2219 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2220 FETCH( &r[0], 1, CHAN_W ); 2221 STORE( &r[0], 0, CHAN_W ); 2222 } 2223 break; 2224 2225 case TGSI_OPCODE_MIN: 2226 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2227 FETCH(&r[0], 0, chan_index); 2228 FETCH(&r[1], 1, chan_index); 2229 2230 /* XXX use micro_min()?? */ 2231 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2232 2233 STORE(&r[0], 0, chan_index); 2234 } 2235 break; 2236 2237 case TGSI_OPCODE_MAX: 2238 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2239 FETCH(&r[0], 0, chan_index); 2240 FETCH(&r[1], 1, chan_index); 2241 2242 /* XXX use micro_max()?? */ 2243 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2244 2245 STORE(&r[0], 0, chan_index ); 2246 } 2247 break; 2248 2249 case TGSI_OPCODE_SLT: 2250 /* TGSI_OPCODE_SETLT */ 2251 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2252 FETCH( &r[0], 0, chan_index ); 2253 FETCH( &r[1], 1, chan_index ); 2254 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2255 STORE( &r[0], 0, chan_index ); 2256 } 2257 break; 2258 2259 case TGSI_OPCODE_SGE: 2260 /* TGSI_OPCODE_SETGE */ 2261 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2262 FETCH( &r[0], 0, chan_index ); 2263 FETCH( &r[1], 1, chan_index ); 2264 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2265 STORE( &r[0], 0, chan_index ); 2266 } 2267 break; 2268 2269 case TGSI_OPCODE_MAD: 2270 /* TGSI_OPCODE_MADD */ 2271 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2272 FETCH( &r[0], 0, chan_index ); 2273 FETCH( &r[1], 1, chan_index ); 2274 micro_mul( &r[0], &r[0], &r[1] ); 2275 FETCH( &r[1], 2, chan_index ); 2276 micro_add( &r[0], &r[0], &r[1] ); 2277 STORE( &r[0], 0, chan_index ); 2278 } 2279 break; 2280 2281 case TGSI_OPCODE_SUB: 2282 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2283 FETCH(&r[0], 0, chan_index); 2284 FETCH(&r[1], 1, chan_index); 2285 2286 micro_sub( &r[0], &r[0], &r[1] ); 2287 2288 STORE(&r[0], 0, chan_index); 2289 } 2290 break; 2291 2292 case TGSI_OPCODE_LRP: 2293 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2294 FETCH(&r[0], 0, chan_index); 2295 FETCH(&r[1], 1, chan_index); 2296 FETCH(&r[2], 2, chan_index); 2297 2298 micro_sub( &r[1], &r[1], &r[2] ); 2299 micro_mul( &r[0], &r[0], &r[1] ); 2300 micro_add( &r[0], &r[0], &r[2] ); 2301 2302 STORE(&r[0], 0, chan_index); 2303 } 2304 break; 2305 2306 case TGSI_OPCODE_CND: 2307 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2308 FETCH(&r[0], 0, chan_index); 2309 FETCH(&r[1], 1, chan_index); 2310 FETCH(&r[2], 2, chan_index); 2311 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2312 STORE(&r[0], 0, chan_index); 2313 } 2314 break; 2315 2316 case TGSI_OPCODE_CND0: 2317 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2318 FETCH(&r[0], 0, chan_index); 2319 FETCH(&r[1], 1, chan_index); 2320 FETCH(&r[2], 2, chan_index); 2321 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); 2322 STORE(&r[0], 0, chan_index); 2323 } 2324 break; 2325 2326 case TGSI_OPCODE_DP2A: 2327 FETCH( &r[0], 0, CHAN_X ); 2328 FETCH( &r[1], 1, CHAN_X ); 2329 micro_mul( &r[0], &r[0], &r[1] ); 2330 2331 FETCH( &r[1], 0, CHAN_Y ); 2332 FETCH( &r[2], 1, CHAN_Y ); 2333 micro_mul( &r[1], &r[1], &r[2] ); 2334 micro_add( &r[0], &r[0], &r[1] ); 2335 2336 FETCH( &r[2], 2, CHAN_X ); 2337 micro_add( &r[0], &r[0], &r[2] ); 2338 2339 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2340 STORE( &r[0], 0, chan_index ); 2341 } 2342 break; 2343 2344 case TGSI_OPCODE_INDEX: 2345 /* XXX: considered for removal */ 2346 assert (0); 2347 break; 2348 2349 case TGSI_OPCODE_NEGATE: 2350 /* XXX: considered for removal */ 2351 assert (0); 2352 break; 2353 2354 case TGSI_OPCODE_FRC: 2355 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2356 FETCH( &r[0], 0, chan_index ); 2357 micro_frc( &r[0], &r[0] ); 2358 STORE( &r[0], 0, chan_index ); 2359 } 2360 break; 2361 2362 case TGSI_OPCODE_CLAMP: 2363 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2364 FETCH(&r[0], 0, chan_index); 2365 FETCH(&r[1], 1, chan_index); 2366 micro_max(&r[0], &r[0], &r[1]); 2367 FETCH(&r[1], 2, chan_index); 2368 micro_min(&r[0], &r[0], &r[1]); 2369 STORE(&r[0], 0, chan_index); 2370 } 2371 break; 2372 2373 case TGSI_OPCODE_ROUND: 2374 case TGSI_OPCODE_ARR: 2375 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2376 FETCH( &r[0], 0, chan_index ); 2377 micro_rnd( &r[0], &r[0] ); 2378 STORE( &r[0], 0, chan_index ); 2379 } 2380 break; 2381 2382 case TGSI_OPCODE_EX2: 2383 FETCH(&r[0], 0, CHAN_X); 2384 2385#if FAST_MATH 2386 micro_exp2( &r[0], &r[0] ); 2387#else 2388 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2389#endif 2390 2391 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2392 STORE( &r[0], 0, chan_index ); 2393 } 2394 break; 2395 2396 case TGSI_OPCODE_LG2: 2397 FETCH( &r[0], 0, CHAN_X ); 2398 micro_lg2( &r[0], &r[0] ); 2399 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2400 STORE( &r[0], 0, chan_index ); 2401 } 2402 break; 2403 2404 case TGSI_OPCODE_POW: 2405 FETCH(&r[0], 0, CHAN_X); 2406 FETCH(&r[1], 1, CHAN_X); 2407 2408 micro_pow( &r[0], &r[0], &r[1] ); 2409 2410 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2411 STORE( &r[0], 0, chan_index ); 2412 } 2413 break; 2414 2415 case TGSI_OPCODE_XPD: 2416 FETCH(&r[0], 0, CHAN_Y); 2417 FETCH(&r[1], 1, CHAN_Z); 2418 2419 micro_mul( &r[2], &r[0], &r[1] ); 2420 2421 FETCH(&r[3], 0, CHAN_Z); 2422 FETCH(&r[4], 1, CHAN_Y); 2423 2424 micro_mul( &r[5], &r[3], &r[4] ); 2425 micro_sub( &r[2], &r[2], &r[5] ); 2426 2427 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2428 STORE( &r[2], 0, CHAN_X ); 2429 } 2430 2431 FETCH(&r[2], 1, CHAN_X); 2432 2433 micro_mul( &r[3], &r[3], &r[2] ); 2434 2435 FETCH(&r[5], 0, CHAN_X); 2436 2437 micro_mul( &r[1], &r[1], &r[5] ); 2438 micro_sub( &r[3], &r[3], &r[1] ); 2439 2440 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2441 STORE( &r[3], 0, CHAN_Y ); 2442 } 2443 2444 micro_mul( &r[5], &r[5], &r[4] ); 2445 micro_mul( &r[0], &r[0], &r[2] ); 2446 micro_sub( &r[5], &r[5], &r[0] ); 2447 2448 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2449 STORE( &r[5], 0, CHAN_Z ); 2450 } 2451 2452 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2453 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2454 } 2455 break; 2456 2457 case TGSI_OPCODE_MULTIPLYMATRIX: 2458 /* XXX: considered for removal */ 2459 assert (0); 2460 break; 2461 2462 case TGSI_OPCODE_ABS: 2463 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2464 FETCH(&r[0], 0, chan_index); 2465 2466 micro_abs( &r[0], &r[0] ); 2467 2468 STORE(&r[0], 0, chan_index); 2469 } 2470 break; 2471 2472 case TGSI_OPCODE_RCC: 2473 FETCH(&r[0], 0, CHAN_X); 2474 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2475 micro_float_clamp(&r[0], &r[0]); 2476 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2477 STORE(&r[0], 0, chan_index); 2478 } 2479 break; 2480 2481 case TGSI_OPCODE_DPH: 2482 FETCH(&r[0], 0, CHAN_X); 2483 FETCH(&r[1], 1, CHAN_X); 2484 2485 micro_mul( &r[0], &r[0], &r[1] ); 2486 2487 FETCH(&r[1], 0, CHAN_Y); 2488 FETCH(&r[2], 1, CHAN_Y); 2489 2490 micro_mul( &r[1], &r[1], &r[2] ); 2491 micro_add( &r[0], &r[0], &r[1] ); 2492 2493 FETCH(&r[1], 0, CHAN_Z); 2494 FETCH(&r[2], 1, CHAN_Z); 2495 2496 micro_mul( &r[1], &r[1], &r[2] ); 2497 micro_add( &r[0], &r[0], &r[1] ); 2498 2499 FETCH(&r[1], 1, CHAN_W); 2500 2501 micro_add( &r[0], &r[0], &r[1] ); 2502 2503 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2504 STORE( &r[0], 0, chan_index ); 2505 } 2506 break; 2507 2508 case TGSI_OPCODE_COS: 2509 FETCH(&r[0], 0, CHAN_X); 2510 2511 micro_cos( &r[0], &r[0] ); 2512 2513 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2514 STORE( &r[0], 0, chan_index ); 2515 } 2516 break; 2517 2518 case TGSI_OPCODE_DDX: 2519 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2520 FETCH( &r[0], 0, chan_index ); 2521 micro_ddx( &r[0], &r[0] ); 2522 STORE( &r[0], 0, chan_index ); 2523 } 2524 break; 2525 2526 case TGSI_OPCODE_DDY: 2527 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2528 FETCH( &r[0], 0, chan_index ); 2529 micro_ddy( &r[0], &r[0] ); 2530 STORE( &r[0], 0, chan_index ); 2531 } 2532 break; 2533 2534 case TGSI_OPCODE_KILP: 2535 exec_kilp (mach, inst); 2536 break; 2537 2538 case TGSI_OPCODE_KIL: 2539 exec_kil (mach, inst); 2540 break; 2541 2542 case TGSI_OPCODE_PK2H: 2543 assert (0); 2544 break; 2545 2546 case TGSI_OPCODE_PK2US: 2547 assert (0); 2548 break; 2549 2550 case TGSI_OPCODE_PK4B: 2551 assert (0); 2552 break; 2553 2554 case TGSI_OPCODE_PK4UB: 2555 assert (0); 2556 break; 2557 2558 case TGSI_OPCODE_RFL: 2559 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2560 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2561 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2562 /* r0 = dp3(src0, src0) */ 2563 FETCH(&r[2], 0, CHAN_X); 2564 micro_mul(&r[0], &r[2], &r[2]); 2565 FETCH(&r[4], 0, CHAN_Y); 2566 micro_mul(&r[8], &r[4], &r[4]); 2567 micro_add(&r[0], &r[0], &r[8]); 2568 FETCH(&r[6], 0, CHAN_Z); 2569 micro_mul(&r[8], &r[6], &r[6]); 2570 micro_add(&r[0], &r[0], &r[8]); 2571 2572 /* r1 = dp3(src0, src1) */ 2573 FETCH(&r[3], 1, CHAN_X); 2574 micro_mul(&r[1], &r[2], &r[3]); 2575 FETCH(&r[5], 1, CHAN_Y); 2576 micro_mul(&r[8], &r[4], &r[5]); 2577 micro_add(&r[1], &r[1], &r[8]); 2578 FETCH(&r[7], 1, CHAN_Z); 2579 micro_mul(&r[8], &r[6], &r[7]); 2580 micro_add(&r[1], &r[1], &r[8]); 2581 2582 /* r1 = 2 * r1 / r0 */ 2583 micro_add(&r[1], &r[1], &r[1]); 2584 micro_div(&r[1], &r[1], &r[0]); 2585 2586 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2587 micro_mul(&r[2], &r[2], &r[1]); 2588 micro_sub(&r[2], &r[2], &r[3]); 2589 STORE(&r[2], 0, CHAN_X); 2590 } 2591 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2592 micro_mul(&r[4], &r[4], &r[1]); 2593 micro_sub(&r[4], &r[4], &r[5]); 2594 STORE(&r[4], 0, CHAN_Y); 2595 } 2596 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2597 micro_mul(&r[6], &r[6], &r[1]); 2598 micro_sub(&r[6], &r[6], &r[7]); 2599 STORE(&r[6], 0, CHAN_Z); 2600 } 2601 } 2602 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2603 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2604 } 2605 break; 2606 2607 case TGSI_OPCODE_SEQ: 2608 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2609 FETCH( &r[0], 0, chan_index ); 2610 FETCH( &r[1], 1, chan_index ); 2611 micro_eq( &r[0], &r[0], &r[1], 2612 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2613 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2614 STORE( &r[0], 0, chan_index ); 2615 } 2616 break; 2617 2618 case TGSI_OPCODE_SFL: 2619 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2620 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2621 } 2622 break; 2623 2624 case TGSI_OPCODE_SGT: 2625 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2626 FETCH( &r[0], 0, chan_index ); 2627 FETCH( &r[1], 1, chan_index ); 2628 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2629 STORE( &r[0], 0, chan_index ); 2630 } 2631 break; 2632 2633 case TGSI_OPCODE_SIN: 2634 FETCH( &r[0], 0, CHAN_X ); 2635 micro_sin( &r[0], &r[0] ); 2636 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2637 STORE( &r[0], 0, chan_index ); 2638 } 2639 break; 2640 2641 case TGSI_OPCODE_SLE: 2642 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2643 FETCH( &r[0], 0, chan_index ); 2644 FETCH( &r[1], 1, chan_index ); 2645 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2646 STORE( &r[0], 0, chan_index ); 2647 } 2648 break; 2649 2650 case TGSI_OPCODE_SNE: 2651 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2652 FETCH( &r[0], 0, chan_index ); 2653 FETCH( &r[1], 1, chan_index ); 2654 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2655 STORE( &r[0], 0, chan_index ); 2656 } 2657 break; 2658 2659 case TGSI_OPCODE_STR: 2660 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2661 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2662 } 2663 break; 2664 2665 case TGSI_OPCODE_TEX: 2666 /* simple texture lookup */ 2667 /* src[0] = texcoord */ 2668 /* src[1] = sampler unit */ 2669 exec_tex(mach, inst, FALSE, FALSE); 2670 break; 2671 2672 case TGSI_OPCODE_TXB: 2673 /* Texture lookup with lod bias */ 2674 /* src[0] = texcoord (src[0].w = LOD bias) */ 2675 /* src[1] = sampler unit */ 2676 exec_tex(mach, inst, TRUE, FALSE); 2677 break; 2678 2679 case TGSI_OPCODE_TXD: 2680 /* Texture lookup with explict partial derivatives */ 2681 /* src[0] = texcoord */ 2682 /* src[1] = d[strq]/dx */ 2683 /* src[2] = d[strq]/dy */ 2684 /* src[3] = sampler unit */ 2685 assert (0); 2686 break; 2687 2688 case TGSI_OPCODE_TXL: 2689 /* Texture lookup with explit LOD */ 2690 /* src[0] = texcoord (src[0].w = LOD) */ 2691 /* src[1] = sampler unit */ 2692 exec_tex(mach, inst, TRUE, FALSE); 2693 break; 2694 2695 case TGSI_OPCODE_TXP: 2696 /* Texture lookup with projection */ 2697 /* src[0] = texcoord (src[0].w = projection) */ 2698 /* src[1] = sampler unit */ 2699 exec_tex(mach, inst, FALSE, TRUE); 2700 break; 2701 2702 case TGSI_OPCODE_UP2H: 2703 assert (0); 2704 break; 2705 2706 case TGSI_OPCODE_UP2US: 2707 assert (0); 2708 break; 2709 2710 case TGSI_OPCODE_UP4B: 2711 assert (0); 2712 break; 2713 2714 case TGSI_OPCODE_UP4UB: 2715 assert (0); 2716 break; 2717 2718 case TGSI_OPCODE_X2D: 2719 FETCH(&r[0], 1, CHAN_X); 2720 FETCH(&r[1], 1, CHAN_Y); 2721 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2722 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2723 FETCH(&r[2], 2, CHAN_X); 2724 micro_mul(&r[2], &r[2], &r[0]); 2725 FETCH(&r[3], 2, CHAN_Y); 2726 micro_mul(&r[3], &r[3], &r[1]); 2727 micro_add(&r[2], &r[2], &r[3]); 2728 FETCH(&r[3], 0, CHAN_X); 2729 micro_add(&r[2], &r[2], &r[3]); 2730 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2731 STORE(&r[2], 0, CHAN_X); 2732 } 2733 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2734 STORE(&r[2], 0, CHAN_Z); 2735 } 2736 } 2737 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2738 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2739 FETCH(&r[2], 2, CHAN_Z); 2740 micro_mul(&r[2], &r[2], &r[0]); 2741 FETCH(&r[3], 2, CHAN_W); 2742 micro_mul(&r[3], &r[3], &r[1]); 2743 micro_add(&r[2], &r[2], &r[3]); 2744 FETCH(&r[3], 0, CHAN_Y); 2745 micro_add(&r[2], &r[2], &r[3]); 2746 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2747 STORE(&r[2], 0, CHAN_Y); 2748 } 2749 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2750 STORE(&r[2], 0, CHAN_W); 2751 } 2752 } 2753 break; 2754 2755 case TGSI_OPCODE_ARA: 2756 assert (0); 2757 break; 2758 2759 case TGSI_OPCODE_BRA: 2760 assert (0); 2761 break; 2762 2763 case TGSI_OPCODE_CAL: 2764 /* skip the call if no execution channels are enabled */ 2765 if (mach->ExecMask) { 2766 /* do the call */ 2767 2768 /* push the Cond, Loop, Cont stacks */ 2769 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2770 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2771 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2772 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2773 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2774 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2775 2776 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2777 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2778 2779 /* note that PC was already incremented above */ 2780 mach->CallStack[mach->CallStackTop++] = *pc; 2781 *pc = inst->InstructionExtLabel.Label; 2782 } 2783 break; 2784 2785 case TGSI_OPCODE_RET: 2786 mach->FuncMask &= ~mach->ExecMask; 2787 UPDATE_EXEC_MASK(mach); 2788 2789 if (mach->FuncMask == 0x0) { 2790 /* really return now (otherwise, keep executing */ 2791 2792 if (mach->CallStackTop == 0) { 2793 /* returning from main() */ 2794 *pc = -1; 2795 return; 2796 } 2797 *pc = mach->CallStack[--mach->CallStackTop]; 2798 2799 /* pop the Cond, Loop, Cont stacks */ 2800 assert(mach->CondStackTop > 0); 2801 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2802 assert(mach->LoopStackTop > 0); 2803 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2804 assert(mach->ContStackTop > 0); 2805 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2806 assert(mach->FuncStackTop > 0); 2807 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2808 2809 UPDATE_EXEC_MASK(mach); 2810 } 2811 break; 2812 2813 case TGSI_OPCODE_SSG: 2814 /* TGSI_OPCODE_SGN */ 2815 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2816 FETCH( &r[0], 0, chan_index ); 2817 micro_sgn( &r[0], &r[0] ); 2818 STORE( &r[0], 0, chan_index ); 2819 } 2820 break; 2821 2822 case TGSI_OPCODE_CMP: 2823 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2824 FETCH(&r[0], 0, chan_index); 2825 FETCH(&r[1], 1, chan_index); 2826 FETCH(&r[2], 2, chan_index); 2827 2828 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2829 2830 STORE(&r[0], 0, chan_index); 2831 } 2832 break; 2833 2834 case TGSI_OPCODE_SCS: 2835 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2836 FETCH( &r[0], 0, CHAN_X ); 2837 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2838 micro_cos(&r[1], &r[0]); 2839 STORE(&r[1], 0, CHAN_X); 2840 } 2841 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2842 micro_sin(&r[1], &r[0]); 2843 STORE(&r[1], 0, CHAN_Y); 2844 } 2845 } 2846 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2847 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2848 } 2849 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2850 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2851 } 2852 break; 2853 2854 case TGSI_OPCODE_NRM: 2855 /* 3-component vector normalize */ 2856 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2857 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2858 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2859 /* r3 = sqrt(dp3(src0, src0)) */ 2860 FETCH(&r[0], 0, CHAN_X); 2861 micro_mul(&r[3], &r[0], &r[0]); 2862 FETCH(&r[1], 0, CHAN_Y); 2863 micro_mul(&r[4], &r[1], &r[1]); 2864 micro_add(&r[3], &r[3], &r[4]); 2865 FETCH(&r[2], 0, CHAN_Z); 2866 micro_mul(&r[4], &r[2], &r[2]); 2867 micro_add(&r[3], &r[3], &r[4]); 2868 micro_sqrt(&r[3], &r[3]); 2869 2870 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2871 micro_div(&r[0], &r[0], &r[3]); 2872 STORE(&r[0], 0, CHAN_X); 2873 } 2874 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2875 micro_div(&r[1], &r[1], &r[3]); 2876 STORE(&r[1], 0, CHAN_Y); 2877 } 2878 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2879 micro_div(&r[2], &r[2], &r[3]); 2880 STORE(&r[2], 0, CHAN_Z); 2881 } 2882 } 2883 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2884 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2885 } 2886 break; 2887 2888 case TGSI_OPCODE_NRM4: 2889 /* 4-component vector normalize */ 2890 { 2891 union tgsi_exec_channel tmp, dot; 2892 2893 /* tmp = dp4(src0, src0): */ 2894 FETCH( &r[0], 0, CHAN_X ); 2895 micro_mul( &tmp, &r[0], &r[0] ); 2896 2897 FETCH( &r[1], 0, CHAN_Y ); 2898 micro_mul( &dot, &r[1], &r[1] ); 2899 micro_add( &tmp, &tmp, &dot ); 2900 2901 FETCH( &r[2], 0, CHAN_Z ); 2902 micro_mul( &dot, &r[2], &r[2] ); 2903 micro_add( &tmp, &tmp, &dot ); 2904 2905 FETCH( &r[3], 0, CHAN_W ); 2906 micro_mul( &dot, &r[3], &r[3] ); 2907 micro_add( &tmp, &tmp, &dot ); 2908 2909 /* tmp = 1 / sqrt(tmp) */ 2910 micro_sqrt( &tmp, &tmp ); 2911 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2912 2913 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2914 /* chan = chan * tmp */ 2915 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2916 STORE( &r[chan_index], 0, chan_index ); 2917 } 2918 } 2919 break; 2920 2921 case TGSI_OPCODE_DIV: 2922 assert( 0 ); 2923 break; 2924 2925 case TGSI_OPCODE_DP2: 2926 FETCH( &r[0], 0, CHAN_X ); 2927 FETCH( &r[1], 1, CHAN_X ); 2928 micro_mul( &r[0], &r[0], &r[1] ); 2929 2930 FETCH( &r[1], 0, CHAN_Y ); 2931 FETCH( &r[2], 1, CHAN_Y ); 2932 micro_mul( &r[1], &r[1], &r[2] ); 2933 micro_add( &r[0], &r[0], &r[1] ); 2934 2935 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2936 STORE( &r[0], 0, chan_index ); 2937 } 2938 break; 2939 2940 case TGSI_OPCODE_IF: 2941 /* push CondMask */ 2942 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2943 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2944 FETCH( &r[0], 0, CHAN_X ); 2945 /* update CondMask */ 2946 if( ! r[0].u[0] ) { 2947 mach->CondMask &= ~0x1; 2948 } 2949 if( ! r[0].u[1] ) { 2950 mach->CondMask &= ~0x2; 2951 } 2952 if( ! r[0].u[2] ) { 2953 mach->CondMask &= ~0x4; 2954 } 2955 if( ! r[0].u[3] ) { 2956 mach->CondMask &= ~0x8; 2957 } 2958 UPDATE_EXEC_MASK(mach); 2959 /* Todo: If CondMask==0, jump to ELSE */ 2960 break; 2961 2962 case TGSI_OPCODE_ELSE: 2963 /* invert CondMask wrt previous mask */ 2964 { 2965 uint prevMask; 2966 assert(mach->CondStackTop > 0); 2967 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2968 mach->CondMask = ~mach->CondMask & prevMask; 2969 UPDATE_EXEC_MASK(mach); 2970 /* Todo: If CondMask==0, jump to ENDIF */ 2971 } 2972 break; 2973 2974 case TGSI_OPCODE_ENDIF: 2975 /* pop CondMask */ 2976 assert(mach->CondStackTop > 0); 2977 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2978 UPDATE_EXEC_MASK(mach); 2979 break; 2980 2981 case TGSI_OPCODE_END: 2982 /* halt execution */ 2983 *pc = -1; 2984 break; 2985 2986 case TGSI_OPCODE_REP: 2987 assert (0); 2988 break; 2989 2990 case TGSI_OPCODE_ENDREP: 2991 assert (0); 2992 break; 2993 2994 case TGSI_OPCODE_PUSHA: 2995 assert (0); 2996 break; 2997 2998 case TGSI_OPCODE_POPA: 2999 assert (0); 3000 break; 3001 3002 case TGSI_OPCODE_CEIL: 3003 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3004 FETCH( &r[0], 0, chan_index ); 3005 micro_ceil( &r[0], &r[0] ); 3006 STORE( &r[0], 0, chan_index ); 3007 } 3008 break; 3009 3010 case TGSI_OPCODE_I2F: 3011 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3012 FETCH( &r[0], 0, chan_index ); 3013 micro_i2f( &r[0], &r[0] ); 3014 STORE( &r[0], 0, chan_index ); 3015 } 3016 break; 3017 3018 case TGSI_OPCODE_NOT: 3019 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3020 FETCH( &r[0], 0, chan_index ); 3021 micro_not( &r[0], &r[0] ); 3022 STORE( &r[0], 0, chan_index ); 3023 } 3024 break; 3025 3026 case TGSI_OPCODE_TRUNC: 3027 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3028 FETCH( &r[0], 0, chan_index ); 3029 micro_trunc( &r[0], &r[0] ); 3030 STORE( &r[0], 0, chan_index ); 3031 } 3032 break; 3033 3034 case TGSI_OPCODE_SHL: 3035 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3036 FETCH( &r[0], 0, chan_index ); 3037 FETCH( &r[1], 1, chan_index ); 3038 micro_shl( &r[0], &r[0], &r[1] ); 3039 STORE( &r[0], 0, chan_index ); 3040 } 3041 break; 3042 3043 case TGSI_OPCODE_SHR: 3044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3045 FETCH( &r[0], 0, chan_index ); 3046 FETCH( &r[1], 1, chan_index ); 3047 micro_ishr( &r[0], &r[0], &r[1] ); 3048 STORE( &r[0], 0, chan_index ); 3049 } 3050 break; 3051 3052 case TGSI_OPCODE_AND: 3053 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3054 FETCH( &r[0], 0, chan_index ); 3055 FETCH( &r[1], 1, chan_index ); 3056 micro_and( &r[0], &r[0], &r[1] ); 3057 STORE( &r[0], 0, chan_index ); 3058 } 3059 break; 3060 3061 case TGSI_OPCODE_OR: 3062 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3063 FETCH( &r[0], 0, chan_index ); 3064 FETCH( &r[1], 1, chan_index ); 3065 micro_or( &r[0], &r[0], &r[1] ); 3066 STORE( &r[0], 0, chan_index ); 3067 } 3068 break; 3069 3070 case TGSI_OPCODE_MOD: 3071 assert (0); 3072 break; 3073 3074 case TGSI_OPCODE_XOR: 3075 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3076 FETCH( &r[0], 0, chan_index ); 3077 FETCH( &r[1], 1, chan_index ); 3078 micro_xor( &r[0], &r[0], &r[1] ); 3079 STORE( &r[0], 0, chan_index ); 3080 } 3081 break; 3082 3083 case TGSI_OPCODE_SAD: 3084 assert (0); 3085 break; 3086 3087 case TGSI_OPCODE_TXF: 3088 assert (0); 3089 break; 3090 3091 case TGSI_OPCODE_TXQ: 3092 assert (0); 3093 break; 3094 3095 case TGSI_OPCODE_EMIT: 3096 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 3097 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 3098 break; 3099 3100 case TGSI_OPCODE_ENDPRIM: 3101 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 3102 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 3103 break; 3104 3105 case TGSI_OPCODE_LOOP: 3106 /* fall-through (for now) */ 3107 case TGSI_OPCODE_BGNLOOP2: 3108 /* push LoopMask and ContMasks */ 3109 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3110 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3111 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3112 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3113 break; 3114 3115 case TGSI_OPCODE_ENDLOOP: 3116 /* fall-through (for now at least) */ 3117 case TGSI_OPCODE_ENDLOOP2: 3118 /* Restore ContMask, but don't pop */ 3119 assert(mach->ContStackTop > 0); 3120 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3121 UPDATE_EXEC_MASK(mach); 3122 if (mach->ExecMask) { 3123 /* repeat loop: jump to instruction just past BGNLOOP */ 3124 *pc = inst->InstructionExtLabel.Label + 1; 3125 } 3126 else { 3127 /* exit loop: pop LoopMask */ 3128 assert(mach->LoopStackTop > 0); 3129 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3130 /* pop ContMask */ 3131 assert(mach->ContStackTop > 0); 3132 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3133 } 3134 UPDATE_EXEC_MASK(mach); 3135 break; 3136 3137 case TGSI_OPCODE_BRK: 3138 /* turn off loop channels for each enabled exec channel */ 3139 mach->LoopMask &= ~mach->ExecMask; 3140 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3141 UPDATE_EXEC_MASK(mach); 3142 break; 3143 3144 case TGSI_OPCODE_CONT: 3145 /* turn off cont channels for each enabled exec channel */ 3146 mach->ContMask &= ~mach->ExecMask; 3147 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3148 UPDATE_EXEC_MASK(mach); 3149 break; 3150 3151 case TGSI_OPCODE_BGNSUB: 3152 /* no-op */ 3153 break; 3154 3155 case TGSI_OPCODE_ENDSUB: 3156 /* no-op */ 3157 break; 3158 3159 case TGSI_OPCODE_NOISE1: 3160 assert( 0 ); 3161 break; 3162 3163 case TGSI_OPCODE_NOISE2: 3164 assert( 0 ); 3165 break; 3166 3167 case TGSI_OPCODE_NOISE3: 3168 assert( 0 ); 3169 break; 3170 3171 case TGSI_OPCODE_NOISE4: 3172 assert( 0 ); 3173 break; 3174 3175 case TGSI_OPCODE_NOP: 3176 break; 3177 3178 default: 3179 assert( 0 ); 3180 } 3181} 3182 3183 3184/** 3185 * Run TGSI interpreter. 3186 * \return bitmask of "alive" quad components 3187 */ 3188uint 3189tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3190{ 3191 uint i; 3192 int pc = 0; 3193 3194 mach->CondMask = 0xf; 3195 mach->LoopMask = 0xf; 3196 mach->ContMask = 0xf; 3197 mach->FuncMask = 0xf; 3198 mach->ExecMask = 0xf; 3199 3200 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 3201 assert(mach->CondStackTop == 0); 3202 assert(mach->LoopStackTop == 0); 3203 assert(mach->ContStackTop == 0); 3204 assert(mach->CallStackTop == 0); 3205 3206 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3207 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3208 3209 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3210 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3211 mach->Primitives[0] = 0; 3212 } 3213 3214 for (i = 0; i < QUAD_SIZE; i++) { 3215 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3216 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3217 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3218 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3219 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3220 } 3221 3222 /* execute declarations (interpolants) */ 3223 for (i = 0; i < mach->NumDeclarations; i++) { 3224 exec_declaration( mach, mach->Declarations+i ); 3225 } 3226 3227 /* execute instructions, until pc is set to -1 */ 3228 while (pc != -1) { 3229 assert(pc < (int) mach->NumInstructions); 3230 exec_instruction( mach, mach->Instructions + pc, &pc ); 3231 } 3232 3233#if 0 3234 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3235 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3236 /* 3237 * Scale back depth component. 3238 */ 3239 for (i = 0; i < 4; i++) 3240 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3241 } 3242#endif 3243 3244 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3245} 3246