tgsi_exec.c revision cde758a2b50da8d7a8db5467f5629ce366380c41
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65#define TILE_TOP_LEFT 0 66#define TILE_TOP_RIGHT 1 67#define TILE_BOTTOM_LEFT 2 68#define TILE_BOTTOM_RIGHT 3 69 70#define CHAN_X 0 71#define CHAN_Y 1 72#define CHAN_Z 2 73#define CHAN_W 3 74 75/* 76 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 77 */ 78#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 79#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 80#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 81#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 82#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 83#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 84#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 85#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 86#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 87#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 88#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 89#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 90#define TEMP_128_I TGSI_EXEC_TEMP_128_I 91#define TEMP_128_C TGSI_EXEC_TEMP_128_C 92#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 93#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 94#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 95#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 96#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 97#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 98#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 99#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 100#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 101#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 102#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 103#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 104#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 105#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 106#define TEMP_R0 TGSI_EXEC_TEMP_R0 107#define TEMP_P0 TGSI_EXEC_TEMP_P0 108 109#define IS_CHANNEL_ENABLED(INST, CHAN)\ 110 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 111 112#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 113 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 114 115#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 116 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 117 if (IS_CHANNEL_ENABLED( INST, CHAN )) 118 119#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 120 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 121 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 122 123 124/** The execution mask depends on the conditional mask and the loop mask */ 125#define UPDATE_EXEC_MASK(MACH) \ 126 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 127 128 129static const union tgsi_exec_channel ZeroVec = 130 { { 0.0, 0.0, 0.0, 0.0 } }; 131 132 133#ifdef DEBUG 134static void 135check_inf_or_nan(const union tgsi_exec_channel *chan) 136{ 137 assert(!util_is_inf_or_nan(chan->f[0])); 138 assert(!util_is_inf_or_nan(chan->f[1])); 139 assert(!util_is_inf_or_nan(chan->f[2])); 140 assert(!util_is_inf_or_nan(chan->f[3])); 141} 142#endif 143 144 145#ifdef DEBUG 146static void 147print_chan(const char *msg, const union tgsi_exec_channel *chan) 148{ 149 debug_printf("%s = {%f, %f, %f, %f}\n", 150 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 151} 152#endif 153 154 155#ifdef DEBUG 156static void 157print_temp(const struct tgsi_exec_machine *mach, uint index) 158{ 159 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 160 int i; 161 debug_printf("Temp[%u] =\n", index); 162 for (i = 0; i < 4; i++) { 163 debug_printf(" %c: { %f, %f, %f, %f }\n", 164 "XYZW"[i], 165 tmp->xyzw[i].f[0], 166 tmp->xyzw[i].f[1], 167 tmp->xyzw[i].f[2], 168 tmp->xyzw[i].f[3]); 169 } 170} 171#endif 172 173 174/** 175 * Check if there's a potential src/dst register data dependency when 176 * using SOA execution. 177 * Example: 178 * MOV T, T.yxwz; 179 * This would expand into: 180 * MOV t0, t1; 181 * MOV t1, t0; 182 * MOV t2, t3; 183 * MOV t3, t2; 184 * The second instruction will have the wrong value for t0 if executed as-is. 185 */ 186boolean 187tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 188{ 189 uint i, chan; 190 191 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 192 if (writemask == TGSI_WRITEMASK_X || 193 writemask == TGSI_WRITEMASK_Y || 194 writemask == TGSI_WRITEMASK_Z || 195 writemask == TGSI_WRITEMASK_W || 196 writemask == TGSI_WRITEMASK_NONE) { 197 /* no chance of data dependency */ 198 return FALSE; 199 } 200 201 /* loop over src regs */ 202 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 203 if ((inst->FullSrcRegisters[i].SrcRegister.File == 204 inst->FullDstRegisters[0].DstRegister.File) && 205 (inst->FullSrcRegisters[i].SrcRegister.Index == 206 inst->FullDstRegisters[0].DstRegister.Index)) { 207 /* loop over dest channels */ 208 uint channelsWritten = 0x0; 209 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 210 /* check if we're reading a channel that's been written */ 211 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); 212 if (channelsWritten & (1 << swizzle)) { 213 return TRUE; 214 } 215 216 channelsWritten |= (1 << chan); 217 } 218 } 219 } 220 return FALSE; 221} 222 223 224/** 225 * Initialize machine state by expanding tokens to full instructions, 226 * allocating temporary storage, setting up constants, etc. 227 * After this, we can call tgsi_exec_machine_run() many times. 228 */ 229void 230tgsi_exec_machine_bind_shader( 231 struct tgsi_exec_machine *mach, 232 const struct tgsi_token *tokens, 233 uint numSamplers, 234 struct tgsi_sampler **samplers) 235{ 236 uint k; 237 struct tgsi_parse_context parse; 238 struct tgsi_exec_labels *labels = &mach->Labels; 239 struct tgsi_full_instruction *instructions; 240 struct tgsi_full_declaration *declarations; 241 uint maxInstructions = 10, numInstructions = 0; 242 uint maxDeclarations = 10, numDeclarations = 0; 243 uint instno = 0; 244 245#if 0 246 tgsi_dump(tokens, 0); 247#endif 248 249 util_init_math(); 250 251 mach->Tokens = tokens; 252 mach->Samplers = samplers; 253 254 k = tgsi_parse_init (&parse, mach->Tokens); 255 if (k != TGSI_PARSE_OK) { 256 debug_printf( "Problem parsing!\n" ); 257 return; 258 } 259 260 mach->Processor = parse.FullHeader.Processor.Processor; 261 mach->ImmLimit = 0; 262 labels->count = 0; 263 264 declarations = (struct tgsi_full_declaration *) 265 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 266 267 if (!declarations) { 268 return; 269 } 270 271 instructions = (struct tgsi_full_instruction *) 272 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 273 274 if (!instructions) { 275 FREE( declarations ); 276 return; 277 } 278 279 while( !tgsi_parse_end_of_tokens( &parse ) ) { 280 uint pointer = parse.Position; 281 uint i; 282 283 tgsi_parse_token( &parse ); 284 switch( parse.FullToken.Token.Type ) { 285 case TGSI_TOKEN_TYPE_DECLARATION: 286 /* save expanded declaration */ 287 if (numDeclarations == maxDeclarations) { 288 declarations = REALLOC(declarations, 289 maxDeclarations 290 * sizeof(struct tgsi_full_declaration), 291 (maxDeclarations + 10) 292 * sizeof(struct tgsi_full_declaration)); 293 maxDeclarations += 10; 294 } 295 memcpy(declarations + numDeclarations, 296 &parse.FullToken.FullDeclaration, 297 sizeof(declarations[0])); 298 numDeclarations++; 299 break; 300 301 case TGSI_TOKEN_TYPE_IMMEDIATE: 302 { 303 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 304 assert( size <= 4 ); 305 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 306 307 for( i = 0; i < size; i++ ) { 308 mach->Imms[mach->ImmLimit][i] = 309 parse.FullToken.FullImmediate.u[i].Float; 310 } 311 mach->ImmLimit += 1; 312 } 313 break; 314 315 case TGSI_TOKEN_TYPE_INSTRUCTION: 316 assert( labels->count < MAX_LABELS ); 317 318 labels->labels[labels->count][0] = instno; 319 labels->labels[labels->count][1] = pointer; 320 labels->count++; 321 322 /* save expanded instruction */ 323 if (numInstructions == maxInstructions) { 324 instructions = REALLOC(instructions, 325 maxInstructions 326 * sizeof(struct tgsi_full_instruction), 327 (maxInstructions + 10) 328 * sizeof(struct tgsi_full_instruction)); 329 maxInstructions += 10; 330 } 331 332 memcpy(instructions + numInstructions, 333 &parse.FullToken.FullInstruction, 334 sizeof(instructions[0])); 335 336 numInstructions++; 337 break; 338 339 default: 340 assert( 0 ); 341 } 342 } 343 tgsi_parse_free (&parse); 344 345 if (mach->Declarations) { 346 FREE( mach->Declarations ); 347 } 348 mach->Declarations = declarations; 349 mach->NumDeclarations = numDeclarations; 350 351 if (mach->Instructions) { 352 FREE( mach->Instructions ); 353 } 354 mach->Instructions = instructions; 355 mach->NumInstructions = numInstructions; 356} 357 358 359struct tgsi_exec_machine * 360tgsi_exec_machine_create( void ) 361{ 362 struct tgsi_exec_machine *mach; 363 uint i; 364 365 mach = align_malloc( sizeof *mach, 16 ); 366 if (!mach) 367 goto fail; 368 369 memset(mach, 0, sizeof(*mach)); 370 371 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 372 373 /* Setup constants. */ 374 for( i = 0; i < 4; i++ ) { 375 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 376 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 377 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 378 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 379 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 380 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 381 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 382 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 383 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 384 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 385 } 386 387#ifdef DEBUG 388 /* silence warnings */ 389 (void) print_chan; 390 (void) print_temp; 391#endif 392 393 return mach; 394 395fail: 396 align_free(mach); 397 return NULL; 398} 399 400 401void 402tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 403{ 404 if (mach) { 405 FREE(mach->Instructions); 406 FREE(mach->Declarations); 407 } 408 409 align_free(mach); 410} 411 412 413static void 414micro_abs( 415 union tgsi_exec_channel *dst, 416 const union tgsi_exec_channel *src ) 417{ 418 dst->f[0] = fabsf( src->f[0] ); 419 dst->f[1] = fabsf( src->f[1] ); 420 dst->f[2] = fabsf( src->f[2] ); 421 dst->f[3] = fabsf( src->f[3] ); 422} 423 424static void 425micro_add( 426 union tgsi_exec_channel *dst, 427 const union tgsi_exec_channel *src0, 428 const union tgsi_exec_channel *src1 ) 429{ 430 dst->f[0] = src0->f[0] + src1->f[0]; 431 dst->f[1] = src0->f[1] + src1->f[1]; 432 dst->f[2] = src0->f[2] + src1->f[2]; 433 dst->f[3] = src0->f[3] + src1->f[3]; 434} 435 436#if 0 437static void 438micro_iadd( 439 union tgsi_exec_channel *dst, 440 const union tgsi_exec_channel *src0, 441 const union tgsi_exec_channel *src1 ) 442{ 443 dst->i[0] = src0->i[0] + src1->i[0]; 444 dst->i[1] = src0->i[1] + src1->i[1]; 445 dst->i[2] = src0->i[2] + src1->i[2]; 446 dst->i[3] = src0->i[3] + src1->i[3]; 447} 448#endif 449 450static void 451micro_and( 452 union tgsi_exec_channel *dst, 453 const union tgsi_exec_channel *src0, 454 const union tgsi_exec_channel *src1 ) 455{ 456 dst->u[0] = src0->u[0] & src1->u[0]; 457 dst->u[1] = src0->u[1] & src1->u[1]; 458 dst->u[2] = src0->u[2] & src1->u[2]; 459 dst->u[3] = src0->u[3] & src1->u[3]; 460} 461 462static void 463micro_ceil( 464 union tgsi_exec_channel *dst, 465 const union tgsi_exec_channel *src ) 466{ 467 dst->f[0] = ceilf( src->f[0] ); 468 dst->f[1] = ceilf( src->f[1] ); 469 dst->f[2] = ceilf( src->f[2] ); 470 dst->f[3] = ceilf( src->f[3] ); 471} 472 473static void 474micro_cos( 475 union tgsi_exec_channel *dst, 476 const union tgsi_exec_channel *src ) 477{ 478 dst->f[0] = cosf( src->f[0] ); 479 dst->f[1] = cosf( src->f[1] ); 480 dst->f[2] = cosf( src->f[2] ); 481 dst->f[3] = cosf( src->f[3] ); 482} 483 484static void 485micro_ddx( 486 union tgsi_exec_channel *dst, 487 const union tgsi_exec_channel *src ) 488{ 489 dst->f[0] = 490 dst->f[1] = 491 dst->f[2] = 492 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 493} 494 495static void 496micro_ddy( 497 union tgsi_exec_channel *dst, 498 const union tgsi_exec_channel *src ) 499{ 500 dst->f[0] = 501 dst->f[1] = 502 dst->f[2] = 503 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 504} 505 506static void 507micro_div( 508 union tgsi_exec_channel *dst, 509 const union tgsi_exec_channel *src0, 510 const union tgsi_exec_channel *src1 ) 511{ 512 if (src1->f[0] != 0) { 513 dst->f[0] = src0->f[0] / src1->f[0]; 514 } 515 if (src1->f[1] != 0) { 516 dst->f[1] = src0->f[1] / src1->f[1]; 517 } 518 if (src1->f[2] != 0) { 519 dst->f[2] = src0->f[2] / src1->f[2]; 520 } 521 if (src1->f[3] != 0) { 522 dst->f[3] = src0->f[3] / src1->f[3]; 523 } 524} 525 526#if 0 527static void 528micro_udiv( 529 union tgsi_exec_channel *dst, 530 const union tgsi_exec_channel *src0, 531 const union tgsi_exec_channel *src1 ) 532{ 533 dst->u[0] = src0->u[0] / src1->u[0]; 534 dst->u[1] = src0->u[1] / src1->u[1]; 535 dst->u[2] = src0->u[2] / src1->u[2]; 536 dst->u[3] = src0->u[3] / src1->u[3]; 537} 538#endif 539 540static void 541micro_eq( 542 union tgsi_exec_channel *dst, 543 const union tgsi_exec_channel *src0, 544 const union tgsi_exec_channel *src1, 545 const union tgsi_exec_channel *src2, 546 const union tgsi_exec_channel *src3 ) 547{ 548 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 549 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 550 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 551 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 552} 553 554#if 0 555static void 556micro_ieq( 557 union tgsi_exec_channel *dst, 558 const union tgsi_exec_channel *src0, 559 const union tgsi_exec_channel *src1, 560 const union tgsi_exec_channel *src2, 561 const union tgsi_exec_channel *src3 ) 562{ 563 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 564 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 565 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 566 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 567} 568#endif 569 570static void 571micro_exp2( 572 union tgsi_exec_channel *dst, 573 const union tgsi_exec_channel *src) 574{ 575#if FAST_MATH 576 dst->f[0] = util_fast_exp2( src->f[0] ); 577 dst->f[1] = util_fast_exp2( src->f[1] ); 578 dst->f[2] = util_fast_exp2( src->f[2] ); 579 dst->f[3] = util_fast_exp2( src->f[3] ); 580#else 581 dst->f[0] = powf( 2.0f, src->f[0] ); 582 dst->f[1] = powf( 2.0f, src->f[1] ); 583 dst->f[2] = powf( 2.0f, src->f[2] ); 584 dst->f[3] = powf( 2.0f, src->f[3] ); 585#endif 586} 587 588#if 0 589static void 590micro_f2ut( 591 union tgsi_exec_channel *dst, 592 const union tgsi_exec_channel *src ) 593{ 594 dst->u[0] = (uint) src->f[0]; 595 dst->u[1] = (uint) src->f[1]; 596 dst->u[2] = (uint) src->f[2]; 597 dst->u[3] = (uint) src->f[3]; 598} 599#endif 600 601static void 602micro_float_clamp(union tgsi_exec_channel *dst, 603 const union tgsi_exec_channel *src) 604{ 605 uint i; 606 607 for (i = 0; i < 4; i++) { 608 if (src->f[i] > 0.0f) { 609 if (src->f[i] > 1.884467e+019f) 610 dst->f[i] = 1.884467e+019f; 611 else if (src->f[i] < 5.42101e-020f) 612 dst->f[i] = 5.42101e-020f; 613 else 614 dst->f[i] = src->f[i]; 615 } 616 else { 617 if (src->f[i] < -1.884467e+019f) 618 dst->f[i] = -1.884467e+019f; 619 else if (src->f[i] > -5.42101e-020f) 620 dst->f[i] = -5.42101e-020f; 621 else 622 dst->f[i] = src->f[i]; 623 } 624 } 625} 626 627static void 628micro_flr( 629 union tgsi_exec_channel *dst, 630 const union tgsi_exec_channel *src ) 631{ 632 dst->f[0] = floorf( src->f[0] ); 633 dst->f[1] = floorf( src->f[1] ); 634 dst->f[2] = floorf( src->f[2] ); 635 dst->f[3] = floorf( src->f[3] ); 636} 637 638static void 639micro_frc( 640 union tgsi_exec_channel *dst, 641 const union tgsi_exec_channel *src ) 642{ 643 dst->f[0] = src->f[0] - floorf( src->f[0] ); 644 dst->f[1] = src->f[1] - floorf( src->f[1] ); 645 dst->f[2] = src->f[2] - floorf( src->f[2] ); 646 dst->f[3] = src->f[3] - floorf( src->f[3] ); 647} 648 649static void 650micro_i2f( 651 union tgsi_exec_channel *dst, 652 const union tgsi_exec_channel *src ) 653{ 654 dst->f[0] = (float) src->i[0]; 655 dst->f[1] = (float) src->i[1]; 656 dst->f[2] = (float) src->i[2]; 657 dst->f[3] = (float) src->i[3]; 658} 659 660static void 661micro_lg2( 662 union tgsi_exec_channel *dst, 663 const union tgsi_exec_channel *src ) 664{ 665#if FAST_MATH 666 dst->f[0] = util_fast_log2( src->f[0] ); 667 dst->f[1] = util_fast_log2( src->f[1] ); 668 dst->f[2] = util_fast_log2( src->f[2] ); 669 dst->f[3] = util_fast_log2( src->f[3] ); 670#else 671 dst->f[0] = logf( src->f[0] ) * 1.442695f; 672 dst->f[1] = logf( src->f[1] ) * 1.442695f; 673 dst->f[2] = logf( src->f[2] ) * 1.442695f; 674 dst->f[3] = logf( src->f[3] ) * 1.442695f; 675#endif 676} 677 678static void 679micro_le( 680 union tgsi_exec_channel *dst, 681 const union tgsi_exec_channel *src0, 682 const union tgsi_exec_channel *src1, 683 const union tgsi_exec_channel *src2, 684 const union tgsi_exec_channel *src3 ) 685{ 686 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 687 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 688 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 689 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 690} 691 692static void 693micro_lt( 694 union tgsi_exec_channel *dst, 695 const union tgsi_exec_channel *src0, 696 const union tgsi_exec_channel *src1, 697 const union tgsi_exec_channel *src2, 698 const union tgsi_exec_channel *src3 ) 699{ 700 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 701 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 702 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 703 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 704} 705 706#if 0 707static void 708micro_ilt( 709 union tgsi_exec_channel *dst, 710 const union tgsi_exec_channel *src0, 711 const union tgsi_exec_channel *src1, 712 const union tgsi_exec_channel *src2, 713 const union tgsi_exec_channel *src3 ) 714{ 715 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 716 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 717 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 718 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 719} 720#endif 721 722#if 0 723static void 724micro_ult( 725 union tgsi_exec_channel *dst, 726 const union tgsi_exec_channel *src0, 727 const union tgsi_exec_channel *src1, 728 const union tgsi_exec_channel *src2, 729 const union tgsi_exec_channel *src3 ) 730{ 731 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 732 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 733 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 734 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 735} 736#endif 737 738static void 739micro_max( 740 union tgsi_exec_channel *dst, 741 const union tgsi_exec_channel *src0, 742 const union tgsi_exec_channel *src1 ) 743{ 744 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 745 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 746 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 747 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 748} 749 750#if 0 751static void 752micro_imax( 753 union tgsi_exec_channel *dst, 754 const union tgsi_exec_channel *src0, 755 const union tgsi_exec_channel *src1 ) 756{ 757 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 758 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 759 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 760 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 761} 762#endif 763 764#if 0 765static void 766micro_umax( 767 union tgsi_exec_channel *dst, 768 const union tgsi_exec_channel *src0, 769 const union tgsi_exec_channel *src1 ) 770{ 771 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 772 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 773 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 774 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 775} 776#endif 777 778static void 779micro_min( 780 union tgsi_exec_channel *dst, 781 const union tgsi_exec_channel *src0, 782 const union tgsi_exec_channel *src1 ) 783{ 784 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 785 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 786 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 787 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 788} 789 790#if 0 791static void 792micro_imin( 793 union tgsi_exec_channel *dst, 794 const union tgsi_exec_channel *src0, 795 const union tgsi_exec_channel *src1 ) 796{ 797 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 798 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 799 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 800 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 801} 802#endif 803 804#if 0 805static void 806micro_umin( 807 union tgsi_exec_channel *dst, 808 const union tgsi_exec_channel *src0, 809 const union tgsi_exec_channel *src1 ) 810{ 811 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 812 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 813 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 814 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 815} 816#endif 817 818#if 0 819static void 820micro_umod( 821 union tgsi_exec_channel *dst, 822 const union tgsi_exec_channel *src0, 823 const union tgsi_exec_channel *src1 ) 824{ 825 dst->u[0] = src0->u[0] % src1->u[0]; 826 dst->u[1] = src0->u[1] % src1->u[1]; 827 dst->u[2] = src0->u[2] % src1->u[2]; 828 dst->u[3] = src0->u[3] % src1->u[3]; 829} 830#endif 831 832static void 833micro_mul( 834 union tgsi_exec_channel *dst, 835 const union tgsi_exec_channel *src0, 836 const union tgsi_exec_channel *src1 ) 837{ 838 dst->f[0] = src0->f[0] * src1->f[0]; 839 dst->f[1] = src0->f[1] * src1->f[1]; 840 dst->f[2] = src0->f[2] * src1->f[2]; 841 dst->f[3] = src0->f[3] * src1->f[3]; 842} 843 844#if 0 845static void 846micro_imul( 847 union tgsi_exec_channel *dst, 848 const union tgsi_exec_channel *src0, 849 const union tgsi_exec_channel *src1 ) 850{ 851 dst->i[0] = src0->i[0] * src1->i[0]; 852 dst->i[1] = src0->i[1] * src1->i[1]; 853 dst->i[2] = src0->i[2] * src1->i[2]; 854 dst->i[3] = src0->i[3] * src1->i[3]; 855} 856#endif 857 858#if 0 859static void 860micro_imul64( 861 union tgsi_exec_channel *dst0, 862 union tgsi_exec_channel *dst1, 863 const union tgsi_exec_channel *src0, 864 const union tgsi_exec_channel *src1 ) 865{ 866 dst1->i[0] = src0->i[0] * src1->i[0]; 867 dst1->i[1] = src0->i[1] * src1->i[1]; 868 dst1->i[2] = src0->i[2] * src1->i[2]; 869 dst1->i[3] = src0->i[3] * src1->i[3]; 870 dst0->i[0] = 0; 871 dst0->i[1] = 0; 872 dst0->i[2] = 0; 873 dst0->i[3] = 0; 874} 875#endif 876 877#if 0 878static void 879micro_umul64( 880 union tgsi_exec_channel *dst0, 881 union tgsi_exec_channel *dst1, 882 const union tgsi_exec_channel *src0, 883 const union tgsi_exec_channel *src1 ) 884{ 885 dst1->u[0] = src0->u[0] * src1->u[0]; 886 dst1->u[1] = src0->u[1] * src1->u[1]; 887 dst1->u[2] = src0->u[2] * src1->u[2]; 888 dst1->u[3] = src0->u[3] * src1->u[3]; 889 dst0->u[0] = 0; 890 dst0->u[1] = 0; 891 dst0->u[2] = 0; 892 dst0->u[3] = 0; 893} 894#endif 895 896 897#if 0 898static void 899micro_movc( 900 union tgsi_exec_channel *dst, 901 const union tgsi_exec_channel *src0, 902 const union tgsi_exec_channel *src1, 903 const union tgsi_exec_channel *src2 ) 904{ 905 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 906 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 907 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 908 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 909} 910#endif 911 912static void 913micro_neg( 914 union tgsi_exec_channel *dst, 915 const union tgsi_exec_channel *src ) 916{ 917 dst->f[0] = -src->f[0]; 918 dst->f[1] = -src->f[1]; 919 dst->f[2] = -src->f[2]; 920 dst->f[3] = -src->f[3]; 921} 922 923#if 0 924static void 925micro_ineg( 926 union tgsi_exec_channel *dst, 927 const union tgsi_exec_channel *src ) 928{ 929 dst->i[0] = -src->i[0]; 930 dst->i[1] = -src->i[1]; 931 dst->i[2] = -src->i[2]; 932 dst->i[3] = -src->i[3]; 933} 934#endif 935 936static void 937micro_not( 938 union tgsi_exec_channel *dst, 939 const union tgsi_exec_channel *src ) 940{ 941 dst->u[0] = ~src->u[0]; 942 dst->u[1] = ~src->u[1]; 943 dst->u[2] = ~src->u[2]; 944 dst->u[3] = ~src->u[3]; 945} 946 947static void 948micro_or( 949 union tgsi_exec_channel *dst, 950 const union tgsi_exec_channel *src0, 951 const union tgsi_exec_channel *src1 ) 952{ 953 dst->u[0] = src0->u[0] | src1->u[0]; 954 dst->u[1] = src0->u[1] | src1->u[1]; 955 dst->u[2] = src0->u[2] | src1->u[2]; 956 dst->u[3] = src0->u[3] | src1->u[3]; 957} 958 959static void 960micro_pow( 961 union tgsi_exec_channel *dst, 962 const union tgsi_exec_channel *src0, 963 const union tgsi_exec_channel *src1 ) 964{ 965#if FAST_MATH 966 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 967 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 968 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 969 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 970#else 971 dst->f[0] = powf( src0->f[0], src1->f[0] ); 972 dst->f[1] = powf( src0->f[1], src1->f[1] ); 973 dst->f[2] = powf( src0->f[2], src1->f[2] ); 974 dst->f[3] = powf( src0->f[3], src1->f[3] ); 975#endif 976} 977 978static void 979micro_rnd( 980 union tgsi_exec_channel *dst, 981 const union tgsi_exec_channel *src ) 982{ 983 dst->f[0] = floorf( src->f[0] + 0.5f ); 984 dst->f[1] = floorf( src->f[1] + 0.5f ); 985 dst->f[2] = floorf( src->f[2] + 0.5f ); 986 dst->f[3] = floorf( src->f[3] + 0.5f ); 987} 988 989static void 990micro_sgn( 991 union tgsi_exec_channel *dst, 992 const union tgsi_exec_channel *src ) 993{ 994 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 995 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 996 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 997 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 998} 999 1000static void 1001micro_shl( 1002 union tgsi_exec_channel *dst, 1003 const union tgsi_exec_channel *src0, 1004 const union tgsi_exec_channel *src1 ) 1005{ 1006 dst->i[0] = src0->i[0] << src1->i[0]; 1007 dst->i[1] = src0->i[1] << src1->i[1]; 1008 dst->i[2] = src0->i[2] << src1->i[2]; 1009 dst->i[3] = src0->i[3] << src1->i[3]; 1010} 1011 1012static void 1013micro_ishr( 1014 union tgsi_exec_channel *dst, 1015 const union tgsi_exec_channel *src0, 1016 const union tgsi_exec_channel *src1 ) 1017{ 1018 dst->i[0] = src0->i[0] >> src1->i[0]; 1019 dst->i[1] = src0->i[1] >> src1->i[1]; 1020 dst->i[2] = src0->i[2] >> src1->i[2]; 1021 dst->i[3] = src0->i[3] >> src1->i[3]; 1022} 1023 1024static void 1025micro_trunc( 1026 union tgsi_exec_channel *dst, 1027 const union tgsi_exec_channel *src0 ) 1028{ 1029 dst->f[0] = (float) (int) src0->f[0]; 1030 dst->f[1] = (float) (int) src0->f[1]; 1031 dst->f[2] = (float) (int) src0->f[2]; 1032 dst->f[3] = (float) (int) src0->f[3]; 1033} 1034 1035#if 0 1036static void 1037micro_ushr( 1038 union tgsi_exec_channel *dst, 1039 const union tgsi_exec_channel *src0, 1040 const union tgsi_exec_channel *src1 ) 1041{ 1042 dst->u[0] = src0->u[0] >> src1->u[0]; 1043 dst->u[1] = src0->u[1] >> src1->u[1]; 1044 dst->u[2] = src0->u[2] >> src1->u[2]; 1045 dst->u[3] = src0->u[3] >> src1->u[3]; 1046} 1047#endif 1048 1049static void 1050micro_sin( 1051 union tgsi_exec_channel *dst, 1052 const union tgsi_exec_channel *src ) 1053{ 1054 dst->f[0] = sinf( src->f[0] ); 1055 dst->f[1] = sinf( src->f[1] ); 1056 dst->f[2] = sinf( src->f[2] ); 1057 dst->f[3] = sinf( src->f[3] ); 1058} 1059 1060static void 1061micro_sqrt( union tgsi_exec_channel *dst, 1062 const union tgsi_exec_channel *src ) 1063{ 1064 dst->f[0] = sqrtf( src->f[0] ); 1065 dst->f[1] = sqrtf( src->f[1] ); 1066 dst->f[2] = sqrtf( src->f[2] ); 1067 dst->f[3] = sqrtf( src->f[3] ); 1068} 1069 1070static void 1071micro_sub( 1072 union tgsi_exec_channel *dst, 1073 const union tgsi_exec_channel *src0, 1074 const union tgsi_exec_channel *src1 ) 1075{ 1076 dst->f[0] = src0->f[0] - src1->f[0]; 1077 dst->f[1] = src0->f[1] - src1->f[1]; 1078 dst->f[2] = src0->f[2] - src1->f[2]; 1079 dst->f[3] = src0->f[3] - src1->f[3]; 1080} 1081 1082#if 0 1083static void 1084micro_u2f( 1085 union tgsi_exec_channel *dst, 1086 const union tgsi_exec_channel *src ) 1087{ 1088 dst->f[0] = (float) src->u[0]; 1089 dst->f[1] = (float) src->u[1]; 1090 dst->f[2] = (float) src->u[2]; 1091 dst->f[3] = (float) src->u[3]; 1092} 1093#endif 1094 1095static void 1096micro_xor( 1097 union tgsi_exec_channel *dst, 1098 const union tgsi_exec_channel *src0, 1099 const union tgsi_exec_channel *src1 ) 1100{ 1101 dst->u[0] = src0->u[0] ^ src1->u[0]; 1102 dst->u[1] = src0->u[1] ^ src1->u[1]; 1103 dst->u[2] = src0->u[2] ^ src1->u[2]; 1104 dst->u[3] = src0->u[3] ^ src1->u[3]; 1105} 1106 1107static void 1108fetch_src_file_channel( 1109 const struct tgsi_exec_machine *mach, 1110 const uint file, 1111 const uint swizzle, 1112 const union tgsi_exec_channel *index, 1113 union tgsi_exec_channel *chan ) 1114{ 1115 switch( swizzle ) { 1116 case TGSI_SWIZZLE_X: 1117 case TGSI_SWIZZLE_Y: 1118 case TGSI_SWIZZLE_Z: 1119 case TGSI_SWIZZLE_W: 1120 switch( file ) { 1121 case TGSI_FILE_CONSTANT: 1122 assert(mach->Consts); 1123 if (index->i[0] < 0) 1124 chan->f[0] = 0.0f; 1125 else 1126 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1127 if (index->i[1] < 0) 1128 chan->f[1] = 0.0f; 1129 else 1130 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1131 if (index->i[2] < 0) 1132 chan->f[2] = 0.0f; 1133 else 1134 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1135 if (index->i[3] < 0) 1136 chan->f[3] = 0.0f; 1137 else 1138 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1139 break; 1140 1141 case TGSI_FILE_INPUT: 1142 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1143 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1144 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1145 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1146 break; 1147 1148 case TGSI_FILE_TEMPORARY: 1149 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1150 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1151 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1152 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1153 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1154 break; 1155 1156 case TGSI_FILE_IMMEDIATE: 1157 assert( index->i[0] < (int) mach->ImmLimit ); 1158 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1159 assert( index->i[1] < (int) mach->ImmLimit ); 1160 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1161 assert( index->i[2] < (int) mach->ImmLimit ); 1162 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1163 assert( index->i[3] < (int) mach->ImmLimit ); 1164 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1165 break; 1166 1167 case TGSI_FILE_ADDRESS: 1168 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1169 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1170 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1171 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1172 break; 1173 1174 case TGSI_FILE_PREDICATE: 1175 assert(index->i[0] < TGSI_EXEC_NUM_PREDS); 1176 assert(index->i[1] < TGSI_EXEC_NUM_PREDS); 1177 assert(index->i[2] < TGSI_EXEC_NUM_PREDS); 1178 assert(index->i[3] < TGSI_EXEC_NUM_PREDS); 1179 chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0]; 1180 chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1]; 1181 chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2]; 1182 chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3]; 1183 break; 1184 1185 case TGSI_FILE_OUTPUT: 1186 /* vertex/fragment output vars can be read too */ 1187 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1188 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1189 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1190 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1191 break; 1192 1193 default: 1194 assert( 0 ); 1195 } 1196 break; 1197 1198 default: 1199 assert( 0 ); 1200 } 1201} 1202 1203static void 1204fetch_source( 1205 const struct tgsi_exec_machine *mach, 1206 union tgsi_exec_channel *chan, 1207 const struct tgsi_full_src_register *reg, 1208 const uint chan_index ) 1209{ 1210 union tgsi_exec_channel index; 1211 uint swizzle; 1212 1213 /* We start with a direct index into a register file. 1214 * 1215 * file[1], 1216 * where: 1217 * file = SrcRegister.File 1218 * [1] = SrcRegister.Index 1219 */ 1220 index.i[0] = 1221 index.i[1] = 1222 index.i[2] = 1223 index.i[3] = reg->SrcRegister.Index; 1224 1225 /* There is an extra source register that indirectly subscripts 1226 * a register file. The direct index now becomes an offset 1227 * that is being added to the indirect register. 1228 * 1229 * file[ind[2].x+1], 1230 * where: 1231 * ind = SrcRegisterInd.File 1232 * [2] = SrcRegisterInd.Index 1233 * .x = SrcRegisterInd.SwizzleX 1234 */ 1235 if (reg->SrcRegister.Indirect) { 1236 union tgsi_exec_channel index2; 1237 union tgsi_exec_channel indir_index; 1238 const uint execmask = mach->ExecMask; 1239 uint i; 1240 1241 /* which address register (always zero now) */ 1242 index2.i[0] = 1243 index2.i[1] = 1244 index2.i[2] = 1245 index2.i[3] = reg->SrcRegisterInd.Index; 1246 1247 /* get current value of address register[swizzle] */ 1248 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1249 fetch_src_file_channel( 1250 mach, 1251 reg->SrcRegisterInd.File, 1252 swizzle, 1253 &index2, 1254 &indir_index ); 1255 1256 /* add value of address register to the offset */ 1257 index.i[0] += (int) indir_index.f[0]; 1258 index.i[1] += (int) indir_index.f[1]; 1259 index.i[2] += (int) indir_index.f[2]; 1260 index.i[3] += (int) indir_index.f[3]; 1261 1262 /* for disabled execution channels, zero-out the index to 1263 * avoid using a potential garbage value. 1264 */ 1265 for (i = 0; i < QUAD_SIZE; i++) { 1266 if ((execmask & (1 << i)) == 0) 1267 index.i[i] = 0; 1268 } 1269 } 1270 1271 /* There is an extra source register that is a second 1272 * subscript to a register file. Effectively it means that 1273 * the register file is actually a 2D array of registers. 1274 * 1275 * file[1][3] == file[1*sizeof(file[1])+3], 1276 * where: 1277 * [3] = SrcRegisterDim.Index 1278 */ 1279 if (reg->SrcRegister.Dimension) { 1280 /* The size of the first-order array depends on the register file type. 1281 * We need to multiply the index to the first array to get an effective, 1282 * "flat" index that points to the beginning of the second-order array. 1283 */ 1284 switch (reg->SrcRegister.File) { 1285 case TGSI_FILE_INPUT: 1286 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1287 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1288 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1289 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1290 break; 1291 case TGSI_FILE_CONSTANT: 1292 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1293 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1294 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1295 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1296 break; 1297 default: 1298 assert( 0 ); 1299 } 1300 1301 index.i[0] += reg->SrcRegisterDim.Index; 1302 index.i[1] += reg->SrcRegisterDim.Index; 1303 index.i[2] += reg->SrcRegisterDim.Index; 1304 index.i[3] += reg->SrcRegisterDim.Index; 1305 1306 /* Again, the second subscript index can be addressed indirectly 1307 * identically to the first one. 1308 * Nothing stops us from indirectly addressing the indirect register, 1309 * but there is no need for that, so we won't exercise it. 1310 * 1311 * file[1][ind[4].y+3], 1312 * where: 1313 * ind = SrcRegisterDimInd.File 1314 * [4] = SrcRegisterDimInd.Index 1315 * .y = SrcRegisterDimInd.SwizzleX 1316 */ 1317 if (reg->SrcRegisterDim.Indirect) { 1318 union tgsi_exec_channel index2; 1319 union tgsi_exec_channel indir_index; 1320 const uint execmask = mach->ExecMask; 1321 uint i; 1322 1323 index2.i[0] = 1324 index2.i[1] = 1325 index2.i[2] = 1326 index2.i[3] = reg->SrcRegisterDimInd.Index; 1327 1328 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1329 fetch_src_file_channel( 1330 mach, 1331 reg->SrcRegisterDimInd.File, 1332 swizzle, 1333 &index2, 1334 &indir_index ); 1335 1336 index.i[0] += (int) indir_index.f[0]; 1337 index.i[1] += (int) indir_index.f[1]; 1338 index.i[2] += (int) indir_index.f[2]; 1339 index.i[3] += (int) indir_index.f[3]; 1340 1341 /* for disabled execution channels, zero-out the index to 1342 * avoid using a potential garbage value. 1343 */ 1344 for (i = 0; i < QUAD_SIZE; i++) { 1345 if ((execmask & (1 << i)) == 0) 1346 index.i[i] = 0; 1347 } 1348 } 1349 1350 /* If by any chance there was a need for a 3D array of register 1351 * files, we would have to check whether SrcRegisterDim is followed 1352 * by a dimension register and continue the saga. 1353 */ 1354 } 1355 1356 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1357 fetch_src_file_channel( 1358 mach, 1359 reg->SrcRegister.File, 1360 swizzle, 1361 &index, 1362 chan ); 1363 1364 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1365 case TGSI_UTIL_SIGN_CLEAR: 1366 micro_abs( chan, chan ); 1367 break; 1368 1369 case TGSI_UTIL_SIGN_SET: 1370 micro_abs( chan, chan ); 1371 micro_neg( chan, chan ); 1372 break; 1373 1374 case TGSI_UTIL_SIGN_TOGGLE: 1375 micro_neg( chan, chan ); 1376 break; 1377 1378 case TGSI_UTIL_SIGN_KEEP: 1379 break; 1380 } 1381 1382 if (reg->SrcRegisterExtMod.Complement) { 1383 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1384 } 1385} 1386 1387static void 1388store_dest( 1389 struct tgsi_exec_machine *mach, 1390 const union tgsi_exec_channel *chan, 1391 const struct tgsi_full_dst_register *reg, 1392 const struct tgsi_full_instruction *inst, 1393 uint chan_index ) 1394{ 1395 uint i; 1396 union tgsi_exec_channel null; 1397 union tgsi_exec_channel *dst; 1398 uint execmask = mach->ExecMask; 1399 int offset = 0; /* indirection offset */ 1400 int index; 1401 1402#ifdef DEBUG 1403 check_inf_or_nan(chan); 1404#endif 1405 1406 /* There is an extra source register that indirectly subscripts 1407 * a register file. The direct index now becomes an offset 1408 * that is being added to the indirect register. 1409 * 1410 * file[ind[2].x+1], 1411 * where: 1412 * ind = DstRegisterInd.File 1413 * [2] = DstRegisterInd.Index 1414 * .x = DstRegisterInd.SwizzleX 1415 */ 1416 if (reg->DstRegister.Indirect) { 1417 union tgsi_exec_channel index; 1418 union tgsi_exec_channel indir_index; 1419 uint swizzle; 1420 1421 /* which address register (always zero for now) */ 1422 index.i[0] = 1423 index.i[1] = 1424 index.i[2] = 1425 index.i[3] = reg->DstRegisterInd.Index; 1426 1427 /* get current value of address register[swizzle] */ 1428 swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); 1429 1430 /* fetch values from the address/indirection register */ 1431 fetch_src_file_channel( 1432 mach, 1433 reg->DstRegisterInd.File, 1434 swizzle, 1435 &index, 1436 &indir_index ); 1437 1438 /* save indirection offset */ 1439 offset = (int) indir_index.f[0]; 1440 } 1441 1442 switch (reg->DstRegister.File) { 1443 case TGSI_FILE_NULL: 1444 dst = &null; 1445 break; 1446 1447 case TGSI_FILE_OUTPUT: 1448 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1449 + reg->DstRegister.Index; 1450 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1451 break; 1452 1453 case TGSI_FILE_TEMPORARY: 1454 index = reg->DstRegister.Index; 1455 assert( index < TGSI_EXEC_NUM_TEMPS ); 1456 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1457 break; 1458 1459 case TGSI_FILE_ADDRESS: 1460 index = reg->DstRegister.Index; 1461 dst = &mach->Addrs[index].xyzw[chan_index]; 1462 break; 1463 1464 case TGSI_FILE_PREDICATE: 1465 index = reg->DstRegister.Index; 1466 assert(index < TGSI_EXEC_NUM_PREDS); 1467 dst = &mach->Addrs[index].xyzw[chan_index]; 1468 break; 1469 1470 default: 1471 assert( 0 ); 1472 return; 1473 } 1474 1475 switch (inst->Instruction.Saturate) { 1476 case TGSI_SAT_NONE: 1477 for (i = 0; i < QUAD_SIZE; i++) 1478 if (execmask & (1 << i)) 1479 dst->i[i] = chan->i[i]; 1480 break; 1481 1482 case TGSI_SAT_ZERO_ONE: 1483 for (i = 0; i < QUAD_SIZE; i++) 1484 if (execmask & (1 << i)) { 1485 if (chan->f[i] < 0.0f) 1486 dst->f[i] = 0.0f; 1487 else if (chan->f[i] > 1.0f) 1488 dst->f[i] = 1.0f; 1489 else 1490 dst->i[i] = chan->i[i]; 1491 } 1492 break; 1493 1494 case TGSI_SAT_MINUS_PLUS_ONE: 1495 for (i = 0; i < QUAD_SIZE; i++) 1496 if (execmask & (1 << i)) { 1497 if (chan->f[i] < -1.0f) 1498 dst->f[i] = -1.0f; 1499 else if (chan->f[i] > 1.0f) 1500 dst->f[i] = 1.0f; 1501 else 1502 dst->i[i] = chan->i[i]; 1503 } 1504 break; 1505 1506 default: 1507 assert( 0 ); 1508 } 1509} 1510 1511#define FETCH(VAL,INDEX,CHAN)\ 1512 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1513 1514#define STORE(VAL,INDEX,CHAN)\ 1515 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1516 1517 1518/** 1519 * Execute ARB-style KIL which is predicated by a src register. 1520 * Kill fragment if any of the four values is less than zero. 1521 */ 1522static void 1523exec_kil(struct tgsi_exec_machine *mach, 1524 const struct tgsi_full_instruction *inst) 1525{ 1526 uint uniquemask; 1527 uint chan_index; 1528 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1529 union tgsi_exec_channel r[1]; 1530 1531 /* This mask stores component bits that were already tested. */ 1532 uniquemask = 0; 1533 1534 for (chan_index = 0; chan_index < 4; chan_index++) 1535 { 1536 uint swizzle; 1537 uint i; 1538 1539 /* unswizzle channel */ 1540 swizzle = tgsi_util_get_full_src_register_swizzle ( 1541 &inst->FullSrcRegisters[0], 1542 chan_index); 1543 1544 /* check if the component has not been already tested */ 1545 if (uniquemask & (1 << swizzle)) 1546 continue; 1547 uniquemask |= 1 << swizzle; 1548 1549 FETCH(&r[0], 0, chan_index); 1550 for (i = 0; i < 4; i++) 1551 if (r[0].f[i] < 0.0f) 1552 kilmask |= 1 << i; 1553 } 1554 1555 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1556} 1557 1558/** 1559 * Execute NVIDIA-style KIL which is predicated by a condition code. 1560 * Kill fragment if the condition code is TRUE. 1561 */ 1562static void 1563exec_kilp(struct tgsi_exec_machine *mach, 1564 const struct tgsi_full_instruction *inst) 1565{ 1566 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1567 1568 /* "unconditional" kil */ 1569 kilmask = mach->ExecMask; 1570 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1571} 1572 1573 1574/* 1575 * Fetch a four texture samples using STR texture coordinates. 1576 */ 1577static void 1578fetch_texel( struct tgsi_sampler *sampler, 1579 const union tgsi_exec_channel *s, 1580 const union tgsi_exec_channel *t, 1581 const union tgsi_exec_channel *p, 1582 float lodbias, /* XXX should be float[4] */ 1583 union tgsi_exec_channel *r, 1584 union tgsi_exec_channel *g, 1585 union tgsi_exec_channel *b, 1586 union tgsi_exec_channel *a ) 1587{ 1588 uint j; 1589 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1590 1591 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1592 1593 for (j = 0; j < 4; j++) { 1594 r->f[j] = rgba[0][j]; 1595 g->f[j] = rgba[1][j]; 1596 b->f[j] = rgba[2][j]; 1597 a->f[j] = rgba[3][j]; 1598 } 1599} 1600 1601 1602static void 1603exec_tex(struct tgsi_exec_machine *mach, 1604 const struct tgsi_full_instruction *inst, 1605 boolean biasLod, 1606 boolean projected) 1607{ 1608 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1609 union tgsi_exec_channel r[4]; 1610 uint chan_index; 1611 float lodBias; 1612 1613 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1614 1615 switch (inst->InstructionExtTexture.Texture) { 1616 case TGSI_TEXTURE_1D: 1617 case TGSI_TEXTURE_SHADOW1D: 1618 1619 FETCH(&r[0], 0, CHAN_X); 1620 1621 if (projected) { 1622 FETCH(&r[1], 0, CHAN_W); 1623 micro_div( &r[0], &r[0], &r[1] ); 1624 } 1625 1626 if (biasLod) { 1627 FETCH(&r[1], 0, CHAN_W); 1628 lodBias = r[2].f[0]; 1629 } 1630 else 1631 lodBias = 0.0; 1632 1633 fetch_texel(mach->Samplers[unit], 1634 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1635 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1636 break; 1637 1638 case TGSI_TEXTURE_2D: 1639 case TGSI_TEXTURE_RECT: 1640 case TGSI_TEXTURE_SHADOW2D: 1641 case TGSI_TEXTURE_SHADOWRECT: 1642 1643 FETCH(&r[0], 0, CHAN_X); 1644 FETCH(&r[1], 0, CHAN_Y); 1645 FETCH(&r[2], 0, CHAN_Z); 1646 1647 if (projected) { 1648 FETCH(&r[3], 0, CHAN_W); 1649 micro_div( &r[0], &r[0], &r[3] ); 1650 micro_div( &r[1], &r[1], &r[3] ); 1651 micro_div( &r[2], &r[2], &r[3] ); 1652 } 1653 1654 if (biasLod) { 1655 FETCH(&r[3], 0, CHAN_W); 1656 lodBias = r[3].f[0]; 1657 } 1658 else 1659 lodBias = 0.0; 1660 1661 fetch_texel(mach->Samplers[unit], 1662 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1663 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1664 break; 1665 1666 case TGSI_TEXTURE_3D: 1667 case TGSI_TEXTURE_CUBE: 1668 1669 FETCH(&r[0], 0, CHAN_X); 1670 FETCH(&r[1], 0, CHAN_Y); 1671 FETCH(&r[2], 0, CHAN_Z); 1672 1673 if (projected) { 1674 FETCH(&r[3], 0, CHAN_W); 1675 micro_div( &r[0], &r[0], &r[3] ); 1676 micro_div( &r[1], &r[1], &r[3] ); 1677 micro_div( &r[2], &r[2], &r[3] ); 1678 } 1679 1680 if (biasLod) { 1681 FETCH(&r[3], 0, CHAN_W); 1682 lodBias = r[3].f[0]; 1683 } 1684 else 1685 lodBias = 0.0; 1686 1687 fetch_texel(mach->Samplers[unit], 1688 &r[0], &r[1], &r[2], lodBias, 1689 &r[0], &r[1], &r[2], &r[3]); 1690 break; 1691 1692 default: 1693 assert (0); 1694 } 1695 1696 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1697 STORE( &r[chan_index], 0, chan_index ); 1698 } 1699} 1700 1701static void 1702exec_txd(struct tgsi_exec_machine *mach, 1703 const struct tgsi_full_instruction *inst) 1704{ 1705 const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index; 1706 union tgsi_exec_channel r[4]; 1707 uint chan_index; 1708 1709 /* 1710 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 1711 */ 1712 1713 switch (inst->InstructionExtTexture.Texture) { 1714 case TGSI_TEXTURE_1D: 1715 case TGSI_TEXTURE_SHADOW1D: 1716 1717 FETCH(&r[0], 0, CHAN_X); 1718 1719 fetch_texel(mach->Samplers[unit], 1720 &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */ 1721 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1722 break; 1723 1724 case TGSI_TEXTURE_2D: 1725 case TGSI_TEXTURE_RECT: 1726 case TGSI_TEXTURE_SHADOW2D: 1727 case TGSI_TEXTURE_SHADOWRECT: 1728 1729 FETCH(&r[0], 0, CHAN_X); 1730 FETCH(&r[1], 0, CHAN_Y); 1731 FETCH(&r[2], 0, CHAN_Z); 1732 1733 fetch_texel(mach->Samplers[unit], 1734 &r[0], &r[1], &r[2], 0.0f, /* inputs */ 1735 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1736 break; 1737 1738 case TGSI_TEXTURE_3D: 1739 case TGSI_TEXTURE_CUBE: 1740 1741 FETCH(&r[0], 0, CHAN_X); 1742 FETCH(&r[1], 0, CHAN_Y); 1743 FETCH(&r[2], 0, CHAN_Z); 1744 1745 fetch_texel(mach->Samplers[unit], 1746 &r[0], &r[1], &r[2], 0.0f, 1747 &r[0], &r[1], &r[2], &r[3]); 1748 break; 1749 1750 default: 1751 assert(0); 1752 } 1753 1754 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1755 STORE(&r[chan_index], 0, chan_index); 1756 } 1757} 1758 1759 1760/** 1761 * Evaluate a constant-valued coefficient at the position of the 1762 * current quad. 1763 */ 1764static void 1765eval_constant_coef( 1766 struct tgsi_exec_machine *mach, 1767 unsigned attrib, 1768 unsigned chan ) 1769{ 1770 unsigned i; 1771 1772 for( i = 0; i < QUAD_SIZE; i++ ) { 1773 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1774 } 1775} 1776 1777/** 1778 * Evaluate a linear-valued coefficient at the position of the 1779 * current quad. 1780 */ 1781static void 1782eval_linear_coef( 1783 struct tgsi_exec_machine *mach, 1784 unsigned attrib, 1785 unsigned chan ) 1786{ 1787 const float x = mach->QuadPos.xyzw[0].f[0]; 1788 const float y = mach->QuadPos.xyzw[1].f[0]; 1789 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1790 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1791 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1792 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1793 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1794 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1795 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1796} 1797 1798/** 1799 * Evaluate a perspective-valued coefficient at the position of the 1800 * current quad. 1801 */ 1802static void 1803eval_perspective_coef( 1804 struct tgsi_exec_machine *mach, 1805 unsigned attrib, 1806 unsigned chan ) 1807{ 1808 const float x = mach->QuadPos.xyzw[0].f[0]; 1809 const float y = mach->QuadPos.xyzw[1].f[0]; 1810 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1811 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1812 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1813 const float *w = mach->QuadPos.xyzw[3].f; 1814 /* divide by W here */ 1815 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1816 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1817 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1818 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1819} 1820 1821 1822typedef void (* eval_coef_func)( 1823 struct tgsi_exec_machine *mach, 1824 unsigned attrib, 1825 unsigned chan ); 1826 1827static void 1828exec_declaration( 1829 struct tgsi_exec_machine *mach, 1830 const struct tgsi_full_declaration *decl ) 1831{ 1832 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1833 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1834 unsigned first, last, mask; 1835 eval_coef_func eval; 1836 1837 first = decl->DeclarationRange.First; 1838 last = decl->DeclarationRange.Last; 1839 mask = decl->Declaration.UsageMask; 1840 1841 switch( decl->Declaration.Interpolate ) { 1842 case TGSI_INTERPOLATE_CONSTANT: 1843 eval = eval_constant_coef; 1844 break; 1845 1846 case TGSI_INTERPOLATE_LINEAR: 1847 eval = eval_linear_coef; 1848 break; 1849 1850 case TGSI_INTERPOLATE_PERSPECTIVE: 1851 eval = eval_perspective_coef; 1852 break; 1853 1854 default: 1855 assert( 0 ); 1856 return; 1857 } 1858 1859 if( mask == TGSI_WRITEMASK_XYZW ) { 1860 unsigned i, j; 1861 1862 for( i = first; i <= last; i++ ) { 1863 for( j = 0; j < NUM_CHANNELS; j++ ) { 1864 eval( mach, i, j ); 1865 } 1866 } 1867 } 1868 else { 1869 unsigned i, j; 1870 1871 for( j = 0; j < NUM_CHANNELS; j++ ) { 1872 if( mask & (1 << j) ) { 1873 for( i = first; i <= last; i++ ) { 1874 eval( mach, i, j ); 1875 } 1876 } 1877 } 1878 } 1879 } 1880 } 1881} 1882 1883static void 1884exec_instruction( 1885 struct tgsi_exec_machine *mach, 1886 const struct tgsi_full_instruction *inst, 1887 int *pc ) 1888{ 1889 uint chan_index; 1890 union tgsi_exec_channel r[10]; 1891 union tgsi_exec_channel d[8]; 1892 1893 (*pc)++; 1894 1895 switch (inst->Instruction.Opcode) { 1896 case TGSI_OPCODE_ARL: 1897 case TGSI_OPCODE_FLR: 1898 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1899 FETCH( &r[0], 0, chan_index ); 1900 micro_flr(&d[chan_index], &r[0]); 1901 } 1902 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1903 STORE(&d[chan_index], 0, chan_index); 1904 } 1905 break; 1906 1907 case TGSI_OPCODE_MOV: 1908 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1909 FETCH(&d[chan_index], 0, chan_index); 1910 } 1911 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1912 STORE(&d[chan_index], 0, chan_index); 1913 } 1914 break; 1915 1916 case TGSI_OPCODE_LIT: 1917 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1918 FETCH( &r[0], 0, CHAN_X ); 1919 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1920 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 1921 } 1922 1923 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1924 FETCH( &r[1], 0, CHAN_Y ); 1925 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1926 1927 FETCH( &r[2], 0, CHAN_W ); 1928 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1929 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1930 micro_pow( &r[1], &r[1], &r[2] ); 1931 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 1932 } 1933 1934 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 1935 STORE(&d[CHAN_Y], 0, CHAN_Y); 1936 } 1937 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1938 STORE(&d[CHAN_Z], 0, CHAN_Z); 1939 } 1940 } 1941 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1942 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1943 } 1944 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1945 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1946 } 1947 break; 1948 1949 case TGSI_OPCODE_RCP: 1950 /* TGSI_OPCODE_RECIP */ 1951 FETCH( &r[0], 0, CHAN_X ); 1952 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1953 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1954 STORE( &r[0], 0, chan_index ); 1955 } 1956 break; 1957 1958 case TGSI_OPCODE_RSQ: 1959 /* TGSI_OPCODE_RECIPSQRT */ 1960 FETCH( &r[0], 0, CHAN_X ); 1961 micro_abs( &r[0], &r[0] ); 1962 micro_sqrt( &r[0], &r[0] ); 1963 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1964 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1965 STORE( &r[0], 0, chan_index ); 1966 } 1967 break; 1968 1969 case TGSI_OPCODE_EXP: 1970 FETCH( &r[0], 0, CHAN_X ); 1971 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1972 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1973 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1974 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1975 } 1976 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1977 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1978 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1979 } 1980 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1981 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1982 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1983 } 1984 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1985 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1986 } 1987 break; 1988 1989 case TGSI_OPCODE_LOG: 1990 FETCH( &r[0], 0, CHAN_X ); 1991 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1992 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1993 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1994 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1995 STORE( &r[0], 0, CHAN_X ); 1996 } 1997 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1998 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1999 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2000 STORE( &r[0], 0, CHAN_Y ); 2001 } 2002 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2003 STORE( &r[1], 0, CHAN_Z ); 2004 } 2005 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2006 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2007 } 2008 break; 2009 2010 case TGSI_OPCODE_MUL: 2011 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2012 FETCH(&r[0], 0, chan_index); 2013 FETCH(&r[1], 1, chan_index); 2014 micro_mul(&d[chan_index], &r[0], &r[1]); 2015 } 2016 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2017 STORE(&d[chan_index], 0, chan_index); 2018 } 2019 break; 2020 2021 case TGSI_OPCODE_ADD: 2022 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2023 FETCH( &r[0], 0, chan_index ); 2024 FETCH( &r[1], 1, chan_index ); 2025 micro_add(&d[chan_index], &r[0], &r[1]); 2026 } 2027 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2028 STORE(&d[chan_index], 0, chan_index); 2029 } 2030 break; 2031 2032 case TGSI_OPCODE_DP3: 2033 /* TGSI_OPCODE_DOT3 */ 2034 FETCH( &r[0], 0, CHAN_X ); 2035 FETCH( &r[1], 1, CHAN_X ); 2036 micro_mul( &r[0], &r[0], &r[1] ); 2037 2038 FETCH( &r[1], 0, CHAN_Y ); 2039 FETCH( &r[2], 1, CHAN_Y ); 2040 micro_mul( &r[1], &r[1], &r[2] ); 2041 micro_add( &r[0], &r[0], &r[1] ); 2042 2043 FETCH( &r[1], 0, CHAN_Z ); 2044 FETCH( &r[2], 1, CHAN_Z ); 2045 micro_mul( &r[1], &r[1], &r[2] ); 2046 micro_add( &r[0], &r[0], &r[1] ); 2047 2048 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2049 STORE( &r[0], 0, chan_index ); 2050 } 2051 break; 2052 2053 case TGSI_OPCODE_DP4: 2054 /* TGSI_OPCODE_DOT4 */ 2055 FETCH(&r[0], 0, CHAN_X); 2056 FETCH(&r[1], 1, CHAN_X); 2057 2058 micro_mul( &r[0], &r[0], &r[1] ); 2059 2060 FETCH(&r[1], 0, CHAN_Y); 2061 FETCH(&r[2], 1, CHAN_Y); 2062 2063 micro_mul( &r[1], &r[1], &r[2] ); 2064 micro_add( &r[0], &r[0], &r[1] ); 2065 2066 FETCH(&r[1], 0, CHAN_Z); 2067 FETCH(&r[2], 1, CHAN_Z); 2068 2069 micro_mul( &r[1], &r[1], &r[2] ); 2070 micro_add( &r[0], &r[0], &r[1] ); 2071 2072 FETCH(&r[1], 0, CHAN_W); 2073 FETCH(&r[2], 1, CHAN_W); 2074 2075 micro_mul( &r[1], &r[1], &r[2] ); 2076 micro_add( &r[0], &r[0], &r[1] ); 2077 2078 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2079 STORE( &r[0], 0, chan_index ); 2080 } 2081 break; 2082 2083 case TGSI_OPCODE_DST: 2084 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2085 FETCH( &r[0], 0, CHAN_Y ); 2086 FETCH( &r[1], 1, CHAN_Y); 2087 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2088 } 2089 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2090 FETCH(&d[CHAN_Z], 0, CHAN_Z); 2091 } 2092 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2093 FETCH(&d[CHAN_W], 1, CHAN_W); 2094 } 2095 2096 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2097 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); 2098 } 2099 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2100 STORE(&d[CHAN_Y], 0, CHAN_Y); 2101 } 2102 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2103 STORE(&d[CHAN_Z], 0, CHAN_Z); 2104 } 2105 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2106 STORE(&d[CHAN_W], 0, CHAN_W); 2107 } 2108 break; 2109 2110 case TGSI_OPCODE_MIN: 2111 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2112 FETCH(&r[0], 0, chan_index); 2113 FETCH(&r[1], 1, chan_index); 2114 2115 /* XXX use micro_min()?? */ 2116 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); 2117 } 2118 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2119 STORE(&d[chan_index], 0, chan_index); 2120 } 2121 break; 2122 2123 case TGSI_OPCODE_MAX: 2124 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2125 FETCH(&r[0], 0, chan_index); 2126 FETCH(&r[1], 1, chan_index); 2127 2128 /* XXX use micro_max()?? */ 2129 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); 2130 } 2131 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2132 STORE(&d[chan_index], 0, chan_index); 2133 } 2134 break; 2135 2136 case TGSI_OPCODE_SLT: 2137 /* TGSI_OPCODE_SETLT */ 2138 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2139 FETCH( &r[0], 0, chan_index ); 2140 FETCH( &r[1], 1, chan_index ); 2141 micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2142 } 2143 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2144 STORE(&d[chan_index], 0, chan_index); 2145 } 2146 break; 2147 2148 case TGSI_OPCODE_SGE: 2149 /* TGSI_OPCODE_SETGE */ 2150 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2151 FETCH( &r[0], 0, chan_index ); 2152 FETCH( &r[1], 1, chan_index ); 2153 micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2154 } 2155 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2156 STORE(&d[chan_index], 0, chan_index); 2157 } 2158 break; 2159 2160 case TGSI_OPCODE_MAD: 2161 /* TGSI_OPCODE_MADD */ 2162 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2163 FETCH( &r[0], 0, chan_index ); 2164 FETCH( &r[1], 1, chan_index ); 2165 micro_mul( &r[0], &r[0], &r[1] ); 2166 FETCH( &r[1], 2, chan_index ); 2167 micro_add(&d[chan_index], &r[0], &r[1]); 2168 } 2169 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2170 STORE(&d[chan_index], 0, chan_index); 2171 } 2172 break; 2173 2174 case TGSI_OPCODE_SUB: 2175 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2176 FETCH(&r[0], 0, chan_index); 2177 FETCH(&r[1], 1, chan_index); 2178 micro_sub(&d[chan_index], &r[0], &r[1]); 2179 } 2180 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2181 STORE(&d[chan_index], 0, chan_index); 2182 } 2183 break; 2184 2185 case TGSI_OPCODE_LRP: 2186 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2187 FETCH(&r[0], 0, chan_index); 2188 FETCH(&r[1], 1, chan_index); 2189 FETCH(&r[2], 2, chan_index); 2190 micro_sub( &r[1], &r[1], &r[2] ); 2191 micro_mul( &r[0], &r[0], &r[1] ); 2192 micro_add(&d[chan_index], &r[0], &r[2]); 2193 } 2194 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2195 STORE(&d[chan_index], 0, chan_index); 2196 } 2197 break; 2198 2199 case TGSI_OPCODE_CND: 2200 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2201 FETCH(&r[0], 0, chan_index); 2202 FETCH(&r[1], 1, chan_index); 2203 FETCH(&r[2], 2, chan_index); 2204 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2205 } 2206 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2207 STORE(&d[chan_index], 0, chan_index); 2208 } 2209 break; 2210 2211 case TGSI_OPCODE_DP2A: 2212 FETCH( &r[0], 0, CHAN_X ); 2213 FETCH( &r[1], 1, CHAN_X ); 2214 micro_mul( &r[0], &r[0], &r[1] ); 2215 2216 FETCH( &r[1], 0, CHAN_Y ); 2217 FETCH( &r[2], 1, CHAN_Y ); 2218 micro_mul( &r[1], &r[1], &r[2] ); 2219 micro_add( &r[0], &r[0], &r[1] ); 2220 2221 FETCH( &r[2], 2, CHAN_X ); 2222 micro_add( &r[0], &r[0], &r[2] ); 2223 2224 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2225 STORE( &r[0], 0, chan_index ); 2226 } 2227 break; 2228 2229 case TGSI_OPCODE_FRC: 2230 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2231 FETCH( &r[0], 0, chan_index ); 2232 micro_frc(&d[chan_index], &r[0]); 2233 } 2234 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2235 STORE(&d[chan_index], 0, chan_index); 2236 } 2237 break; 2238 2239 case TGSI_OPCODE_CLAMP: 2240 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2241 FETCH(&r[0], 0, chan_index); 2242 FETCH(&r[1], 1, chan_index); 2243 micro_max(&r[0], &r[0], &r[1]); 2244 FETCH(&r[1], 2, chan_index); 2245 micro_min(&d[chan_index], &r[0], &r[1]); 2246 } 2247 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2248 STORE(&d[chan_index], 0, chan_index); 2249 } 2250 break; 2251 2252 case TGSI_OPCODE_ROUND: 2253 case TGSI_OPCODE_ARR: 2254 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2255 FETCH( &r[0], 0, chan_index ); 2256 micro_rnd(&d[chan_index], &r[0]); 2257 } 2258 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2259 STORE(&d[chan_index], 0, chan_index); 2260 } 2261 break; 2262 2263 case TGSI_OPCODE_EX2: 2264 FETCH(&r[0], 0, CHAN_X); 2265 2266#if FAST_MATH 2267 micro_exp2( &r[0], &r[0] ); 2268#else 2269 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2270#endif 2271 2272 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2273 STORE( &r[0], 0, chan_index ); 2274 } 2275 break; 2276 2277 case TGSI_OPCODE_LG2: 2278 FETCH( &r[0], 0, CHAN_X ); 2279 micro_lg2( &r[0], &r[0] ); 2280 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2281 STORE( &r[0], 0, chan_index ); 2282 } 2283 break; 2284 2285 case TGSI_OPCODE_POW: 2286 FETCH(&r[0], 0, CHAN_X); 2287 FETCH(&r[1], 1, CHAN_X); 2288 2289 micro_pow( &r[0], &r[0], &r[1] ); 2290 2291 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2292 STORE( &r[0], 0, chan_index ); 2293 } 2294 break; 2295 2296 case TGSI_OPCODE_XPD: 2297 FETCH(&r[0], 0, CHAN_Y); 2298 FETCH(&r[1], 1, CHAN_Z); 2299 2300 micro_mul( &r[2], &r[0], &r[1] ); 2301 2302 FETCH(&r[3], 0, CHAN_Z); 2303 FETCH(&r[4], 1, CHAN_Y); 2304 2305 micro_mul( &r[5], &r[3], &r[4] ); 2306 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2307 2308 FETCH(&r[2], 1, CHAN_X); 2309 2310 micro_mul( &r[3], &r[3], &r[2] ); 2311 2312 FETCH(&r[5], 0, CHAN_X); 2313 2314 micro_mul( &r[1], &r[1], &r[5] ); 2315 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2316 2317 micro_mul( &r[5], &r[5], &r[4] ); 2318 micro_mul( &r[0], &r[0], &r[2] ); 2319 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2320 2321 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2322 STORE(&d[CHAN_X], 0, CHAN_X); 2323 } 2324 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2325 STORE(&d[CHAN_Y], 0, CHAN_Y); 2326 } 2327 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2328 STORE(&d[CHAN_Z], 0, CHAN_Z); 2329 } 2330 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2331 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2332 } 2333 break; 2334 2335 case TGSI_OPCODE_ABS: 2336 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2337 FETCH(&r[0], 0, chan_index); 2338 micro_abs(&d[chan_index], &r[0]); 2339 } 2340 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2341 STORE(&d[chan_index], 0, chan_index); 2342 } 2343 break; 2344 2345 case TGSI_OPCODE_RCC: 2346 FETCH(&r[0], 0, CHAN_X); 2347 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2348 micro_float_clamp(&r[0], &r[0]); 2349 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2350 STORE(&r[0], 0, chan_index); 2351 } 2352 break; 2353 2354 case TGSI_OPCODE_DPH: 2355 FETCH(&r[0], 0, CHAN_X); 2356 FETCH(&r[1], 1, CHAN_X); 2357 2358 micro_mul( &r[0], &r[0], &r[1] ); 2359 2360 FETCH(&r[1], 0, CHAN_Y); 2361 FETCH(&r[2], 1, CHAN_Y); 2362 2363 micro_mul( &r[1], &r[1], &r[2] ); 2364 micro_add( &r[0], &r[0], &r[1] ); 2365 2366 FETCH(&r[1], 0, CHAN_Z); 2367 FETCH(&r[2], 1, CHAN_Z); 2368 2369 micro_mul( &r[1], &r[1], &r[2] ); 2370 micro_add( &r[0], &r[0], &r[1] ); 2371 2372 FETCH(&r[1], 1, CHAN_W); 2373 2374 micro_add( &r[0], &r[0], &r[1] ); 2375 2376 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2377 STORE( &r[0], 0, chan_index ); 2378 } 2379 break; 2380 2381 case TGSI_OPCODE_COS: 2382 FETCH(&r[0], 0, CHAN_X); 2383 2384 micro_cos( &r[0], &r[0] ); 2385 2386 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2387 STORE( &r[0], 0, chan_index ); 2388 } 2389 break; 2390 2391 case TGSI_OPCODE_DDX: 2392 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2393 FETCH( &r[0], 0, chan_index ); 2394 micro_ddx(&d[chan_index], &r[0]); 2395 } 2396 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2397 STORE(&d[chan_index], 0, chan_index); 2398 } 2399 break; 2400 2401 case TGSI_OPCODE_DDY: 2402 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2403 FETCH( &r[0], 0, chan_index ); 2404 micro_ddy(&d[chan_index], &r[0]); 2405 } 2406 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2407 STORE(&d[chan_index], 0, chan_index); 2408 } 2409 break; 2410 2411 case TGSI_OPCODE_KILP: 2412 exec_kilp (mach, inst); 2413 break; 2414 2415 case TGSI_OPCODE_KIL: 2416 exec_kil (mach, inst); 2417 break; 2418 2419 case TGSI_OPCODE_PK2H: 2420 assert (0); 2421 break; 2422 2423 case TGSI_OPCODE_PK2US: 2424 assert (0); 2425 break; 2426 2427 case TGSI_OPCODE_PK4B: 2428 assert (0); 2429 break; 2430 2431 case TGSI_OPCODE_PK4UB: 2432 assert (0); 2433 break; 2434 2435 case TGSI_OPCODE_RFL: 2436 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2437 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2438 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2439 /* r0 = dp3(src0, src0) */ 2440 FETCH(&r[2], 0, CHAN_X); 2441 micro_mul(&r[0], &r[2], &r[2]); 2442 FETCH(&r[4], 0, CHAN_Y); 2443 micro_mul(&r[8], &r[4], &r[4]); 2444 micro_add(&r[0], &r[0], &r[8]); 2445 FETCH(&r[6], 0, CHAN_Z); 2446 micro_mul(&r[8], &r[6], &r[6]); 2447 micro_add(&r[0], &r[0], &r[8]); 2448 2449 /* r1 = dp3(src0, src1) */ 2450 FETCH(&r[3], 1, CHAN_X); 2451 micro_mul(&r[1], &r[2], &r[3]); 2452 FETCH(&r[5], 1, CHAN_Y); 2453 micro_mul(&r[8], &r[4], &r[5]); 2454 micro_add(&r[1], &r[1], &r[8]); 2455 FETCH(&r[7], 1, CHAN_Z); 2456 micro_mul(&r[8], &r[6], &r[7]); 2457 micro_add(&r[1], &r[1], &r[8]); 2458 2459 /* r1 = 2 * r1 / r0 */ 2460 micro_add(&r[1], &r[1], &r[1]); 2461 micro_div(&r[1], &r[1], &r[0]); 2462 2463 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2464 micro_mul(&r[2], &r[2], &r[1]); 2465 micro_sub(&r[2], &r[2], &r[3]); 2466 STORE(&r[2], 0, CHAN_X); 2467 } 2468 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2469 micro_mul(&r[4], &r[4], &r[1]); 2470 micro_sub(&r[4], &r[4], &r[5]); 2471 STORE(&r[4], 0, CHAN_Y); 2472 } 2473 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2474 micro_mul(&r[6], &r[6], &r[1]); 2475 micro_sub(&r[6], &r[6], &r[7]); 2476 STORE(&r[6], 0, CHAN_Z); 2477 } 2478 } 2479 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2480 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2481 } 2482 break; 2483 2484 case TGSI_OPCODE_SEQ: 2485 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2486 FETCH( &r[0], 0, chan_index ); 2487 FETCH( &r[1], 1, chan_index ); 2488 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2489 } 2490 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2491 STORE(&d[chan_index], 0, chan_index); 2492 } 2493 break; 2494 2495 case TGSI_OPCODE_SFL: 2496 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2497 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2498 } 2499 break; 2500 2501 case TGSI_OPCODE_SGT: 2502 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2503 FETCH( &r[0], 0, chan_index ); 2504 FETCH( &r[1], 1, chan_index ); 2505 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2506 } 2507 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2508 STORE(&d[chan_index], 0, chan_index); 2509 } 2510 break; 2511 2512 case TGSI_OPCODE_SIN: 2513 FETCH( &r[0], 0, CHAN_X ); 2514 micro_sin( &r[0], &r[0] ); 2515 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2516 STORE( &r[0], 0, chan_index ); 2517 } 2518 break; 2519 2520 case TGSI_OPCODE_SLE: 2521 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2522 FETCH( &r[0], 0, chan_index ); 2523 FETCH( &r[1], 1, chan_index ); 2524 micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2525 } 2526 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2527 STORE(&d[chan_index], 0, chan_index); 2528 } 2529 break; 2530 2531 case TGSI_OPCODE_SNE: 2532 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2533 FETCH( &r[0], 0, chan_index ); 2534 FETCH( &r[1], 1, chan_index ); 2535 micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 2536 } 2537 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2538 STORE(&d[chan_index], 0, chan_index); 2539 } 2540 break; 2541 2542 case TGSI_OPCODE_STR: 2543 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2544 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2545 } 2546 break; 2547 2548 case TGSI_OPCODE_TEX: 2549 /* simple texture lookup */ 2550 /* src[0] = texcoord */ 2551 /* src[1] = sampler unit */ 2552 exec_tex(mach, inst, FALSE, FALSE); 2553 break; 2554 2555 case TGSI_OPCODE_TXB: 2556 /* Texture lookup with lod bias */ 2557 /* src[0] = texcoord (src[0].w = LOD bias) */ 2558 /* src[1] = sampler unit */ 2559 exec_tex(mach, inst, TRUE, FALSE); 2560 break; 2561 2562 case TGSI_OPCODE_TXD: 2563 /* Texture lookup with explict partial derivatives */ 2564 /* src[0] = texcoord */ 2565 /* src[1] = d[strq]/dx */ 2566 /* src[2] = d[strq]/dy */ 2567 /* src[3] = sampler unit */ 2568 exec_txd(mach, inst); 2569 break; 2570 2571 case TGSI_OPCODE_TXL: 2572 /* Texture lookup with explit LOD */ 2573 /* src[0] = texcoord (src[0].w = LOD) */ 2574 /* src[1] = sampler unit */ 2575 exec_tex(mach, inst, TRUE, FALSE); 2576 break; 2577 2578 case TGSI_OPCODE_TXP: 2579 /* Texture lookup with projection */ 2580 /* src[0] = texcoord (src[0].w = projection) */ 2581 /* src[1] = sampler unit */ 2582 exec_tex(mach, inst, FALSE, TRUE); 2583 break; 2584 2585 case TGSI_OPCODE_UP2H: 2586 assert (0); 2587 break; 2588 2589 case TGSI_OPCODE_UP2US: 2590 assert (0); 2591 break; 2592 2593 case TGSI_OPCODE_UP4B: 2594 assert (0); 2595 break; 2596 2597 case TGSI_OPCODE_UP4UB: 2598 assert (0); 2599 break; 2600 2601 case TGSI_OPCODE_X2D: 2602 FETCH(&r[0], 1, CHAN_X); 2603 FETCH(&r[1], 1, CHAN_Y); 2604 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2605 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2606 FETCH(&r[2], 2, CHAN_X); 2607 micro_mul(&r[2], &r[2], &r[0]); 2608 FETCH(&r[3], 2, CHAN_Y); 2609 micro_mul(&r[3], &r[3], &r[1]); 2610 micro_add(&r[2], &r[2], &r[3]); 2611 FETCH(&r[3], 0, CHAN_X); 2612 micro_add(&d[CHAN_X], &r[2], &r[3]); 2613 2614 } 2615 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2616 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2617 FETCH(&r[2], 2, CHAN_Z); 2618 micro_mul(&r[2], &r[2], &r[0]); 2619 FETCH(&r[3], 2, CHAN_W); 2620 micro_mul(&r[3], &r[3], &r[1]); 2621 micro_add(&r[2], &r[2], &r[3]); 2622 FETCH(&r[3], 0, CHAN_Y); 2623 micro_add(&d[CHAN_Y], &r[2], &r[3]); 2624 2625 } 2626 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2627 STORE(&d[CHAN_X], 0, CHAN_X); 2628 } 2629 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2630 STORE(&d[CHAN_Y], 0, CHAN_Y); 2631 } 2632 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2633 STORE(&d[CHAN_X], 0, CHAN_Z); 2634 } 2635 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2636 STORE(&d[CHAN_Y], 0, CHAN_W); 2637 } 2638 break; 2639 2640 case TGSI_OPCODE_ARA: 2641 assert (0); 2642 break; 2643 2644 case TGSI_OPCODE_BRA: 2645 assert (0); 2646 break; 2647 2648 case TGSI_OPCODE_CAL: 2649 /* skip the call if no execution channels are enabled */ 2650 if (mach->ExecMask) { 2651 /* do the call */ 2652 2653 /* First, record the depths of the execution stacks. 2654 * This is important for deeply nested/looped return statements. 2655 * We have to unwind the stacks by the correct amount. For a 2656 * real code generator, we could determine the number of entries 2657 * to pop off each stack with simple static analysis and avoid 2658 * implementing this data structure at run time. 2659 */ 2660 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 2661 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 2662 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 2663 /* note that PC was already incremented above */ 2664 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 2665 2666 mach->CallStackTop++; 2667 2668 /* Second, push the Cond, Loop, Cont, Func stacks */ 2669 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2670 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2671 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2672 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2673 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2674 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2675 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2676 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2677 2678 /* Finally, jump to the subroutine */ 2679 *pc = inst->InstructionExtLabel.Label; 2680 } 2681 break; 2682 2683 case TGSI_OPCODE_RET: 2684 mach->FuncMask &= ~mach->ExecMask; 2685 UPDATE_EXEC_MASK(mach); 2686 2687 if (mach->FuncMask == 0x0) { 2688 /* really return now (otherwise, keep executing */ 2689 2690 if (mach->CallStackTop == 0) { 2691 /* returning from main() */ 2692 *pc = -1; 2693 return; 2694 } 2695 2696 assert(mach->CallStackTop > 0); 2697 mach->CallStackTop--; 2698 2699 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 2700 mach->CondMask = mach->CondStack[mach->CondStackTop]; 2701 2702 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 2703 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 2704 2705 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 2706 mach->ContMask = mach->ContStack[mach->ContStackTop]; 2707 2708 assert(mach->FuncStackTop > 0); 2709 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2710 2711 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 2712 2713 UPDATE_EXEC_MASK(mach); 2714 } 2715 break; 2716 2717 case TGSI_OPCODE_SSG: 2718 /* TGSI_OPCODE_SGN */ 2719 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2720 FETCH( &r[0], 0, chan_index ); 2721 micro_sgn(&d[chan_index], &r[0]); 2722 } 2723 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2724 STORE(&d[chan_index], 0, chan_index); 2725 } 2726 break; 2727 2728 case TGSI_OPCODE_CMP: 2729 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2730 FETCH(&r[0], 0, chan_index); 2731 FETCH(&r[1], 1, chan_index); 2732 FETCH(&r[2], 2, chan_index); 2733 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); 2734 } 2735 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2736 STORE(&d[chan_index], 0, chan_index); 2737 } 2738 break; 2739 2740 case TGSI_OPCODE_SCS: 2741 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2742 FETCH( &r[0], 0, CHAN_X ); 2743 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2744 micro_cos(&r[1], &r[0]); 2745 STORE(&r[1], 0, CHAN_X); 2746 } 2747 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2748 micro_sin(&r[1], &r[0]); 2749 STORE(&r[1], 0, CHAN_Y); 2750 } 2751 } 2752 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2753 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2754 } 2755 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2756 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2757 } 2758 break; 2759 2760 case TGSI_OPCODE_NRM: 2761 /* 3-component vector normalize */ 2762 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2763 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2764 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2765 /* r3 = sqrt(dp3(src0, src0)) */ 2766 FETCH(&r[0], 0, CHAN_X); 2767 micro_mul(&r[3], &r[0], &r[0]); 2768 FETCH(&r[1], 0, CHAN_Y); 2769 micro_mul(&r[4], &r[1], &r[1]); 2770 micro_add(&r[3], &r[3], &r[4]); 2771 FETCH(&r[2], 0, CHAN_Z); 2772 micro_mul(&r[4], &r[2], &r[2]); 2773 micro_add(&r[3], &r[3], &r[4]); 2774 micro_sqrt(&r[3], &r[3]); 2775 2776 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2777 micro_div(&r[0], &r[0], &r[3]); 2778 STORE(&r[0], 0, CHAN_X); 2779 } 2780 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2781 micro_div(&r[1], &r[1], &r[3]); 2782 STORE(&r[1], 0, CHAN_Y); 2783 } 2784 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2785 micro_div(&r[2], &r[2], &r[3]); 2786 STORE(&r[2], 0, CHAN_Z); 2787 } 2788 } 2789 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2790 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2791 } 2792 break; 2793 2794 case TGSI_OPCODE_NRM4: 2795 /* 4-component vector normalize */ 2796 { 2797 union tgsi_exec_channel tmp, dot; 2798 2799 /* tmp = dp4(src0, src0): */ 2800 FETCH( &r[0], 0, CHAN_X ); 2801 micro_mul( &tmp, &r[0], &r[0] ); 2802 2803 FETCH( &r[1], 0, CHAN_Y ); 2804 micro_mul( &dot, &r[1], &r[1] ); 2805 micro_add( &tmp, &tmp, &dot ); 2806 2807 FETCH( &r[2], 0, CHAN_Z ); 2808 micro_mul( &dot, &r[2], &r[2] ); 2809 micro_add( &tmp, &tmp, &dot ); 2810 2811 FETCH( &r[3], 0, CHAN_W ); 2812 micro_mul( &dot, &r[3], &r[3] ); 2813 micro_add( &tmp, &tmp, &dot ); 2814 2815 /* tmp = 1 / sqrt(tmp) */ 2816 micro_sqrt( &tmp, &tmp ); 2817 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2818 2819 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2820 /* chan = chan * tmp */ 2821 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2822 STORE( &r[chan_index], 0, chan_index ); 2823 } 2824 } 2825 break; 2826 2827 case TGSI_OPCODE_DIV: 2828 assert( 0 ); 2829 break; 2830 2831 case TGSI_OPCODE_DP2: 2832 FETCH( &r[0], 0, CHAN_X ); 2833 FETCH( &r[1], 1, CHAN_X ); 2834 micro_mul( &r[0], &r[0], &r[1] ); 2835 2836 FETCH( &r[1], 0, CHAN_Y ); 2837 FETCH( &r[2], 1, CHAN_Y ); 2838 micro_mul( &r[1], &r[1], &r[2] ); 2839 micro_add( &r[0], &r[0], &r[1] ); 2840 2841 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2842 STORE( &r[0], 0, chan_index ); 2843 } 2844 break; 2845 2846 case TGSI_OPCODE_IF: 2847 /* push CondMask */ 2848 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2849 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2850 FETCH( &r[0], 0, CHAN_X ); 2851 /* update CondMask */ 2852 if( ! r[0].u[0] ) { 2853 mach->CondMask &= ~0x1; 2854 } 2855 if( ! r[0].u[1] ) { 2856 mach->CondMask &= ~0x2; 2857 } 2858 if( ! r[0].u[2] ) { 2859 mach->CondMask &= ~0x4; 2860 } 2861 if( ! r[0].u[3] ) { 2862 mach->CondMask &= ~0x8; 2863 } 2864 UPDATE_EXEC_MASK(mach); 2865 /* Todo: If CondMask==0, jump to ELSE */ 2866 break; 2867 2868 case TGSI_OPCODE_ELSE: 2869 /* invert CondMask wrt previous mask */ 2870 { 2871 uint prevMask; 2872 assert(mach->CondStackTop > 0); 2873 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2874 mach->CondMask = ~mach->CondMask & prevMask; 2875 UPDATE_EXEC_MASK(mach); 2876 /* Todo: If CondMask==0, jump to ENDIF */ 2877 } 2878 break; 2879 2880 case TGSI_OPCODE_ENDIF: 2881 /* pop CondMask */ 2882 assert(mach->CondStackTop > 0); 2883 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2884 UPDATE_EXEC_MASK(mach); 2885 break; 2886 2887 case TGSI_OPCODE_END: 2888 /* halt execution */ 2889 *pc = -1; 2890 break; 2891 2892 case TGSI_OPCODE_REP: 2893 assert (0); 2894 break; 2895 2896 case TGSI_OPCODE_ENDREP: 2897 assert (0); 2898 break; 2899 2900 case TGSI_OPCODE_PUSHA: 2901 assert (0); 2902 break; 2903 2904 case TGSI_OPCODE_POPA: 2905 assert (0); 2906 break; 2907 2908 case TGSI_OPCODE_CEIL: 2909 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2910 FETCH( &r[0], 0, chan_index ); 2911 micro_ceil(&d[chan_index], &r[0]); 2912 } 2913 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2914 STORE(&d[chan_index], 0, chan_index); 2915 } 2916 break; 2917 2918 case TGSI_OPCODE_I2F: 2919 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2920 FETCH( &r[0], 0, chan_index ); 2921 micro_i2f(&d[chan_index], &r[0]); 2922 } 2923 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2924 STORE(&d[chan_index], 0, chan_index); 2925 } 2926 break; 2927 2928 case TGSI_OPCODE_NOT: 2929 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2930 FETCH( &r[0], 0, chan_index ); 2931 micro_not(&d[chan_index], &r[0]); 2932 } 2933 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2934 STORE(&d[chan_index], 0, chan_index); 2935 } 2936 break; 2937 2938 case TGSI_OPCODE_TRUNC: 2939 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2940 FETCH( &r[0], 0, chan_index ); 2941 micro_trunc(&d[chan_index], &r[0]); 2942 } 2943 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2944 STORE(&d[chan_index], 0, chan_index); 2945 } 2946 break; 2947 2948 case TGSI_OPCODE_SHL: 2949 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2950 FETCH( &r[0], 0, chan_index ); 2951 FETCH( &r[1], 1, chan_index ); 2952 micro_shl(&d[chan_index], &r[0], &r[1]); 2953 } 2954 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2955 STORE(&d[chan_index], 0, chan_index); 2956 } 2957 break; 2958 2959 case TGSI_OPCODE_SHR: 2960 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2961 FETCH( &r[0], 0, chan_index ); 2962 FETCH( &r[1], 1, chan_index ); 2963 micro_ishr(&d[chan_index], &r[0], &r[1]); 2964 } 2965 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2966 STORE(&d[chan_index], 0, chan_index); 2967 } 2968 break; 2969 2970 case TGSI_OPCODE_AND: 2971 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2972 FETCH( &r[0], 0, chan_index ); 2973 FETCH( &r[1], 1, chan_index ); 2974 micro_and(&d[chan_index], &r[0], &r[1]); 2975 } 2976 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2977 STORE(&d[chan_index], 0, chan_index); 2978 } 2979 break; 2980 2981 case TGSI_OPCODE_OR: 2982 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2983 FETCH( &r[0], 0, chan_index ); 2984 FETCH( &r[1], 1, chan_index ); 2985 micro_or(&d[chan_index], &r[0], &r[1]); 2986 } 2987 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2988 STORE(&d[chan_index], 0, chan_index); 2989 } 2990 break; 2991 2992 case TGSI_OPCODE_MOD: 2993 assert (0); 2994 break; 2995 2996 case TGSI_OPCODE_XOR: 2997 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2998 FETCH( &r[0], 0, chan_index ); 2999 FETCH( &r[1], 1, chan_index ); 3000 micro_xor(&d[chan_index], &r[0], &r[1]); 3001 } 3002 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3003 STORE(&d[chan_index], 0, chan_index); 3004 } 3005 break; 3006 3007 case TGSI_OPCODE_SAD: 3008 assert (0); 3009 break; 3010 3011 case TGSI_OPCODE_TXF: 3012 assert (0); 3013 break; 3014 3015 case TGSI_OPCODE_TXQ: 3016 assert (0); 3017 break; 3018 3019 case TGSI_OPCODE_EMIT: 3020 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 3021 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 3022 break; 3023 3024 case TGSI_OPCODE_ENDPRIM: 3025 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 3026 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 3027 break; 3028 3029 case TGSI_OPCODE_BGNFOR: 3030 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3031 for (chan_index = 0; chan_index < 3; chan_index++) { 3032 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3033 } 3034 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); 3035 ++mach->LoopCounterStackTop; 3036 /* fall-through (for now) */ 3037 case TGSI_OPCODE_BGNLOOP: 3038 /* push LoopMask and ContMasks */ 3039 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3040 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3041 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3042 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3043 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3044 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3045 break; 3046 3047 case TGSI_OPCODE_ENDFOR: 3048 assert(mach->LoopCounterStackTop > 0); 3049 micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3050 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3051 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 3052 /* update LoopMask */ 3053 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { 3054 mach->LoopMask &= ~0x1; 3055 } 3056 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { 3057 mach->LoopMask &= ~0x2; 3058 } 3059 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { 3060 mach->LoopMask &= ~0x4; 3061 } 3062 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { 3063 mach->LoopMask &= ~0x8; 3064 } 3065 micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3066 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3067 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3068 assert(mach->LoopLabelStackTop > 0); 3069 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3070 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); 3071 /* Restore ContMask, but don't pop */ 3072 assert(mach->ContStackTop > 0); 3073 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3074 UPDATE_EXEC_MASK(mach); 3075 if (mach->ExecMask) { 3076 /* repeat loop: jump to instruction just past BGNLOOP */ 3077 assert(mach->LoopLabelStackTop > 0); 3078 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3079 } 3080 else { 3081 /* exit loop: pop LoopMask */ 3082 assert(mach->LoopStackTop > 0); 3083 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3084 /* pop ContMask */ 3085 assert(mach->ContStackTop > 0); 3086 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3087 assert(mach->LoopLabelStackTop > 0); 3088 --mach->LoopLabelStackTop; 3089 assert(mach->LoopCounterStackTop > 0); 3090 --mach->LoopCounterStackTop; 3091 } 3092 UPDATE_EXEC_MASK(mach); 3093 break; 3094 3095 case TGSI_OPCODE_ENDLOOP: 3096 /* Restore ContMask, but don't pop */ 3097 assert(mach->ContStackTop > 0); 3098 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3099 UPDATE_EXEC_MASK(mach); 3100 if (mach->ExecMask) { 3101 /* repeat loop: jump to instruction just past BGNLOOP */ 3102 assert(mach->LoopLabelStackTop > 0); 3103 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3104 } 3105 else { 3106 /* exit loop: pop LoopMask */ 3107 assert(mach->LoopStackTop > 0); 3108 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3109 /* pop ContMask */ 3110 assert(mach->ContStackTop > 0); 3111 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3112 assert(mach->LoopLabelStackTop > 0); 3113 --mach->LoopLabelStackTop; 3114 } 3115 UPDATE_EXEC_MASK(mach); 3116 break; 3117 3118 case TGSI_OPCODE_BRK: 3119 /* turn off loop channels for each enabled exec channel */ 3120 mach->LoopMask &= ~mach->ExecMask; 3121 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3122 UPDATE_EXEC_MASK(mach); 3123 break; 3124 3125 case TGSI_OPCODE_CONT: 3126 /* turn off cont channels for each enabled exec channel */ 3127 mach->ContMask &= ~mach->ExecMask; 3128 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3129 UPDATE_EXEC_MASK(mach); 3130 break; 3131 3132 case TGSI_OPCODE_BGNSUB: 3133 /* no-op */ 3134 break; 3135 3136 case TGSI_OPCODE_ENDSUB: 3137 /* no-op */ 3138 break; 3139 3140 case TGSI_OPCODE_NOP: 3141 break; 3142 3143 default: 3144 assert( 0 ); 3145 } 3146} 3147 3148#define DEBUG_EXECUTION 0 3149 3150 3151/** 3152 * Run TGSI interpreter. 3153 * \return bitmask of "alive" quad components 3154 */ 3155uint 3156tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3157{ 3158 uint i; 3159 int pc = 0; 3160 3161 mach->CondMask = 0xf; 3162 mach->LoopMask = 0xf; 3163 mach->ContMask = 0xf; 3164 mach->FuncMask = 0xf; 3165 mach->ExecMask = 0xf; 3166 3167 assert(mach->CondStackTop == 0); 3168 assert(mach->LoopStackTop == 0); 3169 assert(mach->ContStackTop == 0); 3170 assert(mach->CallStackTop == 0); 3171 3172 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3173 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3174 3175 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3176 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3177 mach->Primitives[0] = 0; 3178 } 3179 3180 for (i = 0; i < QUAD_SIZE; i++) { 3181 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3182 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3183 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3184 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3185 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3186 } 3187 3188 /* execute declarations (interpolants) */ 3189 for (i = 0; i < mach->NumDeclarations; i++) { 3190 exec_declaration( mach, mach->Declarations+i ); 3191 } 3192 3193 { 3194#if DEBUG_EXECUTION 3195 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 3196 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 3197 uint inst = 1; 3198 3199 memcpy(temps, mach->Temps, sizeof(temps)); 3200 memcpy(outputs, mach->Outputs, sizeof(outputs)); 3201#endif 3202 3203 /* execute instructions, until pc is set to -1 */ 3204 while (pc != -1) { 3205 3206#if DEBUG_EXECUTION 3207 uint i; 3208 3209 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 3210#endif 3211 3212 assert(pc < (int) mach->NumInstructions); 3213 exec_instruction(mach, mach->Instructions + pc, &pc); 3214 3215#if DEBUG_EXECUTION 3216 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 3217 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 3218 uint j; 3219 3220 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 3221 debug_printf("TEMP[%2u] = ", i); 3222 for (j = 0; j < 4; j++) { 3223 if (j > 0) { 3224 debug_printf(" "); 3225 } 3226 debug_printf("(%6f, %6f, %6f, %6f)\n", 3227 temps[i].xyzw[0].f[j], 3228 temps[i].xyzw[1].f[j], 3229 temps[i].xyzw[2].f[j], 3230 temps[i].xyzw[3].f[j]); 3231 } 3232 } 3233 } 3234 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 3235 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 3236 uint j; 3237 3238 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 3239 debug_printf("OUT[%2u] = ", i); 3240 for (j = 0; j < 4; j++) { 3241 if (j > 0) { 3242 debug_printf(" "); 3243 } 3244 debug_printf("{%6f, %6f, %6f, %6f}\n", 3245 outputs[i].xyzw[0].f[j], 3246 outputs[i].xyzw[1].f[j], 3247 outputs[i].xyzw[2].f[j], 3248 outputs[i].xyzw[3].f[j]); 3249 } 3250 } 3251 } 3252#endif 3253 } 3254 } 3255 3256#if 0 3257 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3258 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3259 /* 3260 * Scale back depth component. 3261 */ 3262 for (i = 0; i < 4; i++) 3263 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3264 } 3265#endif 3266 3267 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3268} 3269