tgsi_exec.c revision 321634d80b48e33b4e9572d99e82c45c65701dd1
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_parse.h" 57#include "tgsi/tgsi_util.h" 58#include "tgsi_exec.h" 59#include "util/u_memory.h" 60#include "util/u_math.h" 61 62#define FAST_MATH 1 63 64#define TILE_TOP_LEFT 0 65#define TILE_TOP_RIGHT 1 66#define TILE_BOTTOM_LEFT 2 67#define TILE_BOTTOM_RIGHT 3 68 69#define CHAN_X 0 70#define CHAN_Y 1 71#define CHAN_Z 2 72#define CHAN_W 3 73 74/* 75 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 76 */ 77#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 78#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 79#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 80#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 81#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 82#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 83#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 84#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 85#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 86#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 87#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 88#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 89#define TEMP_128_I TGSI_EXEC_TEMP_128_I 90#define TEMP_128_C TGSI_EXEC_TEMP_128_C 91#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 92#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 93#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 94#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 95#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 96#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 97#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 98#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 99#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 100#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 101#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 102#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 103#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 104#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 105#define TEMP_R0 TGSI_EXEC_TEMP_R0 106 107#define IS_CHANNEL_ENABLED(INST, CHAN)\ 108 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 109 110#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 111 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 112 113#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 114 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 115 if (IS_CHANNEL_ENABLED( INST, CHAN )) 116 117#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 118 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 119 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 120 121 122/** The execution mask depends on the conditional mask and the loop mask */ 123#define UPDATE_EXEC_MASK(MACH) \ 124 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 125 126/** 127 * Initialize machine state by expanding tokens to full instructions, 128 * allocating temporary storage, setting up constants, etc. 129 * After this, we can call tgsi_exec_machine_run() many times. 130 */ 131void 132tgsi_exec_machine_bind_shader( 133 struct tgsi_exec_machine *mach, 134 const struct tgsi_token *tokens, 135 uint numSamplers, 136 struct tgsi_sampler **samplers) 137{ 138 uint k; 139 struct tgsi_parse_context parse; 140 struct tgsi_exec_labels *labels = &mach->Labels; 141 struct tgsi_full_instruction *instructions; 142 struct tgsi_full_declaration *declarations; 143 uint maxInstructions = 10, numInstructions = 0; 144 uint maxDeclarations = 10, numDeclarations = 0; 145 uint instno = 0; 146 147#if 0 148 tgsi_dump(tokens, 0); 149#endif 150 151 util_init_math(); 152 153 mach->Tokens = tokens; 154 mach->Samplers = samplers; 155 156 k = tgsi_parse_init (&parse, mach->Tokens); 157 if (k != TGSI_PARSE_OK) { 158 debug_printf( "Problem parsing!\n" ); 159 return; 160 } 161 162 mach->Processor = parse.FullHeader.Processor.Processor; 163 mach->ImmLimit = 0; 164 labels->count = 0; 165 166 declarations = (struct tgsi_full_declaration *) 167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 168 169 if (!declarations) { 170 return; 171 } 172 173 instructions = (struct tgsi_full_instruction *) 174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 175 176 if (!instructions) { 177 FREE( declarations ); 178 return; 179 } 180 181 while( !tgsi_parse_end_of_tokens( &parse ) ) { 182 uint pointer = parse.Position; 183 uint i; 184 185 tgsi_parse_token( &parse ); 186 switch( parse.FullToken.Token.Type ) { 187 case TGSI_TOKEN_TYPE_DECLARATION: 188 /* save expanded declaration */ 189 if (numDeclarations == maxDeclarations) { 190 declarations = REALLOC(declarations, 191 maxDeclarations 192 * sizeof(struct tgsi_full_declaration), 193 (maxDeclarations + 10) 194 * sizeof(struct tgsi_full_declaration)); 195 maxDeclarations += 10; 196 } 197 memcpy(declarations + numDeclarations, 198 &parse.FullToken.FullDeclaration, 199 sizeof(declarations[0])); 200 numDeclarations++; 201 break; 202 203 case TGSI_TOKEN_TYPE_IMMEDIATE: 204 { 205 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 206 assert( size % 4 == 0 ); 207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); 208 209 for( i = 0; i < size; i++ ) { 210 mach->Imms[mach->ImmLimit + i / 4][i % 4] = 211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 212 } 213 mach->ImmLimit += size / 4; 214 } 215 break; 216 217 case TGSI_TOKEN_TYPE_INSTRUCTION: 218 assert( labels->count < MAX_LABELS ); 219 220 labels->labels[labels->count][0] = instno; 221 labels->labels[labels->count][1] = pointer; 222 labels->count++; 223 224 /* save expanded instruction */ 225 if (numInstructions == maxInstructions) { 226 instructions = REALLOC(instructions, 227 maxInstructions 228 * sizeof(struct tgsi_full_instruction), 229 (maxInstructions + 10) 230 * sizeof(struct tgsi_full_instruction)); 231 maxInstructions += 10; 232 } 233 memcpy(instructions + numInstructions, 234 &parse.FullToken.FullInstruction, 235 sizeof(instructions[0])); 236 numInstructions++; 237 break; 238 239 default: 240 assert( 0 ); 241 } 242 } 243 tgsi_parse_free (&parse); 244 245 if (mach->Declarations) { 246 FREE( mach->Declarations ); 247 } 248 mach->Declarations = declarations; 249 mach->NumDeclarations = numDeclarations; 250 251 if (mach->Instructions) { 252 FREE( mach->Instructions ); 253 } 254 mach->Instructions = instructions; 255 mach->NumInstructions = numInstructions; 256} 257 258 259void 260tgsi_exec_machine_init( 261 struct tgsi_exec_machine *mach ) 262{ 263 uint i; 264 265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); 266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 267 268 /* Setup constants. */ 269 for( i = 0; i < 4; i++ ) { 270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 280 } 281} 282 283 284void 285tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) 286{ 287 if (mach->Instructions) { 288 FREE(mach->Instructions); 289 mach->Instructions = NULL; 290 mach->NumInstructions = 0; 291 } 292 if (mach->Declarations) { 293 FREE(mach->Declarations); 294 mach->Declarations = NULL; 295 mach->NumDeclarations = 0; 296 } 297} 298 299 300static void 301micro_abs( 302 union tgsi_exec_channel *dst, 303 const union tgsi_exec_channel *src ) 304{ 305 dst->f[0] = fabsf( src->f[0] ); 306 dst->f[1] = fabsf( src->f[1] ); 307 dst->f[2] = fabsf( src->f[2] ); 308 dst->f[3] = fabsf( src->f[3] ); 309} 310 311static void 312micro_add( 313 union tgsi_exec_channel *dst, 314 const union tgsi_exec_channel *src0, 315 const union tgsi_exec_channel *src1 ) 316{ 317 dst->f[0] = src0->f[0] + src1->f[0]; 318 dst->f[1] = src0->f[1] + src1->f[1]; 319 dst->f[2] = src0->f[2] + src1->f[2]; 320 dst->f[3] = src0->f[3] + src1->f[3]; 321} 322 323#if 0 324static void 325micro_iadd( 326 union tgsi_exec_channel *dst, 327 const union tgsi_exec_channel *src0, 328 const union tgsi_exec_channel *src1 ) 329{ 330 dst->i[0] = src0->i[0] + src1->i[0]; 331 dst->i[1] = src0->i[1] + src1->i[1]; 332 dst->i[2] = src0->i[2] + src1->i[2]; 333 dst->i[3] = src0->i[3] + src1->i[3]; 334} 335#endif 336 337static void 338micro_and( 339 union tgsi_exec_channel *dst, 340 const union tgsi_exec_channel *src0, 341 const union tgsi_exec_channel *src1 ) 342{ 343 dst->u[0] = src0->u[0] & src1->u[0]; 344 dst->u[1] = src0->u[1] & src1->u[1]; 345 dst->u[2] = src0->u[2] & src1->u[2]; 346 dst->u[3] = src0->u[3] & src1->u[3]; 347} 348 349static void 350micro_ceil( 351 union tgsi_exec_channel *dst, 352 const union tgsi_exec_channel *src ) 353{ 354 dst->f[0] = ceilf( src->f[0] ); 355 dst->f[1] = ceilf( src->f[1] ); 356 dst->f[2] = ceilf( src->f[2] ); 357 dst->f[3] = ceilf( src->f[3] ); 358} 359 360static void 361micro_cos( 362 union tgsi_exec_channel *dst, 363 const union tgsi_exec_channel *src ) 364{ 365 dst->f[0] = cosf( src->f[0] ); 366 dst->f[1] = cosf( src->f[1] ); 367 dst->f[2] = cosf( src->f[2] ); 368 dst->f[3] = cosf( src->f[3] ); 369} 370 371static void 372micro_ddx( 373 union tgsi_exec_channel *dst, 374 const union tgsi_exec_channel *src ) 375{ 376 dst->f[0] = 377 dst->f[1] = 378 dst->f[2] = 379 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 380} 381 382static void 383micro_ddy( 384 union tgsi_exec_channel *dst, 385 const union tgsi_exec_channel *src ) 386{ 387 dst->f[0] = 388 dst->f[1] = 389 dst->f[2] = 390 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 391} 392 393static void 394micro_div( 395 union tgsi_exec_channel *dst, 396 const union tgsi_exec_channel *src0, 397 const union tgsi_exec_channel *src1 ) 398{ 399 if (src1->f[0] != 0) { 400 dst->f[0] = src0->f[0] / src1->f[0]; 401 } 402 if (src1->f[1] != 0) { 403 dst->f[1] = src0->f[1] / src1->f[1]; 404 } 405 if (src1->f[2] != 0) { 406 dst->f[2] = src0->f[2] / src1->f[2]; 407 } 408 if (src1->f[3] != 0) { 409 dst->f[3] = src0->f[3] / src1->f[3]; 410 } 411} 412 413#if 0 414static void 415micro_udiv( 416 union tgsi_exec_channel *dst, 417 const union tgsi_exec_channel *src0, 418 const union tgsi_exec_channel *src1 ) 419{ 420 dst->u[0] = src0->u[0] / src1->u[0]; 421 dst->u[1] = src0->u[1] / src1->u[1]; 422 dst->u[2] = src0->u[2] / src1->u[2]; 423 dst->u[3] = src0->u[3] / src1->u[3]; 424} 425#endif 426 427static void 428micro_eq( 429 union tgsi_exec_channel *dst, 430 const union tgsi_exec_channel *src0, 431 const union tgsi_exec_channel *src1, 432 const union tgsi_exec_channel *src2, 433 const union tgsi_exec_channel *src3 ) 434{ 435 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 436 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 437 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 438 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 439} 440 441#if 0 442static void 443micro_ieq( 444 union tgsi_exec_channel *dst, 445 const union tgsi_exec_channel *src0, 446 const union tgsi_exec_channel *src1, 447 const union tgsi_exec_channel *src2, 448 const union tgsi_exec_channel *src3 ) 449{ 450 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 451 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 452 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 453 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 454} 455#endif 456 457static void 458micro_exp2( 459 union tgsi_exec_channel *dst, 460 const union tgsi_exec_channel *src) 461{ 462#if FAST_MATH 463 dst->f[0] = util_fast_exp2( src->f[0] ); 464 dst->f[1] = util_fast_exp2( src->f[1] ); 465 dst->f[2] = util_fast_exp2( src->f[2] ); 466 dst->f[3] = util_fast_exp2( src->f[3] ); 467#else 468 dst->f[0] = powf( 2.0f, src->f[0] ); 469 dst->f[1] = powf( 2.0f, src->f[1] ); 470 dst->f[2] = powf( 2.0f, src->f[2] ); 471 dst->f[3] = powf( 2.0f, src->f[3] ); 472#endif 473} 474 475#if 0 476static void 477micro_f2ut( 478 union tgsi_exec_channel *dst, 479 const union tgsi_exec_channel *src ) 480{ 481 dst->u[0] = (uint) src->f[0]; 482 dst->u[1] = (uint) src->f[1]; 483 dst->u[2] = (uint) src->f[2]; 484 dst->u[3] = (uint) src->f[3]; 485} 486#endif 487 488static void 489micro_float_clamp(union tgsi_exec_channel *dst, 490 const union tgsi_exec_channel *src) 491{ 492 uint i; 493 494 for (i = 0; i < 4; i++) { 495 if (src->f[i] > 0.0f) { 496 if (src->f[i] > 1.884467e+019f) 497 dst->f[i] = 1.884467e+019f; 498 else if (src->f[i] < 5.42101e-020f) 499 dst->f[i] = 5.42101e-020f; 500 else 501 dst->f[i] = src->f[i]; 502 } 503 else { 504 if (src->f[i] < -1.884467e+019f) 505 dst->f[i] = -1.884467e+019f; 506 else if (src->f[i] > -5.42101e-020f) 507 dst->f[i] = -5.42101e-020f; 508 else 509 dst->f[i] = src->f[i]; 510 } 511 } 512} 513 514static void 515micro_flr( 516 union tgsi_exec_channel *dst, 517 const union tgsi_exec_channel *src ) 518{ 519 dst->f[0] = floorf( src->f[0] ); 520 dst->f[1] = floorf( src->f[1] ); 521 dst->f[2] = floorf( src->f[2] ); 522 dst->f[3] = floorf( src->f[3] ); 523} 524 525static void 526micro_frc( 527 union tgsi_exec_channel *dst, 528 const union tgsi_exec_channel *src ) 529{ 530 dst->f[0] = src->f[0] - floorf( src->f[0] ); 531 dst->f[1] = src->f[1] - floorf( src->f[1] ); 532 dst->f[2] = src->f[2] - floorf( src->f[2] ); 533 dst->f[3] = src->f[3] - floorf( src->f[3] ); 534} 535 536static void 537micro_i2f( 538 union tgsi_exec_channel *dst, 539 const union tgsi_exec_channel *src ) 540{ 541 dst->f[0] = (float) src->i[0]; 542 dst->f[1] = (float) src->i[1]; 543 dst->f[2] = (float) src->i[2]; 544 dst->f[3] = (float) src->i[3]; 545} 546 547static void 548micro_lg2( 549 union tgsi_exec_channel *dst, 550 const union tgsi_exec_channel *src ) 551{ 552#if FAST_MATH 553 dst->f[0] = util_fast_log2( src->f[0] ); 554 dst->f[1] = util_fast_log2( src->f[1] ); 555 dst->f[2] = util_fast_log2( src->f[2] ); 556 dst->f[3] = util_fast_log2( src->f[3] ); 557#else 558 dst->f[0] = logf( src->f[0] ) * 1.442695f; 559 dst->f[1] = logf( src->f[1] ) * 1.442695f; 560 dst->f[2] = logf( src->f[2] ) * 1.442695f; 561 dst->f[3] = logf( src->f[3] ) * 1.442695f; 562#endif 563} 564 565static void 566micro_le( 567 union tgsi_exec_channel *dst, 568 const union tgsi_exec_channel *src0, 569 const union tgsi_exec_channel *src1, 570 const union tgsi_exec_channel *src2, 571 const union tgsi_exec_channel *src3 ) 572{ 573 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 574 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 575 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 576 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 577} 578 579static void 580micro_lt( 581 union tgsi_exec_channel *dst, 582 const union tgsi_exec_channel *src0, 583 const union tgsi_exec_channel *src1, 584 const union tgsi_exec_channel *src2, 585 const union tgsi_exec_channel *src3 ) 586{ 587 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 588 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 589 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 590 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 591} 592 593#if 0 594static void 595micro_ilt( 596 union tgsi_exec_channel *dst, 597 const union tgsi_exec_channel *src0, 598 const union tgsi_exec_channel *src1, 599 const union tgsi_exec_channel *src2, 600 const union tgsi_exec_channel *src3 ) 601{ 602 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 603 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 604 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 605 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 606} 607#endif 608 609#if 0 610static void 611micro_ult( 612 union tgsi_exec_channel *dst, 613 const union tgsi_exec_channel *src0, 614 const union tgsi_exec_channel *src1, 615 const union tgsi_exec_channel *src2, 616 const union tgsi_exec_channel *src3 ) 617{ 618 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 619 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 620 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 621 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 622} 623#endif 624 625static void 626micro_max( 627 union tgsi_exec_channel *dst, 628 const union tgsi_exec_channel *src0, 629 const union tgsi_exec_channel *src1 ) 630{ 631 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 632 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 633 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 634 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 635} 636 637#if 0 638static void 639micro_imax( 640 union tgsi_exec_channel *dst, 641 const union tgsi_exec_channel *src0, 642 const union tgsi_exec_channel *src1 ) 643{ 644 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 645 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 646 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 647 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 648} 649#endif 650 651#if 0 652static void 653micro_umax( 654 union tgsi_exec_channel *dst, 655 const union tgsi_exec_channel *src0, 656 const union tgsi_exec_channel *src1 ) 657{ 658 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 659 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 660 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 661 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 662} 663#endif 664 665static void 666micro_min( 667 union tgsi_exec_channel *dst, 668 const union tgsi_exec_channel *src0, 669 const union tgsi_exec_channel *src1 ) 670{ 671 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 672 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 673 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 674 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 675} 676 677#if 0 678static void 679micro_imin( 680 union tgsi_exec_channel *dst, 681 const union tgsi_exec_channel *src0, 682 const union tgsi_exec_channel *src1 ) 683{ 684 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 685 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 686 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 687 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 688} 689#endif 690 691#if 0 692static void 693micro_umin( 694 union tgsi_exec_channel *dst, 695 const union tgsi_exec_channel *src0, 696 const union tgsi_exec_channel *src1 ) 697{ 698 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 699 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 700 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 701 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 702} 703#endif 704 705#if 0 706static void 707micro_umod( 708 union tgsi_exec_channel *dst, 709 const union tgsi_exec_channel *src0, 710 const union tgsi_exec_channel *src1 ) 711{ 712 dst->u[0] = src0->u[0] % src1->u[0]; 713 dst->u[1] = src0->u[1] % src1->u[1]; 714 dst->u[2] = src0->u[2] % src1->u[2]; 715 dst->u[3] = src0->u[3] % src1->u[3]; 716} 717#endif 718 719static void 720micro_mul( 721 union tgsi_exec_channel *dst, 722 const union tgsi_exec_channel *src0, 723 const union tgsi_exec_channel *src1 ) 724{ 725 dst->f[0] = src0->f[0] * src1->f[0]; 726 dst->f[1] = src0->f[1] * src1->f[1]; 727 dst->f[2] = src0->f[2] * src1->f[2]; 728 dst->f[3] = src0->f[3] * src1->f[3]; 729} 730 731#if 0 732static void 733micro_imul( 734 union tgsi_exec_channel *dst, 735 const union tgsi_exec_channel *src0, 736 const union tgsi_exec_channel *src1 ) 737{ 738 dst->i[0] = src0->i[0] * src1->i[0]; 739 dst->i[1] = src0->i[1] * src1->i[1]; 740 dst->i[2] = src0->i[2] * src1->i[2]; 741 dst->i[3] = src0->i[3] * src1->i[3]; 742} 743#endif 744 745#if 0 746static void 747micro_imul64( 748 union tgsi_exec_channel *dst0, 749 union tgsi_exec_channel *dst1, 750 const union tgsi_exec_channel *src0, 751 const union tgsi_exec_channel *src1 ) 752{ 753 dst1->i[0] = src0->i[0] * src1->i[0]; 754 dst1->i[1] = src0->i[1] * src1->i[1]; 755 dst1->i[2] = src0->i[2] * src1->i[2]; 756 dst1->i[3] = src0->i[3] * src1->i[3]; 757 dst0->i[0] = 0; 758 dst0->i[1] = 0; 759 dst0->i[2] = 0; 760 dst0->i[3] = 0; 761} 762#endif 763 764#if 0 765static void 766micro_umul64( 767 union tgsi_exec_channel *dst0, 768 union tgsi_exec_channel *dst1, 769 const union tgsi_exec_channel *src0, 770 const union tgsi_exec_channel *src1 ) 771{ 772 dst1->u[0] = src0->u[0] * src1->u[0]; 773 dst1->u[1] = src0->u[1] * src1->u[1]; 774 dst1->u[2] = src0->u[2] * src1->u[2]; 775 dst1->u[3] = src0->u[3] * src1->u[3]; 776 dst0->u[0] = 0; 777 dst0->u[1] = 0; 778 dst0->u[2] = 0; 779 dst0->u[3] = 0; 780} 781#endif 782 783 784#if 0 785static void 786micro_movc( 787 union tgsi_exec_channel *dst, 788 const union tgsi_exec_channel *src0, 789 const union tgsi_exec_channel *src1, 790 const union tgsi_exec_channel *src2 ) 791{ 792 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 793 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 794 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 795 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 796} 797#endif 798 799static void 800micro_neg( 801 union tgsi_exec_channel *dst, 802 const union tgsi_exec_channel *src ) 803{ 804 dst->f[0] = -src->f[0]; 805 dst->f[1] = -src->f[1]; 806 dst->f[2] = -src->f[2]; 807 dst->f[3] = -src->f[3]; 808} 809 810#if 0 811static void 812micro_ineg( 813 union tgsi_exec_channel *dst, 814 const union tgsi_exec_channel *src ) 815{ 816 dst->i[0] = -src->i[0]; 817 dst->i[1] = -src->i[1]; 818 dst->i[2] = -src->i[2]; 819 dst->i[3] = -src->i[3]; 820} 821#endif 822 823static void 824micro_not( 825 union tgsi_exec_channel *dst, 826 const union tgsi_exec_channel *src ) 827{ 828 dst->u[0] = ~src->u[0]; 829 dst->u[1] = ~src->u[1]; 830 dst->u[2] = ~src->u[2]; 831 dst->u[3] = ~src->u[3]; 832} 833 834static void 835micro_or( 836 union tgsi_exec_channel *dst, 837 const union tgsi_exec_channel *src0, 838 const union tgsi_exec_channel *src1 ) 839{ 840 dst->u[0] = src0->u[0] | src1->u[0]; 841 dst->u[1] = src0->u[1] | src1->u[1]; 842 dst->u[2] = src0->u[2] | src1->u[2]; 843 dst->u[3] = src0->u[3] | src1->u[3]; 844} 845 846static void 847micro_pow( 848 union tgsi_exec_channel *dst, 849 const union tgsi_exec_channel *src0, 850 const union tgsi_exec_channel *src1 ) 851{ 852#if FAST_MATH 853 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 854 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 855 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 856 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 857#else 858 dst->f[0] = powf( src0->f[0], src1->f[0] ); 859 dst->f[1] = powf( src0->f[1], src1->f[1] ); 860 dst->f[2] = powf( src0->f[2], src1->f[2] ); 861 dst->f[3] = powf( src0->f[3], src1->f[3] ); 862#endif 863} 864 865static void 866micro_rnd( 867 union tgsi_exec_channel *dst, 868 const union tgsi_exec_channel *src ) 869{ 870 dst->f[0] = floorf( src->f[0] + 0.5f ); 871 dst->f[1] = floorf( src->f[1] + 0.5f ); 872 dst->f[2] = floorf( src->f[2] + 0.5f ); 873 dst->f[3] = floorf( src->f[3] + 0.5f ); 874} 875 876static void 877micro_sgn( 878 union tgsi_exec_channel *dst, 879 const union tgsi_exec_channel *src ) 880{ 881 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 882 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 883 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 884 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 885} 886 887static void 888micro_shl( 889 union tgsi_exec_channel *dst, 890 const union tgsi_exec_channel *src0, 891 const union tgsi_exec_channel *src1 ) 892{ 893 dst->i[0] = src0->i[0] << src1->i[0]; 894 dst->i[1] = src0->i[1] << src1->i[1]; 895 dst->i[2] = src0->i[2] << src1->i[2]; 896 dst->i[3] = src0->i[3] << src1->i[3]; 897} 898 899static void 900micro_ishr( 901 union tgsi_exec_channel *dst, 902 const union tgsi_exec_channel *src0, 903 const union tgsi_exec_channel *src1 ) 904{ 905 dst->i[0] = src0->i[0] >> src1->i[0]; 906 dst->i[1] = src0->i[1] >> src1->i[1]; 907 dst->i[2] = src0->i[2] >> src1->i[2]; 908 dst->i[3] = src0->i[3] >> src1->i[3]; 909} 910 911static void 912micro_trunc( 913 union tgsi_exec_channel *dst, 914 const union tgsi_exec_channel *src0 ) 915{ 916 dst->f[0] = (float) (int) src0->f[0]; 917 dst->f[1] = (float) (int) src0->f[1]; 918 dst->f[2] = (float) (int) src0->f[2]; 919 dst->f[3] = (float) (int) src0->f[3]; 920} 921 922#if 0 923static void 924micro_ushr( 925 union tgsi_exec_channel *dst, 926 const union tgsi_exec_channel *src0, 927 const union tgsi_exec_channel *src1 ) 928{ 929 dst->u[0] = src0->u[0] >> src1->u[0]; 930 dst->u[1] = src0->u[1] >> src1->u[1]; 931 dst->u[2] = src0->u[2] >> src1->u[2]; 932 dst->u[3] = src0->u[3] >> src1->u[3]; 933} 934#endif 935 936static void 937micro_sin( 938 union tgsi_exec_channel *dst, 939 const union tgsi_exec_channel *src ) 940{ 941 dst->f[0] = sinf( src->f[0] ); 942 dst->f[1] = sinf( src->f[1] ); 943 dst->f[2] = sinf( src->f[2] ); 944 dst->f[3] = sinf( src->f[3] ); 945} 946 947static void 948micro_sqrt( union tgsi_exec_channel *dst, 949 const union tgsi_exec_channel *src ) 950{ 951 dst->f[0] = sqrtf( src->f[0] ); 952 dst->f[1] = sqrtf( src->f[1] ); 953 dst->f[2] = sqrtf( src->f[2] ); 954 dst->f[3] = sqrtf( src->f[3] ); 955} 956 957static void 958micro_sub( 959 union tgsi_exec_channel *dst, 960 const union tgsi_exec_channel *src0, 961 const union tgsi_exec_channel *src1 ) 962{ 963 dst->f[0] = src0->f[0] - src1->f[0]; 964 dst->f[1] = src0->f[1] - src1->f[1]; 965 dst->f[2] = src0->f[2] - src1->f[2]; 966 dst->f[3] = src0->f[3] - src1->f[3]; 967} 968 969#if 0 970static void 971micro_u2f( 972 union tgsi_exec_channel *dst, 973 const union tgsi_exec_channel *src ) 974{ 975 dst->f[0] = (float) src->u[0]; 976 dst->f[1] = (float) src->u[1]; 977 dst->f[2] = (float) src->u[2]; 978 dst->f[3] = (float) src->u[3]; 979} 980#endif 981 982static void 983micro_xor( 984 union tgsi_exec_channel *dst, 985 const union tgsi_exec_channel *src0, 986 const union tgsi_exec_channel *src1 ) 987{ 988 dst->u[0] = src0->u[0] ^ src1->u[0]; 989 dst->u[1] = src0->u[1] ^ src1->u[1]; 990 dst->u[2] = src0->u[2] ^ src1->u[2]; 991 dst->u[3] = src0->u[3] ^ src1->u[3]; 992} 993 994static void 995fetch_src_file_channel( 996 const struct tgsi_exec_machine *mach, 997 const uint file, 998 const uint swizzle, 999 const union tgsi_exec_channel *index, 1000 union tgsi_exec_channel *chan ) 1001{ 1002 switch( swizzle ) { 1003 case TGSI_EXTSWIZZLE_X: 1004 case TGSI_EXTSWIZZLE_Y: 1005 case TGSI_EXTSWIZZLE_Z: 1006 case TGSI_EXTSWIZZLE_W: 1007 switch( file ) { 1008 case TGSI_FILE_CONSTANT: 1009 assert(mach->Consts); 1010 if (index->i[0] < 0) 1011 chan->f[0] = 0.0f; 1012 else 1013 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1014 if (index->i[1] < 0) 1015 chan->f[1] = 0.0f; 1016 else 1017 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1018 if (index->i[2] < 0) 1019 chan->f[2] = 0.0f; 1020 else 1021 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1022 if (index->i[3] < 0) 1023 chan->f[3] = 0.0f; 1024 else 1025 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1026 break; 1027 1028 case TGSI_FILE_INPUT: 1029 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1030 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1031 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1032 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1033 break; 1034 1035 case TGSI_FILE_TEMPORARY: 1036 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1037 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1038 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1039 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1040 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1041 break; 1042 1043 case TGSI_FILE_IMMEDIATE: 1044 assert( index->i[0] < (int) mach->ImmLimit ); 1045 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1046 assert( index->i[1] < (int) mach->ImmLimit ); 1047 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1048 assert( index->i[2] < (int) mach->ImmLimit ); 1049 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1050 assert( index->i[3] < (int) mach->ImmLimit ); 1051 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1052 break; 1053 1054 case TGSI_FILE_ADDRESS: 1055 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1056 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1057 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1058 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1059 break; 1060 1061 case TGSI_FILE_OUTPUT: 1062 /* vertex/fragment output vars can be read too */ 1063 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1064 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1065 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1066 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1067 break; 1068 1069 default: 1070 assert( 0 ); 1071 } 1072 break; 1073 1074 case TGSI_EXTSWIZZLE_ZERO: 1075 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1076 break; 1077 1078 case TGSI_EXTSWIZZLE_ONE: 1079 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1080 break; 1081 1082 default: 1083 assert( 0 ); 1084 } 1085} 1086 1087static void 1088fetch_source( 1089 const struct tgsi_exec_machine *mach, 1090 union tgsi_exec_channel *chan, 1091 const struct tgsi_full_src_register *reg, 1092 const uint chan_index ) 1093{ 1094 union tgsi_exec_channel index; 1095 uint swizzle; 1096 1097 /* We start with a direct index into a register file. 1098 * 1099 * file[1], 1100 * where: 1101 * file = SrcRegister.File 1102 * [1] = SrcRegister.Index 1103 */ 1104 index.i[0] = 1105 index.i[1] = 1106 index.i[2] = 1107 index.i[3] = reg->SrcRegister.Index; 1108 1109 /* There is an extra source register that indirectly subscripts 1110 * a register file. The direct index now becomes an offset 1111 * that is being added to the indirect register. 1112 * 1113 * file[ind[2].x+1], 1114 * where: 1115 * ind = SrcRegisterInd.File 1116 * [2] = SrcRegisterInd.Index 1117 * .x = SrcRegisterInd.SwizzleX 1118 */ 1119 if (reg->SrcRegister.Indirect) { 1120 union tgsi_exec_channel index2; 1121 union tgsi_exec_channel indir_index; 1122 const uint execmask = mach->ExecMask; 1123 uint i; 1124 1125 /* which address register (always zero now) */ 1126 index2.i[0] = 1127 index2.i[1] = 1128 index2.i[2] = 1129 index2.i[3] = reg->SrcRegisterInd.Index; 1130 1131 /* get current value of address register[swizzle] */ 1132 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1133 fetch_src_file_channel( 1134 mach, 1135 reg->SrcRegisterInd.File, 1136 swizzle, 1137 &index2, 1138 &indir_index ); 1139 1140 /* add value of address register to the offset */ 1141 index.i[0] += (int) indir_index.f[0]; 1142 index.i[1] += (int) indir_index.f[1]; 1143 index.i[2] += (int) indir_index.f[2]; 1144 index.i[3] += (int) indir_index.f[3]; 1145 1146 /* for disabled execution channels, zero-out the index to 1147 * avoid using a potential garbage value. 1148 */ 1149 for (i = 0; i < QUAD_SIZE; i++) { 1150 if ((execmask & (1 << i)) == 0) 1151 index.i[i] = 0; 1152 } 1153 } 1154 1155 /* There is an extra source register that is a second 1156 * subscript to a register file. Effectively it means that 1157 * the register file is actually a 2D array of registers. 1158 * 1159 * file[1][3] == file[1*sizeof(file[1])+3], 1160 * where: 1161 * [3] = SrcRegisterDim.Index 1162 */ 1163 if (reg->SrcRegister.Dimension) { 1164 /* The size of the first-order array depends on the register file type. 1165 * We need to multiply the index to the first array to get an effective, 1166 * "flat" index that points to the beginning of the second-order array. 1167 */ 1168 switch (reg->SrcRegister.File) { 1169 case TGSI_FILE_INPUT: 1170 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1171 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1172 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1173 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1174 break; 1175 case TGSI_FILE_CONSTANT: 1176 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1177 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1178 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1179 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1180 break; 1181 default: 1182 assert( 0 ); 1183 } 1184 1185 index.i[0] += reg->SrcRegisterDim.Index; 1186 index.i[1] += reg->SrcRegisterDim.Index; 1187 index.i[2] += reg->SrcRegisterDim.Index; 1188 index.i[3] += reg->SrcRegisterDim.Index; 1189 1190 /* Again, the second subscript index can be addressed indirectly 1191 * identically to the first one. 1192 * Nothing stops us from indirectly addressing the indirect register, 1193 * but there is no need for that, so we won't exercise it. 1194 * 1195 * file[1][ind[4].y+3], 1196 * where: 1197 * ind = SrcRegisterDimInd.File 1198 * [4] = SrcRegisterDimInd.Index 1199 * .y = SrcRegisterDimInd.SwizzleX 1200 */ 1201 if (reg->SrcRegisterDim.Indirect) { 1202 union tgsi_exec_channel index2; 1203 union tgsi_exec_channel indir_index; 1204 const uint execmask = mach->ExecMask; 1205 uint i; 1206 1207 index2.i[0] = 1208 index2.i[1] = 1209 index2.i[2] = 1210 index2.i[3] = reg->SrcRegisterDimInd.Index; 1211 1212 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1213 fetch_src_file_channel( 1214 mach, 1215 reg->SrcRegisterDimInd.File, 1216 swizzle, 1217 &index2, 1218 &indir_index ); 1219 1220 index.i[0] += (int) indir_index.f[0]; 1221 index.i[1] += (int) indir_index.f[1]; 1222 index.i[2] += (int) indir_index.f[2]; 1223 index.i[3] += (int) indir_index.f[3]; 1224 1225 /* for disabled execution channels, zero-out the index to 1226 * avoid using a potential garbage value. 1227 */ 1228 for (i = 0; i < QUAD_SIZE; i++) { 1229 if ((execmask & (1 << i)) == 0) 1230 index.i[i] = 0; 1231 } 1232 } 1233 1234 /* If by any chance there was a need for a 3D array of register 1235 * files, we would have to check whether SrcRegisterDim is followed 1236 * by a dimension register and continue the saga. 1237 */ 1238 } 1239 1240 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1241 fetch_src_file_channel( 1242 mach, 1243 reg->SrcRegister.File, 1244 swizzle, 1245 &index, 1246 chan ); 1247 1248 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1249 case TGSI_UTIL_SIGN_CLEAR: 1250 micro_abs( chan, chan ); 1251 break; 1252 1253 case TGSI_UTIL_SIGN_SET: 1254 micro_abs( chan, chan ); 1255 micro_neg( chan, chan ); 1256 break; 1257 1258 case TGSI_UTIL_SIGN_TOGGLE: 1259 micro_neg( chan, chan ); 1260 break; 1261 1262 case TGSI_UTIL_SIGN_KEEP: 1263 break; 1264 } 1265 1266 if (reg->SrcRegisterExtMod.Complement) { 1267 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1268 } 1269} 1270 1271static void 1272store_dest( 1273 struct tgsi_exec_machine *mach, 1274 const union tgsi_exec_channel *chan, 1275 const struct tgsi_full_dst_register *reg, 1276 const struct tgsi_full_instruction *inst, 1277 uint chan_index ) 1278{ 1279 uint i; 1280 union tgsi_exec_channel null; 1281 union tgsi_exec_channel *dst; 1282 uint execmask = mach->ExecMask; 1283 1284 switch (reg->DstRegister.File) { 1285 case TGSI_FILE_NULL: 1286 dst = &null; 1287 break; 1288 1289 case TGSI_FILE_OUTPUT: 1290 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1291 + reg->DstRegister.Index].xyzw[chan_index]; 1292 break; 1293 1294 case TGSI_FILE_TEMPORARY: 1295 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); 1296 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 1297 break; 1298 1299 case TGSI_FILE_ADDRESS: 1300 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 1301 break; 1302 1303 default: 1304 assert( 0 ); 1305 return; 1306 } 1307 1308 if (inst->InstructionExtNv.CondFlowEnable) { 1309 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1310 uint swizzle; 1311 uint shift; 1312 uint mask; 1313 uint test; 1314 1315 /* Only CC0 supported. 1316 */ 1317 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1318 1319 switch (chan_index) { 1320 case CHAN_X: 1321 swizzle = inst->InstructionExtNv.CondSwizzleX; 1322 break; 1323 case CHAN_Y: 1324 swizzle = inst->InstructionExtNv.CondSwizzleY; 1325 break; 1326 case CHAN_Z: 1327 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1328 break; 1329 case CHAN_W: 1330 swizzle = inst->InstructionExtNv.CondSwizzleW; 1331 break; 1332 default: 1333 assert( 0 ); 1334 return; 1335 } 1336 1337 switch (swizzle) { 1338 case TGSI_SWIZZLE_X: 1339 shift = TGSI_EXEC_CC_X_SHIFT; 1340 mask = TGSI_EXEC_CC_X_MASK; 1341 break; 1342 case TGSI_SWIZZLE_Y: 1343 shift = TGSI_EXEC_CC_Y_SHIFT; 1344 mask = TGSI_EXEC_CC_Y_MASK; 1345 break; 1346 case TGSI_SWIZZLE_Z: 1347 shift = TGSI_EXEC_CC_Z_SHIFT; 1348 mask = TGSI_EXEC_CC_Z_MASK; 1349 break; 1350 case TGSI_SWIZZLE_W: 1351 shift = TGSI_EXEC_CC_W_SHIFT; 1352 mask = TGSI_EXEC_CC_W_MASK; 1353 break; 1354 default: 1355 assert( 0 ); 1356 return; 1357 } 1358 1359 switch (inst->InstructionExtNv.CondMask) { 1360 case TGSI_CC_GT: 1361 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1362 for (i = 0; i < QUAD_SIZE; i++) 1363 if (cc->u[i] & test) 1364 execmask &= ~(1 << i); 1365 break; 1366 1367 case TGSI_CC_EQ: 1368 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1369 for (i = 0; i < QUAD_SIZE; i++) 1370 if (cc->u[i] & test) 1371 execmask &= ~(1 << i); 1372 break; 1373 1374 case TGSI_CC_LT: 1375 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1376 for (i = 0; i < QUAD_SIZE; i++) 1377 if (cc->u[i] & test) 1378 execmask &= ~(1 << i); 1379 break; 1380 1381 case TGSI_CC_GE: 1382 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1383 for (i = 0; i < QUAD_SIZE; i++) 1384 if (cc->u[i] & test) 1385 execmask &= ~(1 << i); 1386 break; 1387 1388 case TGSI_CC_LE: 1389 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1390 for (i = 0; i < QUAD_SIZE; i++) 1391 if (cc->u[i] & test) 1392 execmask &= ~(1 << i); 1393 break; 1394 1395 case TGSI_CC_NE: 1396 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1397 for (i = 0; i < QUAD_SIZE; i++) 1398 if (cc->u[i] & test) 1399 execmask &= ~(1 << i); 1400 break; 1401 1402 case TGSI_CC_TR: 1403 break; 1404 1405 case TGSI_CC_FL: 1406 for (i = 0; i < QUAD_SIZE; i++) 1407 execmask &= ~(1 << i); 1408 break; 1409 1410 default: 1411 assert( 0 ); 1412 return; 1413 } 1414 } 1415 1416 switch (inst->Instruction.Saturate) { 1417 case TGSI_SAT_NONE: 1418 for (i = 0; i < QUAD_SIZE; i++) 1419 if (execmask & (1 << i)) 1420 dst->i[i] = chan->i[i]; 1421 break; 1422 1423 case TGSI_SAT_ZERO_ONE: 1424 for (i = 0; i < QUAD_SIZE; i++) 1425 if (execmask & (1 << i)) { 1426 if (chan->f[i] < 0.0f) 1427 dst->f[i] = 0.0f; 1428 else if (chan->f[i] > 1.0f) 1429 dst->f[i] = 1.0f; 1430 else 1431 dst->i[i] = chan->i[i]; 1432 } 1433 break; 1434 1435 case TGSI_SAT_MINUS_PLUS_ONE: 1436 for (i = 0; i < QUAD_SIZE; i++) 1437 if (execmask & (1 << i)) { 1438 if (chan->f[i] < -1.0f) 1439 dst->f[i] = -1.0f; 1440 else if (chan->f[i] > 1.0f) 1441 dst->f[i] = 1.0f; 1442 else 1443 dst->i[i] = chan->i[i]; 1444 } 1445 break; 1446 1447 default: 1448 assert( 0 ); 1449 } 1450 1451 if (inst->InstructionExtNv.CondDstUpdate) { 1452 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1453 uint shift; 1454 uint mask; 1455 1456 /* Only CC0 supported. 1457 */ 1458 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1459 1460 switch (chan_index) { 1461 case CHAN_X: 1462 shift = TGSI_EXEC_CC_X_SHIFT; 1463 mask = ~TGSI_EXEC_CC_X_MASK; 1464 break; 1465 case CHAN_Y: 1466 shift = TGSI_EXEC_CC_Y_SHIFT; 1467 mask = ~TGSI_EXEC_CC_Y_MASK; 1468 break; 1469 case CHAN_Z: 1470 shift = TGSI_EXEC_CC_Z_SHIFT; 1471 mask = ~TGSI_EXEC_CC_Z_MASK; 1472 break; 1473 case CHAN_W: 1474 shift = TGSI_EXEC_CC_W_SHIFT; 1475 mask = ~TGSI_EXEC_CC_W_MASK; 1476 break; 1477 default: 1478 assert( 0 ); 1479 return; 1480 } 1481 1482 for (i = 0; i < QUAD_SIZE; i++) 1483 if (execmask & (1 << i)) { 1484 cc->u[i] &= mask; 1485 if (dst->f[i] < 0.0f) 1486 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1487 else if (dst->f[i] > 0.0f) 1488 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1489 else if (dst->f[i] == 0.0f) 1490 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1491 else 1492 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1493 } 1494 } 1495} 1496 1497#define FETCH(VAL,INDEX,CHAN)\ 1498 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1499 1500#define STORE(VAL,INDEX,CHAN)\ 1501 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1502 1503 1504/** 1505 * Execute ARB-style KIL which is predicated by a src register. 1506 * Kill fragment if any of the four values is less than zero. 1507 */ 1508static void 1509exec_kil(struct tgsi_exec_machine *mach, 1510 const struct tgsi_full_instruction *inst) 1511{ 1512 uint uniquemask; 1513 uint chan_index; 1514 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1515 union tgsi_exec_channel r[1]; 1516 1517 /* This mask stores component bits that were already tested. Note that 1518 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1519 * tested. */ 1520 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1521 1522 for (chan_index = 0; chan_index < 4; chan_index++) 1523 { 1524 uint swizzle; 1525 uint i; 1526 1527 /* unswizzle channel */ 1528 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1529 &inst->FullSrcRegisters[0], 1530 chan_index); 1531 1532 /* check if the component has not been already tested */ 1533 if (uniquemask & (1 << swizzle)) 1534 continue; 1535 uniquemask |= 1 << swizzle; 1536 1537 FETCH(&r[0], 0, chan_index); 1538 for (i = 0; i < 4; i++) 1539 if (r[0].f[i] < 0.0f) 1540 kilmask |= 1 << i; 1541 } 1542 1543 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1544} 1545 1546/** 1547 * Execute NVIDIA-style KIL which is predicated by a condition code. 1548 * Kill fragment if the condition code is TRUE. 1549 */ 1550static void 1551exec_kilp(struct tgsi_exec_machine *mach, 1552 const struct tgsi_full_instruction *inst) 1553{ 1554 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1555 1556 if (inst->InstructionExtNv.CondFlowEnable) { 1557 uint swizzle[4]; 1558 uint chan_index; 1559 1560 kilmask = 0x0; 1561 1562 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1563 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1564 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1565 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1566 1567 for (chan_index = 0; chan_index < 4; chan_index++) 1568 { 1569 uint i; 1570 1571 for (i = 0; i < 4; i++) { 1572 /* TODO: evaluate the condition code */ 1573 if (0) 1574 kilmask |= 1 << i; 1575 } 1576 } 1577 } 1578 else { 1579 /* "unconditional" kil */ 1580 kilmask = mach->ExecMask; 1581 } 1582 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1583} 1584 1585 1586/* 1587 * Fetch a four texture samples using STR texture coordinates. 1588 */ 1589static void 1590fetch_texel( struct tgsi_sampler *sampler, 1591 const union tgsi_exec_channel *s, 1592 const union tgsi_exec_channel *t, 1593 const union tgsi_exec_channel *p, 1594 float lodbias, /* XXX should be float[4] */ 1595 union tgsi_exec_channel *r, 1596 union tgsi_exec_channel *g, 1597 union tgsi_exec_channel *b, 1598 union tgsi_exec_channel *a ) 1599{ 1600 uint j; 1601 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1602 1603 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1604 1605 for (j = 0; j < 4; j++) { 1606 r->f[j] = rgba[0][j]; 1607 g->f[j] = rgba[1][j]; 1608 b->f[j] = rgba[2][j]; 1609 a->f[j] = rgba[3][j]; 1610 } 1611} 1612 1613 1614static void 1615exec_tex(struct tgsi_exec_machine *mach, 1616 const struct tgsi_full_instruction *inst, 1617 boolean biasLod, 1618 boolean projected) 1619{ 1620 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1621 union tgsi_exec_channel r[4]; 1622 uint chan_index; 1623 float lodBias; 1624 1625 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1626 1627 switch (inst->InstructionExtTexture.Texture) { 1628 case TGSI_TEXTURE_1D: 1629 case TGSI_TEXTURE_SHADOW1D: 1630 1631 FETCH(&r[0], 0, CHAN_X); 1632 1633 if (projected) { 1634 FETCH(&r[1], 0, CHAN_W); 1635 micro_div( &r[0], &r[0], &r[1] ); 1636 } 1637 1638 if (biasLod) { 1639 FETCH(&r[1], 0, CHAN_W); 1640 lodBias = r[2].f[0]; 1641 } 1642 else 1643 lodBias = 0.0; 1644 1645 fetch_texel(mach->Samplers[unit], 1646 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ 1647 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1648 break; 1649 1650 case TGSI_TEXTURE_2D: 1651 case TGSI_TEXTURE_RECT: 1652 case TGSI_TEXTURE_SHADOW2D: 1653 case TGSI_TEXTURE_SHADOWRECT: 1654 1655 FETCH(&r[0], 0, CHAN_X); 1656 FETCH(&r[1], 0, CHAN_Y); 1657 FETCH(&r[2], 0, CHAN_Z); 1658 1659 if (projected) { 1660 FETCH(&r[3], 0, CHAN_W); 1661 micro_div( &r[0], &r[0], &r[3] ); 1662 micro_div( &r[1], &r[1], &r[3] ); 1663 micro_div( &r[2], &r[2], &r[3] ); 1664 } 1665 1666 if (biasLod) { 1667 FETCH(&r[3], 0, CHAN_W); 1668 lodBias = r[3].f[0]; 1669 } 1670 else 1671 lodBias = 0.0; 1672 1673 fetch_texel(mach->Samplers[unit], 1674 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1675 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1676 break; 1677 1678 case TGSI_TEXTURE_3D: 1679 case TGSI_TEXTURE_CUBE: 1680 1681 FETCH(&r[0], 0, CHAN_X); 1682 FETCH(&r[1], 0, CHAN_Y); 1683 FETCH(&r[2], 0, CHAN_Z); 1684 1685 if (projected) { 1686 FETCH(&r[3], 0, CHAN_W); 1687 micro_div( &r[0], &r[0], &r[3] ); 1688 micro_div( &r[1], &r[1], &r[3] ); 1689 micro_div( &r[2], &r[2], &r[3] ); 1690 } 1691 1692 if (biasLod) { 1693 FETCH(&r[3], 0, CHAN_W); 1694 lodBias = r[3].f[0]; 1695 } 1696 else 1697 lodBias = 0.0; 1698 1699 fetch_texel(mach->Samplers[unit], 1700 &r[0], &r[1], &r[2], lodBias, 1701 &r[0], &r[1], &r[2], &r[3]); 1702 break; 1703 1704 default: 1705 assert (0); 1706 } 1707 1708 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1709 STORE( &r[chan_index], 0, chan_index ); 1710 } 1711} 1712 1713 1714/** 1715 * Evaluate a constant-valued coefficient at the position of the 1716 * current quad. 1717 */ 1718static void 1719eval_constant_coef( 1720 struct tgsi_exec_machine *mach, 1721 unsigned attrib, 1722 unsigned chan ) 1723{ 1724 unsigned i; 1725 1726 for( i = 0; i < QUAD_SIZE; i++ ) { 1727 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1728 } 1729} 1730 1731/** 1732 * Evaluate a linear-valued coefficient at the position of the 1733 * current quad. 1734 */ 1735static void 1736eval_linear_coef( 1737 struct tgsi_exec_machine *mach, 1738 unsigned attrib, 1739 unsigned chan ) 1740{ 1741 const float x = mach->QuadPos.xyzw[0].f[0]; 1742 const float y = mach->QuadPos.xyzw[1].f[0]; 1743 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1744 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1745 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1746 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1747 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1748 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1749 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1750} 1751 1752/** 1753 * Evaluate a perspective-valued coefficient at the position of the 1754 * current quad. 1755 */ 1756static void 1757eval_perspective_coef( 1758 struct tgsi_exec_machine *mach, 1759 unsigned attrib, 1760 unsigned chan ) 1761{ 1762 const float x = mach->QuadPos.xyzw[0].f[0]; 1763 const float y = mach->QuadPos.xyzw[1].f[0]; 1764 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1765 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1766 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1767 const float *w = mach->QuadPos.xyzw[3].f; 1768 /* divide by W here */ 1769 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1770 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1771 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1772 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1773} 1774 1775 1776typedef void (* eval_coef_func)( 1777 struct tgsi_exec_machine *mach, 1778 unsigned attrib, 1779 unsigned chan ); 1780 1781static void 1782exec_declaration( 1783 struct tgsi_exec_machine *mach, 1784 const struct tgsi_full_declaration *decl ) 1785{ 1786 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1787 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1788 unsigned first, last, mask; 1789 eval_coef_func eval; 1790 1791 first = decl->DeclarationRange.First; 1792 last = decl->DeclarationRange.Last; 1793 mask = decl->Declaration.UsageMask; 1794 1795 switch( decl->Declaration.Interpolate ) { 1796 case TGSI_INTERPOLATE_CONSTANT: 1797 eval = eval_constant_coef; 1798 break; 1799 1800 case TGSI_INTERPOLATE_LINEAR: 1801 eval = eval_linear_coef; 1802 break; 1803 1804 case TGSI_INTERPOLATE_PERSPECTIVE: 1805 eval = eval_perspective_coef; 1806 break; 1807 1808 default: 1809 eval = NULL; 1810 assert( 0 ); 1811 } 1812 1813 if( mask == TGSI_WRITEMASK_XYZW ) { 1814 unsigned i, j; 1815 1816 for( i = first; i <= last; i++ ) { 1817 for( j = 0; j < NUM_CHANNELS; j++ ) { 1818 eval( mach, i, j ); 1819 } 1820 } 1821 } 1822 else { 1823 unsigned i, j; 1824 1825 for( j = 0; j < NUM_CHANNELS; j++ ) { 1826 if( mask & (1 << j) ) { 1827 for( i = first; i <= last; i++ ) { 1828 eval( mach, i, j ); 1829 } 1830 } 1831 } 1832 } 1833 } 1834 } 1835} 1836 1837static void 1838exec_instruction( 1839 struct tgsi_exec_machine *mach, 1840 const struct tgsi_full_instruction *inst, 1841 int *pc ) 1842{ 1843 uint chan_index; 1844 union tgsi_exec_channel r[10]; 1845 1846 (*pc)++; 1847 1848 switch (inst->Instruction.Opcode) { 1849 case TGSI_OPCODE_ARL: 1850 /* TGSI_OPCODE_FLOOR */ 1851 /* TGSI_OPCODE_FLR */ 1852 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1853 FETCH( &r[0], 0, chan_index ); 1854 micro_flr( &r[0], &r[0] ); 1855 STORE( &r[0], 0, chan_index ); 1856 } 1857 break; 1858 1859 case TGSI_OPCODE_MOV: 1860 case TGSI_OPCODE_SWZ: 1861 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1862 FETCH( &r[0], 0, chan_index ); 1863 STORE( &r[0], 0, chan_index ); 1864 } 1865 break; 1866 1867 case TGSI_OPCODE_LIT: 1868 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1869 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1870 } 1871 1872 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1873 FETCH( &r[0], 0, CHAN_X ); 1874 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1875 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1876 STORE( &r[0], 0, CHAN_Y ); 1877 } 1878 1879 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1880 FETCH( &r[1], 0, CHAN_Y ); 1881 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1882 1883 FETCH( &r[2], 0, CHAN_W ); 1884 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1885 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1886 micro_pow( &r[1], &r[1], &r[2] ); 1887 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1888 STORE( &r[0], 0, CHAN_Z ); 1889 } 1890 } 1891 1892 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1893 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1894 } 1895 break; 1896 1897 case TGSI_OPCODE_RCP: 1898 /* TGSI_OPCODE_RECIP */ 1899 FETCH( &r[0], 0, CHAN_X ); 1900 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1901 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1902 STORE( &r[0], 0, chan_index ); 1903 } 1904 break; 1905 1906 case TGSI_OPCODE_RSQ: 1907 /* TGSI_OPCODE_RECIPSQRT */ 1908 FETCH( &r[0], 0, CHAN_X ); 1909 micro_abs( &r[0], &r[0] ); 1910 micro_sqrt( &r[0], &r[0] ); 1911 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1912 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1913 STORE( &r[0], 0, chan_index ); 1914 } 1915 break; 1916 1917 case TGSI_OPCODE_EXP: 1918 FETCH( &r[0], 0, CHAN_X ); 1919 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1920 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1921 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1922 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1923 } 1924 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1925 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1926 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1927 } 1928 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1929 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1930 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1931 } 1932 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1933 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1934 } 1935 break; 1936 1937 case TGSI_OPCODE_LOG: 1938 FETCH( &r[0], 0, CHAN_X ); 1939 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1940 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1941 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1942 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1943 STORE( &r[0], 0, CHAN_X ); 1944 } 1945 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1946 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1947 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 1948 STORE( &r[0], 0, CHAN_Y ); 1949 } 1950 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1951 STORE( &r[1], 0, CHAN_Z ); 1952 } 1953 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1954 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1955 } 1956 break; 1957 1958 case TGSI_OPCODE_MUL: 1959 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1960 { 1961 FETCH(&r[0], 0, chan_index); 1962 FETCH(&r[1], 1, chan_index); 1963 1964 micro_mul( &r[0], &r[0], &r[1] ); 1965 1966 STORE(&r[0], 0, chan_index); 1967 } 1968 break; 1969 1970 case TGSI_OPCODE_ADD: 1971 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1972 FETCH( &r[0], 0, chan_index ); 1973 FETCH( &r[1], 1, chan_index ); 1974 micro_add( &r[0], &r[0], &r[1] ); 1975 STORE( &r[0], 0, chan_index ); 1976 } 1977 break; 1978 1979 case TGSI_OPCODE_DP3: 1980 /* TGSI_OPCODE_DOT3 */ 1981 FETCH( &r[0], 0, CHAN_X ); 1982 FETCH( &r[1], 1, CHAN_X ); 1983 micro_mul( &r[0], &r[0], &r[1] ); 1984 1985 FETCH( &r[1], 0, CHAN_Y ); 1986 FETCH( &r[2], 1, CHAN_Y ); 1987 micro_mul( &r[1], &r[1], &r[2] ); 1988 micro_add( &r[0], &r[0], &r[1] ); 1989 1990 FETCH( &r[1], 0, CHAN_Z ); 1991 FETCH( &r[2], 1, CHAN_Z ); 1992 micro_mul( &r[1], &r[1], &r[2] ); 1993 micro_add( &r[0], &r[0], &r[1] ); 1994 1995 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1996 STORE( &r[0], 0, chan_index ); 1997 } 1998 break; 1999 2000 case TGSI_OPCODE_DP4: 2001 /* TGSI_OPCODE_DOT4 */ 2002 FETCH(&r[0], 0, CHAN_X); 2003 FETCH(&r[1], 1, CHAN_X); 2004 2005 micro_mul( &r[0], &r[0], &r[1] ); 2006 2007 FETCH(&r[1], 0, CHAN_Y); 2008 FETCH(&r[2], 1, CHAN_Y); 2009 2010 micro_mul( &r[1], &r[1], &r[2] ); 2011 micro_add( &r[0], &r[0], &r[1] ); 2012 2013 FETCH(&r[1], 0, CHAN_Z); 2014 FETCH(&r[2], 1, CHAN_Z); 2015 2016 micro_mul( &r[1], &r[1], &r[2] ); 2017 micro_add( &r[0], &r[0], &r[1] ); 2018 2019 FETCH(&r[1], 0, CHAN_W); 2020 FETCH(&r[2], 1, CHAN_W); 2021 2022 micro_mul( &r[1], &r[1], &r[2] ); 2023 micro_add( &r[0], &r[0], &r[1] ); 2024 2025 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2026 STORE( &r[0], 0, chan_index ); 2027 } 2028 break; 2029 2030 case TGSI_OPCODE_DST: 2031 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2032 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2033 } 2034 2035 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2036 FETCH( &r[0], 0, CHAN_Y ); 2037 FETCH( &r[1], 1, CHAN_Y); 2038 micro_mul( &r[0], &r[0], &r[1] ); 2039 STORE( &r[0], 0, CHAN_Y ); 2040 } 2041 2042 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2043 FETCH( &r[0], 0, CHAN_Z ); 2044 STORE( &r[0], 0, CHAN_Z ); 2045 } 2046 2047 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2048 FETCH( &r[0], 1, CHAN_W ); 2049 STORE( &r[0], 0, CHAN_W ); 2050 } 2051 break; 2052 2053 case TGSI_OPCODE_MIN: 2054 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2055 FETCH(&r[0], 0, chan_index); 2056 FETCH(&r[1], 1, chan_index); 2057 2058 /* XXX use micro_min()?? */ 2059 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2060 2061 STORE(&r[0], 0, chan_index); 2062 } 2063 break; 2064 2065 case TGSI_OPCODE_MAX: 2066 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2067 FETCH(&r[0], 0, chan_index); 2068 FETCH(&r[1], 1, chan_index); 2069 2070 /* XXX use micro_max()?? */ 2071 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2072 2073 STORE(&r[0], 0, chan_index ); 2074 } 2075 break; 2076 2077 case TGSI_OPCODE_SLT: 2078 /* TGSI_OPCODE_SETLT */ 2079 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2080 FETCH( &r[0], 0, chan_index ); 2081 FETCH( &r[1], 1, chan_index ); 2082 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2083 STORE( &r[0], 0, chan_index ); 2084 } 2085 break; 2086 2087 case TGSI_OPCODE_SGE: 2088 /* TGSI_OPCODE_SETGE */ 2089 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2090 FETCH( &r[0], 0, chan_index ); 2091 FETCH( &r[1], 1, chan_index ); 2092 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2093 STORE( &r[0], 0, chan_index ); 2094 } 2095 break; 2096 2097 case TGSI_OPCODE_MAD: 2098 /* TGSI_OPCODE_MADD */ 2099 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2100 FETCH( &r[0], 0, chan_index ); 2101 FETCH( &r[1], 1, chan_index ); 2102 micro_mul( &r[0], &r[0], &r[1] ); 2103 FETCH( &r[1], 2, chan_index ); 2104 micro_add( &r[0], &r[0], &r[1] ); 2105 STORE( &r[0], 0, chan_index ); 2106 } 2107 break; 2108 2109 case TGSI_OPCODE_SUB: 2110 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2111 FETCH(&r[0], 0, chan_index); 2112 FETCH(&r[1], 1, chan_index); 2113 2114 micro_sub( &r[0], &r[0], &r[1] ); 2115 2116 STORE(&r[0], 0, chan_index); 2117 } 2118 break; 2119 2120 case TGSI_OPCODE_LERP: 2121 /* TGSI_OPCODE_LRP */ 2122 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2123 FETCH(&r[0], 0, chan_index); 2124 FETCH(&r[1], 1, chan_index); 2125 FETCH(&r[2], 2, chan_index); 2126 2127 micro_sub( &r[1], &r[1], &r[2] ); 2128 micro_mul( &r[0], &r[0], &r[1] ); 2129 micro_add( &r[0], &r[0], &r[2] ); 2130 2131 STORE(&r[0], 0, chan_index); 2132 } 2133 break; 2134 2135 case TGSI_OPCODE_CND: 2136 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2137 FETCH(&r[0], 0, chan_index); 2138 FETCH(&r[1], 1, chan_index); 2139 FETCH(&r[2], 2, chan_index); 2140 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2141 STORE(&r[0], 0, chan_index); 2142 } 2143 break; 2144 2145 case TGSI_OPCODE_CND0: 2146 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2147 FETCH(&r[0], 0, chan_index); 2148 FETCH(&r[1], 1, chan_index); 2149 FETCH(&r[2], 2, chan_index); 2150 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); 2151 STORE(&r[0], 0, chan_index); 2152 } 2153 break; 2154 2155 case TGSI_OPCODE_DOT2ADD: 2156 /* TGSI_OPCODE_DP2A */ 2157 FETCH( &r[0], 0, CHAN_X ); 2158 FETCH( &r[1], 1, CHAN_X ); 2159 micro_mul( &r[0], &r[0], &r[1] ); 2160 2161 FETCH( &r[1], 0, CHAN_Y ); 2162 FETCH( &r[2], 1, CHAN_Y ); 2163 micro_mul( &r[1], &r[1], &r[2] ); 2164 micro_add( &r[0], &r[0], &r[1] ); 2165 2166 FETCH( &r[2], 2, CHAN_X ); 2167 micro_add( &r[0], &r[0], &r[2] ); 2168 2169 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2170 STORE( &r[0], 0, chan_index ); 2171 } 2172 break; 2173 2174 case TGSI_OPCODE_INDEX: 2175 /* XXX: considered for removal */ 2176 assert (0); 2177 break; 2178 2179 case TGSI_OPCODE_NEGATE: 2180 /* XXX: considered for removal */ 2181 assert (0); 2182 break; 2183 2184 case TGSI_OPCODE_FRAC: 2185 /* TGSI_OPCODE_FRC */ 2186 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2187 FETCH( &r[0], 0, chan_index ); 2188 micro_frc( &r[0], &r[0] ); 2189 STORE( &r[0], 0, chan_index ); 2190 } 2191 break; 2192 2193 case TGSI_OPCODE_CLAMP: 2194 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2195 FETCH(&r[0], 0, chan_index); 2196 FETCH(&r[1], 1, chan_index); 2197 micro_max(&r[0], &r[0], &r[1]); 2198 FETCH(&r[1], 2, chan_index); 2199 micro_min(&r[0], &r[0], &r[1]); 2200 STORE(&r[0], 0, chan_index); 2201 } 2202 break; 2203 2204 case TGSI_OPCODE_ROUND: 2205 case TGSI_OPCODE_ARR: 2206 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2207 FETCH( &r[0], 0, chan_index ); 2208 micro_rnd( &r[0], &r[0] ); 2209 STORE( &r[0], 0, chan_index ); 2210 } 2211 break; 2212 2213 case TGSI_OPCODE_EXPBASE2: 2214 /* TGSI_OPCODE_EX2 */ 2215 FETCH(&r[0], 0, CHAN_X); 2216 2217#if FAST_MATH 2218 micro_exp2( &r[0], &r[0] ); 2219#else 2220 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2221#endif 2222 2223 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2224 STORE( &r[0], 0, chan_index ); 2225 } 2226 break; 2227 2228 case TGSI_OPCODE_LOGBASE2: 2229 /* TGSI_OPCODE_LG2 */ 2230 FETCH( &r[0], 0, CHAN_X ); 2231 micro_lg2( &r[0], &r[0] ); 2232 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2233 STORE( &r[0], 0, chan_index ); 2234 } 2235 break; 2236 2237 case TGSI_OPCODE_POWER: 2238 /* TGSI_OPCODE_POW */ 2239 FETCH(&r[0], 0, CHAN_X); 2240 FETCH(&r[1], 1, CHAN_X); 2241 2242 micro_pow( &r[0], &r[0], &r[1] ); 2243 2244 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2245 STORE( &r[0], 0, chan_index ); 2246 } 2247 break; 2248 2249 case TGSI_OPCODE_CROSSPRODUCT: 2250 /* TGSI_OPCODE_XPD */ 2251 FETCH(&r[0], 0, CHAN_Y); 2252 FETCH(&r[1], 1, CHAN_Z); 2253 2254 micro_mul( &r[2], &r[0], &r[1] ); 2255 2256 FETCH(&r[3], 0, CHAN_Z); 2257 FETCH(&r[4], 1, CHAN_Y); 2258 2259 micro_mul( &r[5], &r[3], &r[4] ); 2260 micro_sub( &r[2], &r[2], &r[5] ); 2261 2262 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2263 STORE( &r[2], 0, CHAN_X ); 2264 } 2265 2266 FETCH(&r[2], 1, CHAN_X); 2267 2268 micro_mul( &r[3], &r[3], &r[2] ); 2269 2270 FETCH(&r[5], 0, CHAN_X); 2271 2272 micro_mul( &r[1], &r[1], &r[5] ); 2273 micro_sub( &r[3], &r[3], &r[1] ); 2274 2275 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2276 STORE( &r[3], 0, CHAN_Y ); 2277 } 2278 2279 micro_mul( &r[5], &r[5], &r[4] ); 2280 micro_mul( &r[0], &r[0], &r[2] ); 2281 micro_sub( &r[5], &r[5], &r[0] ); 2282 2283 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2284 STORE( &r[5], 0, CHAN_Z ); 2285 } 2286 2287 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2288 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2289 } 2290 break; 2291 2292 case TGSI_OPCODE_MULTIPLYMATRIX: 2293 /* XXX: considered for removal */ 2294 assert (0); 2295 break; 2296 2297 case TGSI_OPCODE_ABS: 2298 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2299 FETCH(&r[0], 0, chan_index); 2300 2301 micro_abs( &r[0], &r[0] ); 2302 2303 STORE(&r[0], 0, chan_index); 2304 } 2305 break; 2306 2307 case TGSI_OPCODE_RCC: 2308 FETCH(&r[0], 0, CHAN_X); 2309 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2310 micro_float_clamp(&r[0], &r[0]); 2311 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2312 STORE(&r[0], 0, chan_index); 2313 } 2314 break; 2315 2316 case TGSI_OPCODE_DPH: 2317 FETCH(&r[0], 0, CHAN_X); 2318 FETCH(&r[1], 1, CHAN_X); 2319 2320 micro_mul( &r[0], &r[0], &r[1] ); 2321 2322 FETCH(&r[1], 0, CHAN_Y); 2323 FETCH(&r[2], 1, CHAN_Y); 2324 2325 micro_mul( &r[1], &r[1], &r[2] ); 2326 micro_add( &r[0], &r[0], &r[1] ); 2327 2328 FETCH(&r[1], 0, CHAN_Z); 2329 FETCH(&r[2], 1, CHAN_Z); 2330 2331 micro_mul( &r[1], &r[1], &r[2] ); 2332 micro_add( &r[0], &r[0], &r[1] ); 2333 2334 FETCH(&r[1], 1, CHAN_W); 2335 2336 micro_add( &r[0], &r[0], &r[1] ); 2337 2338 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2339 STORE( &r[0], 0, chan_index ); 2340 } 2341 break; 2342 2343 case TGSI_OPCODE_COS: 2344 FETCH(&r[0], 0, CHAN_X); 2345 2346 micro_cos( &r[0], &r[0] ); 2347 2348 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2349 STORE( &r[0], 0, chan_index ); 2350 } 2351 break; 2352 2353 case TGSI_OPCODE_DDX: 2354 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2355 FETCH( &r[0], 0, chan_index ); 2356 micro_ddx( &r[0], &r[0] ); 2357 STORE( &r[0], 0, chan_index ); 2358 } 2359 break; 2360 2361 case TGSI_OPCODE_DDY: 2362 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2363 FETCH( &r[0], 0, chan_index ); 2364 micro_ddy( &r[0], &r[0] ); 2365 STORE( &r[0], 0, chan_index ); 2366 } 2367 break; 2368 2369 case TGSI_OPCODE_KILP: 2370 exec_kilp (mach, inst); 2371 break; 2372 2373 case TGSI_OPCODE_KIL: 2374 exec_kil (mach, inst); 2375 break; 2376 2377 case TGSI_OPCODE_PK2H: 2378 assert (0); 2379 break; 2380 2381 case TGSI_OPCODE_PK2US: 2382 assert (0); 2383 break; 2384 2385 case TGSI_OPCODE_PK4B: 2386 assert (0); 2387 break; 2388 2389 case TGSI_OPCODE_PK4UB: 2390 assert (0); 2391 break; 2392 2393 case TGSI_OPCODE_RFL: 2394 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2395 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2396 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2397 /* r0 = dp3(src0, src0) */ 2398 FETCH(&r[2], 0, CHAN_X); 2399 micro_mul(&r[0], &r[2], &r[2]); 2400 FETCH(&r[4], 0, CHAN_Y); 2401 micro_mul(&r[8], &r[4], &r[4]); 2402 micro_add(&r[0], &r[0], &r[8]); 2403 FETCH(&r[6], 0, CHAN_Z); 2404 micro_mul(&r[8], &r[6], &r[6]); 2405 micro_add(&r[0], &r[0], &r[8]); 2406 2407 /* r1 = dp3(src0, src1) */ 2408 FETCH(&r[3], 1, CHAN_X); 2409 micro_mul(&r[1], &r[2], &r[3]); 2410 FETCH(&r[5], 1, CHAN_Y); 2411 micro_mul(&r[8], &r[4], &r[5]); 2412 micro_add(&r[1], &r[1], &r[8]); 2413 FETCH(&r[7], 1, CHAN_Z); 2414 micro_mul(&r[8], &r[6], &r[7]); 2415 micro_add(&r[1], &r[1], &r[8]); 2416 2417 /* r1 = 2 * r1 / r0 */ 2418 micro_add(&r[1], &r[1], &r[1]); 2419 micro_div(&r[1], &r[1], &r[0]); 2420 2421 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2422 micro_mul(&r[2], &r[2], &r[1]); 2423 micro_sub(&r[2], &r[2], &r[3]); 2424 STORE(&r[2], 0, CHAN_X); 2425 } 2426 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2427 micro_mul(&r[4], &r[4], &r[1]); 2428 micro_sub(&r[4], &r[4], &r[5]); 2429 STORE(&r[4], 0, CHAN_Y); 2430 } 2431 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2432 micro_mul(&r[6], &r[6], &r[1]); 2433 micro_sub(&r[6], &r[6], &r[7]); 2434 STORE(&r[6], 0, CHAN_Z); 2435 } 2436 } 2437 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2438 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2439 } 2440 break; 2441 2442 case TGSI_OPCODE_SEQ: 2443 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2444 FETCH( &r[0], 0, chan_index ); 2445 FETCH( &r[1], 1, chan_index ); 2446 micro_eq( &r[0], &r[0], &r[1], 2447 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2448 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2449 STORE( &r[0], 0, chan_index ); 2450 } 2451 break; 2452 2453 case TGSI_OPCODE_SFL: 2454 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2455 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2456 } 2457 break; 2458 2459 case TGSI_OPCODE_SGT: 2460 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2461 FETCH( &r[0], 0, chan_index ); 2462 FETCH( &r[1], 1, chan_index ); 2463 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2464 STORE( &r[0], 0, chan_index ); 2465 } 2466 break; 2467 2468 case TGSI_OPCODE_SIN: 2469 FETCH( &r[0], 0, CHAN_X ); 2470 micro_sin( &r[0], &r[0] ); 2471 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2472 STORE( &r[0], 0, chan_index ); 2473 } 2474 break; 2475 2476 case TGSI_OPCODE_SLE: 2477 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2478 FETCH( &r[0], 0, chan_index ); 2479 FETCH( &r[1], 1, chan_index ); 2480 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2481 STORE( &r[0], 0, chan_index ); 2482 } 2483 break; 2484 2485 case TGSI_OPCODE_SNE: 2486 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2487 FETCH( &r[0], 0, chan_index ); 2488 FETCH( &r[1], 1, chan_index ); 2489 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2490 STORE( &r[0], 0, chan_index ); 2491 } 2492 break; 2493 2494 case TGSI_OPCODE_STR: 2495 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2496 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2497 } 2498 break; 2499 2500 case TGSI_OPCODE_TEX: 2501 /* simple texture lookup */ 2502 /* src[0] = texcoord */ 2503 /* src[1] = sampler unit */ 2504 exec_tex(mach, inst, FALSE, FALSE); 2505 break; 2506 2507 case TGSI_OPCODE_TXB: 2508 /* Texture lookup with lod bias */ 2509 /* src[0] = texcoord (src[0].w = LOD bias) */ 2510 /* src[1] = sampler unit */ 2511 exec_tex(mach, inst, TRUE, FALSE); 2512 break; 2513 2514 case TGSI_OPCODE_TXD: 2515 /* Texture lookup with explict partial derivatives */ 2516 /* src[0] = texcoord */ 2517 /* src[1] = d[strq]/dx */ 2518 /* src[2] = d[strq]/dy */ 2519 /* src[3] = sampler unit */ 2520 assert (0); 2521 break; 2522 2523 case TGSI_OPCODE_TXL: 2524 /* Texture lookup with explit LOD */ 2525 /* src[0] = texcoord (src[0].w = LOD) */ 2526 /* src[1] = sampler unit */ 2527 exec_tex(mach, inst, TRUE, FALSE); 2528 break; 2529 2530 case TGSI_OPCODE_TXP: 2531 /* Texture lookup with projection */ 2532 /* src[0] = texcoord (src[0].w = projection) */ 2533 /* src[1] = sampler unit */ 2534 exec_tex(mach, inst, FALSE, TRUE); 2535 break; 2536 2537 case TGSI_OPCODE_UP2H: 2538 assert (0); 2539 break; 2540 2541 case TGSI_OPCODE_UP2US: 2542 assert (0); 2543 break; 2544 2545 case TGSI_OPCODE_UP4B: 2546 assert (0); 2547 break; 2548 2549 case TGSI_OPCODE_UP4UB: 2550 assert (0); 2551 break; 2552 2553 case TGSI_OPCODE_X2D: 2554 FETCH(&r[0], 1, CHAN_X); 2555 FETCH(&r[1], 1, CHAN_Y); 2556 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2557 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2558 FETCH(&r[2], 2, CHAN_X); 2559 micro_mul(&r[2], &r[2], &r[0]); 2560 FETCH(&r[3], 2, CHAN_Y); 2561 micro_mul(&r[3], &r[3], &r[1]); 2562 micro_add(&r[2], &r[2], &r[3]); 2563 FETCH(&r[3], 0, CHAN_X); 2564 micro_add(&r[2], &r[2], &r[3]); 2565 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2566 STORE(&r[2], 0, CHAN_X); 2567 } 2568 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2569 STORE(&r[2], 0, CHAN_Z); 2570 } 2571 } 2572 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2573 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2574 FETCH(&r[2], 2, CHAN_Z); 2575 micro_mul(&r[2], &r[2], &r[0]); 2576 FETCH(&r[3], 2, CHAN_W); 2577 micro_mul(&r[3], &r[3], &r[1]); 2578 micro_add(&r[2], &r[2], &r[3]); 2579 FETCH(&r[3], 0, CHAN_Y); 2580 micro_add(&r[2], &r[2], &r[3]); 2581 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2582 STORE(&r[2], 0, CHAN_Y); 2583 } 2584 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2585 STORE(&r[2], 0, CHAN_W); 2586 } 2587 } 2588 break; 2589 2590 case TGSI_OPCODE_ARA: 2591 assert (0); 2592 break; 2593 2594 case TGSI_OPCODE_BRA: 2595 assert (0); 2596 break; 2597 2598 case TGSI_OPCODE_CAL: 2599 /* skip the call if no execution channels are enabled */ 2600 if (mach->ExecMask) { 2601 /* do the call */ 2602 2603 /* push the Cond, Loop, Cont stacks */ 2604 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2605 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2606 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2607 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2608 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2609 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2610 2611 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2612 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2613 2614 /* note that PC was already incremented above */ 2615 mach->CallStack[mach->CallStackTop++] = *pc; 2616 *pc = inst->InstructionExtLabel.Label; 2617 } 2618 break; 2619 2620 case TGSI_OPCODE_RET: 2621 mach->FuncMask &= ~mach->ExecMask; 2622 UPDATE_EXEC_MASK(mach); 2623 2624 if (mach->FuncMask == 0x0) { 2625 /* really return now (otherwise, keep executing */ 2626 2627 if (mach->CallStackTop == 0) { 2628 /* returning from main() */ 2629 *pc = -1; 2630 return; 2631 } 2632 *pc = mach->CallStack[--mach->CallStackTop]; 2633 2634 /* pop the Cond, Loop, Cont stacks */ 2635 assert(mach->CondStackTop > 0); 2636 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2637 assert(mach->LoopStackTop > 0); 2638 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2639 assert(mach->ContStackTop > 0); 2640 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2641 assert(mach->FuncStackTop > 0); 2642 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2643 2644 UPDATE_EXEC_MASK(mach); 2645 } 2646 break; 2647 2648 case TGSI_OPCODE_SSG: 2649 /* TGSI_OPCODE_SGN */ 2650 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2651 FETCH( &r[0], 0, chan_index ); 2652 micro_sgn( &r[0], &r[0] ); 2653 STORE( &r[0], 0, chan_index ); 2654 } 2655 break; 2656 2657 case TGSI_OPCODE_CMP: 2658 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2659 FETCH(&r[0], 0, chan_index); 2660 FETCH(&r[1], 1, chan_index); 2661 FETCH(&r[2], 2, chan_index); 2662 2663 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2664 2665 STORE(&r[0], 0, chan_index); 2666 } 2667 break; 2668 2669 case TGSI_OPCODE_SCS: 2670 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2671 FETCH( &r[0], 0, CHAN_X ); 2672 } 2673 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { 2674 micro_cos( &r[1], &r[0] ); 2675 STORE( &r[1], 0, CHAN_X ); 2676 } 2677 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2678 micro_sin( &r[1], &r[0] ); 2679 STORE( &r[1], 0, CHAN_Y ); 2680 } 2681 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2682 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2683 } 2684 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2685 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2686 } 2687 break; 2688 2689 case TGSI_OPCODE_NRM: 2690 /* 3-component vector normalize */ 2691 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2692 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2693 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2694 /* r3 = sqrt(dp3(src0, src0)) */ 2695 FETCH(&r[0], 0, CHAN_X); 2696 micro_mul(&r[3], &r[0], &r[0]); 2697 FETCH(&r[1], 0, CHAN_Y); 2698 micro_mul(&r[4], &r[1], &r[1]); 2699 micro_add(&r[3], &r[3], &r[4]); 2700 FETCH(&r[2], 0, CHAN_Z); 2701 micro_mul(&r[4], &r[2], &r[2]); 2702 micro_add(&r[3], &r[3], &r[4]); 2703 micro_sqrt(&r[3], &r[3]); 2704 2705 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2706 micro_div(&r[0], &r[0], &r[3]); 2707 STORE(&r[0], 0, CHAN_X); 2708 } 2709 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2710 micro_div(&r[1], &r[1], &r[3]); 2711 STORE(&r[1], 0, CHAN_Y); 2712 } 2713 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2714 micro_div(&r[2], &r[2], &r[3]); 2715 STORE(&r[2], 0, CHAN_Z); 2716 } 2717 } 2718 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2719 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2720 } 2721 break; 2722 2723 case TGSI_OPCODE_NRM4: 2724 /* 4-component vector normalize */ 2725 { 2726 union tgsi_exec_channel tmp, dot; 2727 2728 /* tmp = dp4(src0, src0): */ 2729 FETCH( &r[0], 0, CHAN_X ); 2730 micro_mul( &tmp, &r[0], &r[0] ); 2731 2732 FETCH( &r[1], 0, CHAN_Y ); 2733 micro_mul( &dot, &r[1], &r[1] ); 2734 micro_add( &tmp, &tmp, &dot ); 2735 2736 FETCH( &r[2], 0, CHAN_Z ); 2737 micro_mul( &dot, &r[2], &r[2] ); 2738 micro_add( &tmp, &tmp, &dot ); 2739 2740 FETCH( &r[3], 0, CHAN_W ); 2741 micro_mul( &dot, &r[3], &r[3] ); 2742 micro_add( &tmp, &tmp, &dot ); 2743 2744 /* tmp = 1 / sqrt(tmp) */ 2745 micro_sqrt( &tmp, &tmp ); 2746 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2747 2748 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2749 /* chan = chan * tmp */ 2750 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2751 STORE( &r[chan_index], 0, chan_index ); 2752 } 2753 } 2754 break; 2755 2756 case TGSI_OPCODE_DIV: 2757 assert( 0 ); 2758 break; 2759 2760 case TGSI_OPCODE_DP2: 2761 FETCH( &r[0], 0, CHAN_X ); 2762 FETCH( &r[1], 1, CHAN_X ); 2763 micro_mul( &r[0], &r[0], &r[1] ); 2764 2765 FETCH( &r[1], 0, CHAN_Y ); 2766 FETCH( &r[2], 1, CHAN_Y ); 2767 micro_mul( &r[1], &r[1], &r[2] ); 2768 micro_add( &r[0], &r[0], &r[1] ); 2769 2770 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2771 STORE( &r[0], 0, chan_index ); 2772 } 2773 break; 2774 2775 case TGSI_OPCODE_IF: 2776 /* push CondMask */ 2777 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2778 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2779 FETCH( &r[0], 0, CHAN_X ); 2780 /* update CondMask */ 2781 if( ! r[0].u[0] ) { 2782 mach->CondMask &= ~0x1; 2783 } 2784 if( ! r[0].u[1] ) { 2785 mach->CondMask &= ~0x2; 2786 } 2787 if( ! r[0].u[2] ) { 2788 mach->CondMask &= ~0x4; 2789 } 2790 if( ! r[0].u[3] ) { 2791 mach->CondMask &= ~0x8; 2792 } 2793 UPDATE_EXEC_MASK(mach); 2794 /* Todo: If CondMask==0, jump to ELSE */ 2795 break; 2796 2797 case TGSI_OPCODE_ELSE: 2798 /* invert CondMask wrt previous mask */ 2799 { 2800 uint prevMask; 2801 assert(mach->CondStackTop > 0); 2802 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2803 mach->CondMask = ~mach->CondMask & prevMask; 2804 UPDATE_EXEC_MASK(mach); 2805 /* Todo: If CondMask==0, jump to ENDIF */ 2806 } 2807 break; 2808 2809 case TGSI_OPCODE_ENDIF: 2810 /* pop CondMask */ 2811 assert(mach->CondStackTop > 0); 2812 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2813 UPDATE_EXEC_MASK(mach); 2814 break; 2815 2816 case TGSI_OPCODE_END: 2817 /* halt execution */ 2818 *pc = -1; 2819 break; 2820 2821 case TGSI_OPCODE_REP: 2822 assert (0); 2823 break; 2824 2825 case TGSI_OPCODE_ENDREP: 2826 assert (0); 2827 break; 2828 2829 case TGSI_OPCODE_PUSHA: 2830 assert (0); 2831 break; 2832 2833 case TGSI_OPCODE_POPA: 2834 assert (0); 2835 break; 2836 2837 case TGSI_OPCODE_CEIL: 2838 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2839 FETCH( &r[0], 0, chan_index ); 2840 micro_ceil( &r[0], &r[0] ); 2841 STORE( &r[0], 0, chan_index ); 2842 } 2843 break; 2844 2845 case TGSI_OPCODE_I2F: 2846 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2847 FETCH( &r[0], 0, chan_index ); 2848 micro_i2f( &r[0], &r[0] ); 2849 STORE( &r[0], 0, chan_index ); 2850 } 2851 break; 2852 2853 case TGSI_OPCODE_NOT: 2854 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2855 FETCH( &r[0], 0, chan_index ); 2856 micro_not( &r[0], &r[0] ); 2857 STORE( &r[0], 0, chan_index ); 2858 } 2859 break; 2860 2861 case TGSI_OPCODE_TRUNC: 2862 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2863 FETCH( &r[0], 0, chan_index ); 2864 micro_trunc( &r[0], &r[0] ); 2865 STORE( &r[0], 0, chan_index ); 2866 } 2867 break; 2868 2869 case TGSI_OPCODE_SHL: 2870 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2871 FETCH( &r[0], 0, chan_index ); 2872 FETCH( &r[1], 1, chan_index ); 2873 micro_shl( &r[0], &r[0], &r[1] ); 2874 STORE( &r[0], 0, chan_index ); 2875 } 2876 break; 2877 2878 case TGSI_OPCODE_SHR: 2879 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2880 FETCH( &r[0], 0, chan_index ); 2881 FETCH( &r[1], 1, chan_index ); 2882 micro_ishr( &r[0], &r[0], &r[1] ); 2883 STORE( &r[0], 0, chan_index ); 2884 } 2885 break; 2886 2887 case TGSI_OPCODE_AND: 2888 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2889 FETCH( &r[0], 0, chan_index ); 2890 FETCH( &r[1], 1, chan_index ); 2891 micro_and( &r[0], &r[0], &r[1] ); 2892 STORE( &r[0], 0, chan_index ); 2893 } 2894 break; 2895 2896 case TGSI_OPCODE_OR: 2897 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2898 FETCH( &r[0], 0, chan_index ); 2899 FETCH( &r[1], 1, chan_index ); 2900 micro_or( &r[0], &r[0], &r[1] ); 2901 STORE( &r[0], 0, chan_index ); 2902 } 2903 break; 2904 2905 case TGSI_OPCODE_MOD: 2906 assert (0); 2907 break; 2908 2909 case TGSI_OPCODE_XOR: 2910 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2911 FETCH( &r[0], 0, chan_index ); 2912 FETCH( &r[1], 1, chan_index ); 2913 micro_xor( &r[0], &r[0], &r[1] ); 2914 STORE( &r[0], 0, chan_index ); 2915 } 2916 break; 2917 2918 case TGSI_OPCODE_SAD: 2919 assert (0); 2920 break; 2921 2922 case TGSI_OPCODE_TXF: 2923 assert (0); 2924 break; 2925 2926 case TGSI_OPCODE_TXQ: 2927 assert (0); 2928 break; 2929 2930 case TGSI_OPCODE_EMIT: 2931 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 2932 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2933 break; 2934 2935 case TGSI_OPCODE_ENDPRIM: 2936 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 2937 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 2938 break; 2939 2940 case TGSI_OPCODE_LOOP: 2941 /* fall-through (for now) */ 2942 case TGSI_OPCODE_BGNLOOP2: 2943 /* push LoopMask and ContMasks */ 2944 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2945 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2946 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2947 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2948 break; 2949 2950 case TGSI_OPCODE_ENDLOOP: 2951 /* fall-through (for now at least) */ 2952 case TGSI_OPCODE_ENDLOOP2: 2953 /* Restore ContMask, but don't pop */ 2954 assert(mach->ContStackTop > 0); 2955 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 2956 UPDATE_EXEC_MASK(mach); 2957 if (mach->ExecMask) { 2958 /* repeat loop: jump to instruction just past BGNLOOP */ 2959 *pc = inst->InstructionExtLabel.Label + 1; 2960 } 2961 else { 2962 /* exit loop: pop LoopMask */ 2963 assert(mach->LoopStackTop > 0); 2964 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2965 /* pop ContMask */ 2966 assert(mach->ContStackTop > 0); 2967 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2968 } 2969 UPDATE_EXEC_MASK(mach); 2970 break; 2971 2972 case TGSI_OPCODE_BRK: 2973 /* turn off loop channels for each enabled exec channel */ 2974 mach->LoopMask &= ~mach->ExecMask; 2975 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2976 UPDATE_EXEC_MASK(mach); 2977 break; 2978 2979 case TGSI_OPCODE_CONT: 2980 /* turn off cont channels for each enabled exec channel */ 2981 mach->ContMask &= ~mach->ExecMask; 2982 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2983 UPDATE_EXEC_MASK(mach); 2984 break; 2985 2986 case TGSI_OPCODE_BGNSUB: 2987 /* no-op */ 2988 break; 2989 2990 case TGSI_OPCODE_ENDSUB: 2991 /* no-op */ 2992 break; 2993 2994 case TGSI_OPCODE_NOISE1: 2995 assert( 0 ); 2996 break; 2997 2998 case TGSI_OPCODE_NOISE2: 2999 assert( 0 ); 3000 break; 3001 3002 case TGSI_OPCODE_NOISE3: 3003 assert( 0 ); 3004 break; 3005 3006 case TGSI_OPCODE_NOISE4: 3007 assert( 0 ); 3008 break; 3009 3010 case TGSI_OPCODE_NOP: 3011 break; 3012 3013 default: 3014 assert( 0 ); 3015 } 3016} 3017 3018 3019/** 3020 * Run TGSI interpreter. 3021 * \return bitmask of "alive" quad components 3022 */ 3023uint 3024tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3025{ 3026 uint i; 3027 int pc = 0; 3028 3029 mach->CondMask = 0xf; 3030 mach->LoopMask = 0xf; 3031 mach->ContMask = 0xf; 3032 mach->FuncMask = 0xf; 3033 mach->ExecMask = 0xf; 3034 3035 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 3036 assert(mach->CondStackTop == 0); 3037 assert(mach->LoopStackTop == 0); 3038 assert(mach->ContStackTop == 0); 3039 assert(mach->CallStackTop == 0); 3040 3041 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3042 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3043 3044 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3045 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3046 mach->Primitives[0] = 0; 3047 } 3048 3049 for (i = 0; i < QUAD_SIZE; i++) { 3050 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3051 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3052 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3053 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3054 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3055 } 3056 3057 /* execute declarations (interpolants) */ 3058 for (i = 0; i < mach->NumDeclarations; i++) { 3059 exec_declaration( mach, mach->Declarations+i ); 3060 } 3061 3062 /* execute instructions, until pc is set to -1 */ 3063 while (pc != -1) { 3064 assert(pc < (int) mach->NumInstructions); 3065 exec_instruction( mach, mach->Instructions + pc, &pc ); 3066 } 3067 3068#if 0 3069 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3070 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3071 /* 3072 * Scale back depth component. 3073 */ 3074 for (i = 0; i < 4; i++) 3075 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3076 } 3077#endif 3078 3079 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3080} 3081