/* tgsi_exec.c revision 50f78fcc2e3da24fa6dc076f0985355b3f64e9fd */
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 
42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_parse.h" 57#include "tgsi/tgsi_util.h" 58#include "tgsi_exec.h" 59#include "util/u_memory.h" 60#include "util/u_math.h" 61 62#define FAST_MATH 1 63 64#define TILE_TOP_LEFT 0 65#define TILE_TOP_RIGHT 1 66#define TILE_BOTTOM_LEFT 2 67#define TILE_BOTTOM_RIGHT 3 68 69#define CHAN_X 0 70#define CHAN_Y 1 71#define CHAN_Z 2 72#define CHAN_W 3 73 74/* 75 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 76 */ 77#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 78#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 79#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 80#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 81#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 82#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 83#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 84#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 85#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 86#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 87#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 88#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 89#define TEMP_128_I TGSI_EXEC_TEMP_128_I 90#define TEMP_128_C TGSI_EXEC_TEMP_128_C 91#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 92#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 93#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 94#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 95#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 96#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 97#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 98#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 99#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 100#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 101#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 102#define TEMP_3_C 
TGSI_EXEC_TEMP_THREE_C 103#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 104#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 105#define TEMP_R0 TGSI_EXEC_TEMP_R0 106 107#define IS_CHANNEL_ENABLED(INST, CHAN)\ 108 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 109 110#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 111 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 112 113#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 114 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 115 if (IS_CHANNEL_ENABLED( INST, CHAN )) 116 117#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 118 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 119 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 120 121 122/** The execution mask depends on the conditional mask and the loop mask */ 123#define UPDATE_EXEC_MASK(MACH) \ 124 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 125 126/** 127 * Initialize machine state by expanding tokens to full instructions, 128 * allocating temporary storage, setting up constants, etc. 129 * After this, we can call tgsi_exec_machine_run() many times. 
130 */ 131void 132tgsi_exec_machine_bind_shader( 133 struct tgsi_exec_machine *mach, 134 const struct tgsi_token *tokens, 135 uint numSamplers, 136 struct tgsi_sampler *samplers) 137{ 138 uint k; 139 struct tgsi_parse_context parse; 140 struct tgsi_exec_labels *labels = &mach->Labels; 141 struct tgsi_full_instruction *instructions; 142 struct tgsi_full_declaration *declarations; 143 uint maxInstructions = 10, numInstructions = 0; 144 uint maxDeclarations = 10, numDeclarations = 0; 145 uint instno = 0; 146 147#if 0 148 tgsi_dump(tokens, 0); 149#endif 150 151 util_init_math(); 152 153 mach->Tokens = tokens; 154 mach->Samplers = samplers; 155 156 k = tgsi_parse_init (&parse, mach->Tokens); 157 if (k != TGSI_PARSE_OK) { 158 debug_printf( "Problem parsing!\n" ); 159 return; 160 } 161 162 mach->Processor = parse.FullHeader.Processor.Processor; 163 mach->ImmLimit = 0; 164 labels->count = 0; 165 166 declarations = (struct tgsi_full_declaration *) 167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 168 169 if (!declarations) { 170 return; 171 } 172 173 instructions = (struct tgsi_full_instruction *) 174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 175 176 if (!instructions) { 177 FREE( declarations ); 178 return; 179 } 180 181 while( !tgsi_parse_end_of_tokens( &parse ) ) { 182 uint pointer = parse.Position; 183 uint i; 184 185 tgsi_parse_token( &parse ); 186 switch( parse.FullToken.Token.Type ) { 187 case TGSI_TOKEN_TYPE_DECLARATION: 188 /* save expanded declaration */ 189 if (numDeclarations == maxDeclarations) { 190 declarations = REALLOC(declarations, 191 maxDeclarations 192 * sizeof(struct tgsi_full_declaration), 193 (maxDeclarations + 10) 194 * sizeof(struct tgsi_full_declaration)); 195 maxDeclarations += 10; 196 } 197 memcpy(declarations + numDeclarations, 198 &parse.FullToken.FullDeclaration, 199 sizeof(declarations[0])); 200 numDeclarations++; 201 break; 202 203 case TGSI_TOKEN_TYPE_IMMEDIATE: 204 { 205 uint size = 
parse.FullToken.FullImmediate.Immediate.Size - 1; 206 assert( size % 4 == 0 ); 207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); 208 209 for( i = 0; i < size; i++ ) { 210 mach->Imms[mach->ImmLimit + i / 4][i % 4] = 211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 212 } 213 mach->ImmLimit += size / 4; 214 } 215 break; 216 217 case TGSI_TOKEN_TYPE_INSTRUCTION: 218 assert( labels->count < MAX_LABELS ); 219 220 labels->labels[labels->count][0] = instno; 221 labels->labels[labels->count][1] = pointer; 222 labels->count++; 223 224 /* save expanded instruction */ 225 if (numInstructions == maxInstructions) { 226 instructions = REALLOC(instructions, 227 maxInstructions 228 * sizeof(struct tgsi_full_instruction), 229 (maxInstructions + 10) 230 * sizeof(struct tgsi_full_instruction)); 231 maxInstructions += 10; 232 } 233 memcpy(instructions + numInstructions, 234 &parse.FullToken.FullInstruction, 235 sizeof(instructions[0])); 236 numInstructions++; 237 break; 238 239 default: 240 assert( 0 ); 241 } 242 } 243 tgsi_parse_free (&parse); 244 245 if (mach->Declarations) { 246 FREE( mach->Declarations ); 247 } 248 mach->Declarations = declarations; 249 mach->NumDeclarations = numDeclarations; 250 251 if (mach->Instructions) { 252 FREE( mach->Instructions ); 253 } 254 mach->Instructions = instructions; 255 mach->NumInstructions = numInstructions; 256} 257 258 259void 260tgsi_exec_machine_init( 261 struct tgsi_exec_machine *mach ) 262{ 263 uint i; 264 265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); 266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 267 268 /* Setup constants. 
*/ 269 for( i = 0; i < 4; i++ ) { 270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 280 } 281} 282 283 284void 285tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) 286{ 287 if (mach->Instructions) { 288 FREE(mach->Instructions); 289 mach->Instructions = NULL; 290 mach->NumInstructions = 0; 291 } 292 if (mach->Declarations) { 293 FREE(mach->Declarations); 294 mach->Declarations = NULL; 295 mach->NumDeclarations = 0; 296 } 297} 298 299 300static void 301micro_abs( 302 union tgsi_exec_channel *dst, 303 const union tgsi_exec_channel *src ) 304{ 305 dst->f[0] = fabsf( src->f[0] ); 306 dst->f[1] = fabsf( src->f[1] ); 307 dst->f[2] = fabsf( src->f[2] ); 308 dst->f[3] = fabsf( src->f[3] ); 309} 310 311static void 312micro_add( 313 union tgsi_exec_channel *dst, 314 const union tgsi_exec_channel *src0, 315 const union tgsi_exec_channel *src1 ) 316{ 317 dst->f[0] = src0->f[0] + src1->f[0]; 318 dst->f[1] = src0->f[1] + src1->f[1]; 319 dst->f[2] = src0->f[2] + src1->f[2]; 320 dst->f[3] = src0->f[3] + src1->f[3]; 321} 322 323static void 324micro_iadd( 325 union tgsi_exec_channel *dst, 326 const union tgsi_exec_channel *src0, 327 const union tgsi_exec_channel *src1 ) 328{ 329 dst->i[0] = src0->i[0] + src1->i[0]; 330 dst->i[1] = src0->i[1] + src1->i[1]; 331 dst->i[2] = src0->i[2] + src1->i[2]; 332 dst->i[3] = src0->i[3] + src1->i[3]; 333} 334 335static void 336micro_and( 337 union tgsi_exec_channel *dst, 338 const union tgsi_exec_channel 
*src0, 339 const union tgsi_exec_channel *src1 ) 340{ 341 dst->u[0] = src0->u[0] & src1->u[0]; 342 dst->u[1] = src0->u[1] & src1->u[1]; 343 dst->u[2] = src0->u[2] & src1->u[2]; 344 dst->u[3] = src0->u[3] & src1->u[3]; 345} 346 347static void 348micro_ceil( 349 union tgsi_exec_channel *dst, 350 const union tgsi_exec_channel *src ) 351{ 352 dst->f[0] = ceilf( src->f[0] ); 353 dst->f[1] = ceilf( src->f[1] ); 354 dst->f[2] = ceilf( src->f[2] ); 355 dst->f[3] = ceilf( src->f[3] ); 356} 357 358static void 359micro_cos( 360 union tgsi_exec_channel *dst, 361 const union tgsi_exec_channel *src ) 362{ 363 dst->f[0] = cosf( src->f[0] ); 364 dst->f[1] = cosf( src->f[1] ); 365 dst->f[2] = cosf( src->f[2] ); 366 dst->f[3] = cosf( src->f[3] ); 367} 368 369static void 370micro_ddx( 371 union tgsi_exec_channel *dst, 372 const union tgsi_exec_channel *src ) 373{ 374 dst->f[0] = 375 dst->f[1] = 376 dst->f[2] = 377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 378} 379 380static void 381micro_ddy( 382 union tgsi_exec_channel *dst, 383 const union tgsi_exec_channel *src ) 384{ 385 dst->f[0] = 386 dst->f[1] = 387 dst->f[2] = 388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 389} 390 391static void 392micro_div( 393 union tgsi_exec_channel *dst, 394 const union tgsi_exec_channel *src0, 395 const union tgsi_exec_channel *src1 ) 396{ 397 if (src1->f[0] != 0) { 398 dst->f[0] = src0->f[0] / src1->f[0]; 399 } 400 if (src1->f[1] != 0) { 401 dst->f[1] = src0->f[1] / src1->f[1]; 402 } 403 if (src1->f[2] != 0) { 404 dst->f[2] = src0->f[2] / src1->f[2]; 405 } 406 if (src1->f[3] != 0) { 407 dst->f[3] = src0->f[3] / src1->f[3]; 408 } 409} 410 411static void 412micro_udiv( 413 union tgsi_exec_channel *dst, 414 const union tgsi_exec_channel *src0, 415 const union tgsi_exec_channel *src1 ) 416{ 417 dst->u[0] = src0->u[0] / src1->u[0]; 418 dst->u[1] = src0->u[1] / src1->u[1]; 419 dst->u[2] = src0->u[2] / src1->u[2]; 420 dst->u[3] = src0->u[3] / src1->u[3]; 421} 422 
423static void 424micro_eq( 425 union tgsi_exec_channel *dst, 426 const union tgsi_exec_channel *src0, 427 const union tgsi_exec_channel *src1, 428 const union tgsi_exec_channel *src2, 429 const union tgsi_exec_channel *src3 ) 430{ 431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 435} 436 437static void 438micro_ieq( 439 union tgsi_exec_channel *dst, 440 const union tgsi_exec_channel *src0, 441 const union tgsi_exec_channel *src1, 442 const union tgsi_exec_channel *src2, 443 const union tgsi_exec_channel *src3 ) 444{ 445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 449} 450 451static void 452micro_exp2( 453 union tgsi_exec_channel *dst, 454 const union tgsi_exec_channel *src) 455{ 456#if FAST_MATH 457 dst->f[0] = util_fast_exp2( src->f[0] ); 458 dst->f[1] = util_fast_exp2( src->f[1] ); 459 dst->f[2] = util_fast_exp2( src->f[2] ); 460 dst->f[3] = util_fast_exp2( src->f[3] ); 461#else 462 dst->f[0] = powf( 2.0f, src->f[0] ); 463 dst->f[1] = powf( 2.0f, src->f[1] ); 464 dst->f[2] = powf( 2.0f, src->f[2] ); 465 dst->f[3] = powf( 2.0f, src->f[3] ); 466#endif 467} 468 469static void 470micro_f2it( 471 union tgsi_exec_channel *dst, 472 const union tgsi_exec_channel *src ) 473{ 474 dst->i[0] = (int) src->f[0]; 475 dst->i[1] = (int) src->f[1]; 476 dst->i[2] = (int) src->f[2]; 477 dst->i[3] = (int) src->f[3]; 478} 479 480static void 481micro_f2ut( 482 union tgsi_exec_channel *dst, 483 const union tgsi_exec_channel *src ) 484{ 485 dst->u[0] = (uint) src->f[0]; 486 dst->u[1] = (uint) src->f[1]; 487 dst->u[2] = (uint) src->f[2]; 488 
dst->u[3] = (uint) src->f[3]; 489} 490 491static void 492micro_flr( 493 union tgsi_exec_channel *dst, 494 const union tgsi_exec_channel *src ) 495{ 496 dst->f[0] = floorf( src->f[0] ); 497 dst->f[1] = floorf( src->f[1] ); 498 dst->f[2] = floorf( src->f[2] ); 499 dst->f[3] = floorf( src->f[3] ); 500} 501 502static void 503micro_frc( 504 union tgsi_exec_channel *dst, 505 const union tgsi_exec_channel *src ) 506{ 507 dst->f[0] = src->f[0] - floorf( src->f[0] ); 508 dst->f[1] = src->f[1] - floorf( src->f[1] ); 509 dst->f[2] = src->f[2] - floorf( src->f[2] ); 510 dst->f[3] = src->f[3] - floorf( src->f[3] ); 511} 512 513static void 514micro_ge( 515 union tgsi_exec_channel *dst, 516 const union tgsi_exec_channel *src0, 517 const union tgsi_exec_channel *src1, 518 const union tgsi_exec_channel *src2, 519 const union tgsi_exec_channel *src3 ) 520{ 521 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; 522 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; 523 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; 524 dst->f[3] = src0->f[3] >= src1->f[3] ? 
src2->f[3] : src3->f[3]; 525} 526 527static void 528micro_i2f( 529 union tgsi_exec_channel *dst, 530 const union tgsi_exec_channel *src ) 531{ 532 dst->f[0] = (float) src->i[0]; 533 dst->f[1] = (float) src->i[1]; 534 dst->f[2] = (float) src->i[2]; 535 dst->f[3] = (float) src->i[3]; 536} 537 538static void 539micro_lg2( 540 union tgsi_exec_channel *dst, 541 const union tgsi_exec_channel *src ) 542{ 543#if FAST_MATH 544 dst->f[0] = util_fast_log2( src->f[0] ); 545 dst->f[1] = util_fast_log2( src->f[1] ); 546 dst->f[2] = util_fast_log2( src->f[2] ); 547 dst->f[3] = util_fast_log2( src->f[3] ); 548#else 549 dst->f[0] = logf( src->f[0] ) * 1.442695f; 550 dst->f[1] = logf( src->f[1] ) * 1.442695f; 551 dst->f[2] = logf( src->f[2] ) * 1.442695f; 552 dst->f[3] = logf( src->f[3] ) * 1.442695f; 553#endif 554} 555 556static void 557micro_le( 558 union tgsi_exec_channel *dst, 559 const union tgsi_exec_channel *src0, 560 const union tgsi_exec_channel *src1, 561 const union tgsi_exec_channel *src2, 562 const union tgsi_exec_channel *src3 ) 563{ 564 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 565 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 566 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 567 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 568} 569 570static void 571micro_lt( 572 union tgsi_exec_channel *dst, 573 const union tgsi_exec_channel *src0, 574 const union tgsi_exec_channel *src1, 575 const union tgsi_exec_channel *src2, 576 const union tgsi_exec_channel *src3 ) 577{ 578 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 579 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 580 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 581 dst->f[3] = src0->f[3] < src1->f[3] ? 
src2->f[3] : src3->f[3]; 582} 583 584static void 585micro_ilt( 586 union tgsi_exec_channel *dst, 587 const union tgsi_exec_channel *src0, 588 const union tgsi_exec_channel *src1, 589 const union tgsi_exec_channel *src2, 590 const union tgsi_exec_channel *src3 ) 591{ 592 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 593 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 594 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 595 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 596} 597 598static void 599micro_ult( 600 union tgsi_exec_channel *dst, 601 const union tgsi_exec_channel *src0, 602 const union tgsi_exec_channel *src1, 603 const union tgsi_exec_channel *src2, 604 const union tgsi_exec_channel *src3 ) 605{ 606 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 607 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 608 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 609 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 610} 611 612static void 613micro_max( 614 union tgsi_exec_channel *dst, 615 const union tgsi_exec_channel *src0, 616 const union tgsi_exec_channel *src1 ) 617{ 618 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 619 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 620 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 621 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 622} 623 624static void 625micro_imax( 626 union tgsi_exec_channel *dst, 627 const union tgsi_exec_channel *src0, 628 const union tgsi_exec_channel *src1 ) 629{ 630 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 631 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 632 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 633 dst->i[3] = src0->i[3] > src1->i[3] ? 
src0->i[3] : src1->i[3]; 634} 635 636static void 637micro_umax( 638 union tgsi_exec_channel *dst, 639 const union tgsi_exec_channel *src0, 640 const union tgsi_exec_channel *src1 ) 641{ 642 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 643 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 644 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 645 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 646} 647 648static void 649micro_min( 650 union tgsi_exec_channel *dst, 651 const union tgsi_exec_channel *src0, 652 const union tgsi_exec_channel *src1 ) 653{ 654 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 655 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 656 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 657 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 658} 659 660static void 661micro_imin( 662 union tgsi_exec_channel *dst, 663 const union tgsi_exec_channel *src0, 664 const union tgsi_exec_channel *src1 ) 665{ 666 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 667 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 668 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 669 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 670} 671 672static void 673micro_umin( 674 union tgsi_exec_channel *dst, 675 const union tgsi_exec_channel *src0, 676 const union tgsi_exec_channel *src1 ) 677{ 678 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 679 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 680 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 681 dst->u[3] = src0->u[3] < src1->u[3] ? 
src0->u[3] : src1->u[3]; 682} 683 684static void 685micro_umod( 686 union tgsi_exec_channel *dst, 687 const union tgsi_exec_channel *src0, 688 const union tgsi_exec_channel *src1 ) 689{ 690 dst->u[0] = src0->u[0] % src1->u[0]; 691 dst->u[1] = src0->u[1] % src1->u[1]; 692 dst->u[2] = src0->u[2] % src1->u[2]; 693 dst->u[3] = src0->u[3] % src1->u[3]; 694} 695 696static void 697micro_mul( 698 union tgsi_exec_channel *dst, 699 const union tgsi_exec_channel *src0, 700 const union tgsi_exec_channel *src1 ) 701{ 702 dst->f[0] = src0->f[0] * src1->f[0]; 703 dst->f[1] = src0->f[1] * src1->f[1]; 704 dst->f[2] = src0->f[2] * src1->f[2]; 705 dst->f[3] = src0->f[3] * src1->f[3]; 706} 707 708static void 709micro_imul( 710 union tgsi_exec_channel *dst, 711 const union tgsi_exec_channel *src0, 712 const union tgsi_exec_channel *src1 ) 713{ 714 dst->i[0] = src0->i[0] * src1->i[0]; 715 dst->i[1] = src0->i[1] * src1->i[1]; 716 dst->i[2] = src0->i[2] * src1->i[2]; 717 dst->i[3] = src0->i[3] * src1->i[3]; 718} 719 720static void 721micro_imul64( 722 union tgsi_exec_channel *dst0, 723 union tgsi_exec_channel *dst1, 724 const union tgsi_exec_channel *src0, 725 const union tgsi_exec_channel *src1 ) 726{ 727 dst1->i[0] = src0->i[0] * src1->i[0]; 728 dst1->i[1] = src0->i[1] * src1->i[1]; 729 dst1->i[2] = src0->i[2] * src1->i[2]; 730 dst1->i[3] = src0->i[3] * src1->i[3]; 731 dst0->i[0] = 0; 732 dst0->i[1] = 0; 733 dst0->i[2] = 0; 734 dst0->i[3] = 0; 735} 736 737static void 738micro_umul64( 739 union tgsi_exec_channel *dst0, 740 union tgsi_exec_channel *dst1, 741 const union tgsi_exec_channel *src0, 742 const union tgsi_exec_channel *src1 ) 743{ 744 dst1->u[0] = src0->u[0] * src1->u[0]; 745 dst1->u[1] = src0->u[1] * src1->u[1]; 746 dst1->u[2] = src0->u[2] * src1->u[2]; 747 dst1->u[3] = src0->u[3] * src1->u[3]; 748 dst0->u[0] = 0; 749 dst0->u[1] = 0; 750 dst0->u[2] = 0; 751 dst0->u[3] = 0; 752} 753 754static void 755micro_movc( 756 union tgsi_exec_channel *dst, 757 const union tgsi_exec_channel 
*src0, 758 const union tgsi_exec_channel *src1, 759 const union tgsi_exec_channel *src2 ) 760{ 761 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 762 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 763 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 764 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 765} 766 767static void 768micro_neg( 769 union tgsi_exec_channel *dst, 770 const union tgsi_exec_channel *src ) 771{ 772 dst->f[0] = -src->f[0]; 773 dst->f[1] = -src->f[1]; 774 dst->f[2] = -src->f[2]; 775 dst->f[3] = -src->f[3]; 776} 777 778static void 779micro_ineg( 780 union tgsi_exec_channel *dst, 781 const union tgsi_exec_channel *src ) 782{ 783 dst->i[0] = -src->i[0]; 784 dst->i[1] = -src->i[1]; 785 dst->i[2] = -src->i[2]; 786 dst->i[3] = -src->i[3]; 787} 788 789static void 790micro_not( 791 union tgsi_exec_channel *dst, 792 const union tgsi_exec_channel *src ) 793{ 794 dst->u[0] = ~src->u[0]; 795 dst->u[1] = ~src->u[1]; 796 dst->u[2] = ~src->u[2]; 797 dst->u[3] = ~src->u[3]; 798} 799 800static void 801micro_or( 802 union tgsi_exec_channel *dst, 803 const union tgsi_exec_channel *src0, 804 const union tgsi_exec_channel *src1 ) 805{ 806 dst->u[0] = src0->u[0] | src1->u[0]; 807 dst->u[1] = src0->u[1] | src1->u[1]; 808 dst->u[2] = src0->u[2] | src1->u[2]; 809 dst->u[3] = src0->u[3] | src1->u[3]; 810} 811 812static void 813micro_pow( 814 union tgsi_exec_channel *dst, 815 const union tgsi_exec_channel *src0, 816 const union tgsi_exec_channel *src1 ) 817{ 818#if FAST_MATH 819 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 820 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 821 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 822 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 823#else 824 dst->f[0] = powf( src0->f[0], src1->f[0] ); 825 dst->f[1] = powf( src0->f[1], src1->f[1] ); 826 dst->f[2] = powf( src0->f[2], src1->f[2] ); 827 dst->f[3] = powf( src0->f[3], src1->f[3] ); 828#endif 829} 830 831static void 832micro_rnd( 833 union 
tgsi_exec_channel *dst, 834 const union tgsi_exec_channel *src ) 835{ 836 dst->f[0] = floorf( src->f[0] + 0.5f ); 837 dst->f[1] = floorf( src->f[1] + 0.5f ); 838 dst->f[2] = floorf( src->f[2] + 0.5f ); 839 dst->f[3] = floorf( src->f[3] + 0.5f ); 840} 841 842static void 843micro_shl( 844 union tgsi_exec_channel *dst, 845 const union tgsi_exec_channel *src0, 846 const union tgsi_exec_channel *src1 ) 847{ 848 dst->i[0] = src0->i[0] << src1->i[0]; 849 dst->i[1] = src0->i[1] << src1->i[1]; 850 dst->i[2] = src0->i[2] << src1->i[2]; 851 dst->i[3] = src0->i[3] << src1->i[3]; 852} 853 854static void 855micro_ishr( 856 union tgsi_exec_channel *dst, 857 const union tgsi_exec_channel *src0, 858 const union tgsi_exec_channel *src1 ) 859{ 860 dst->i[0] = src0->i[0] >> src1->i[0]; 861 dst->i[1] = src0->i[1] >> src1->i[1]; 862 dst->i[2] = src0->i[2] >> src1->i[2]; 863 dst->i[3] = src0->i[3] >> src1->i[3]; 864} 865 866static void 867micro_trunc( 868 union tgsi_exec_channel *dst, 869 const union tgsi_exec_channel *src0 ) 870{ 871 dst->f[0] = (float) (int) src0->f[0]; 872 dst->f[1] = (float) (int) src0->f[1]; 873 dst->f[2] = (float) (int) src0->f[2]; 874 dst->f[3] = (float) (int) src0->f[3]; 875} 876 877static void 878micro_ushr( 879 union tgsi_exec_channel *dst, 880 const union tgsi_exec_channel *src0, 881 const union tgsi_exec_channel *src1 ) 882{ 883 dst->u[0] = src0->u[0] >> src1->u[0]; 884 dst->u[1] = src0->u[1] >> src1->u[1]; 885 dst->u[2] = src0->u[2] >> src1->u[2]; 886 dst->u[3] = src0->u[3] >> src1->u[3]; 887} 888 889static void 890micro_sin( 891 union tgsi_exec_channel *dst, 892 const union tgsi_exec_channel *src ) 893{ 894 dst->f[0] = sinf( src->f[0] ); 895 dst->f[1] = sinf( src->f[1] ); 896 dst->f[2] = sinf( src->f[2] ); 897 dst->f[3] = sinf( src->f[3] ); 898} 899 900static void 901micro_sqrt( union tgsi_exec_channel *dst, 902 const union tgsi_exec_channel *src ) 903{ 904 dst->f[0] = sqrtf( src->f[0] ); 905 dst->f[1] = sqrtf( src->f[1] ); 906 dst->f[2] = sqrtf( src->f[2] 
); 907 dst->f[3] = sqrtf( src->f[3] ); 908} 909 910static void 911micro_sub( 912 union tgsi_exec_channel *dst, 913 const union tgsi_exec_channel *src0, 914 const union tgsi_exec_channel *src1 ) 915{ 916 dst->f[0] = src0->f[0] - src1->f[0]; 917 dst->f[1] = src0->f[1] - src1->f[1]; 918 dst->f[2] = src0->f[2] - src1->f[2]; 919 dst->f[3] = src0->f[3] - src1->f[3]; 920} 921 922static void 923micro_u2f( 924 union tgsi_exec_channel *dst, 925 const union tgsi_exec_channel *src ) 926{ 927 dst->f[0] = (float) src->u[0]; 928 dst->f[1] = (float) src->u[1]; 929 dst->f[2] = (float) src->u[2]; 930 dst->f[3] = (float) src->u[3]; 931} 932 933static void 934micro_xor( 935 union tgsi_exec_channel *dst, 936 const union tgsi_exec_channel *src0, 937 const union tgsi_exec_channel *src1 ) 938{ 939 dst->u[0] = src0->u[0] ^ src1->u[0]; 940 dst->u[1] = src0->u[1] ^ src1->u[1]; 941 dst->u[2] = src0->u[2] ^ src1->u[2]; 942 dst->u[3] = src0->u[3] ^ src1->u[3]; 943} 944 945static void 946fetch_src_file_channel( 947 const struct tgsi_exec_machine *mach, 948 const uint file, 949 const uint swizzle, 950 const union tgsi_exec_channel *index, 951 union tgsi_exec_channel *chan ) 952{ 953 switch( swizzle ) { 954 case TGSI_EXTSWIZZLE_X: 955 case TGSI_EXTSWIZZLE_Y: 956 case TGSI_EXTSWIZZLE_Z: 957 case TGSI_EXTSWIZZLE_W: 958 switch( file ) { 959 case TGSI_FILE_CONSTANT: 960 assert(mach->Consts); 961 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 962 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 963 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 964 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 965 break; 966 967 case TGSI_FILE_INPUT: 968 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 969 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 970 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 971 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 972 break; 973 974 case TGSI_FILE_TEMPORARY: 975 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 976 chan->u[0] = 
mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 977 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 978 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 979 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 980 break; 981 982 case TGSI_FILE_IMMEDIATE: 983 assert( index->i[0] < (int) mach->ImmLimit ); 984 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 985 assert( index->i[1] < (int) mach->ImmLimit ); 986 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 987 assert( index->i[2] < (int) mach->ImmLimit ); 988 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 989 assert( index->i[3] < (int) mach->ImmLimit ); 990 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 991 break; 992 993 case TGSI_FILE_ADDRESS: 994 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 995 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 996 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 997 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 998 break; 999 1000 case TGSI_FILE_OUTPUT: 1001 /* vertex/fragment output vars can be read too */ 1002 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1003 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1004 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1005 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1006 break; 1007 1008 default: 1009 assert( 0 ); 1010 } 1011 break; 1012 1013 case TGSI_EXTSWIZZLE_ZERO: 1014 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1015 break; 1016 1017 case TGSI_EXTSWIZZLE_ONE: 1018 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1019 break; 1020 1021 default: 1022 assert( 0 ); 1023 } 1024} 1025 1026static void 1027fetch_source( 1028 const struct tgsi_exec_machine *mach, 1029 union tgsi_exec_channel *chan, 1030 const struct tgsi_full_src_register *reg, 1031 const uint chan_index ) 1032{ 1033 union tgsi_exec_channel index; 1034 uint swizzle; 1035 1036 index.i[0] = 1037 index.i[1] = 1038 index.i[2] = 1039 index.i[3] = 
reg->SrcRegister.Index; 1040 1041 if (reg->SrcRegister.Indirect) { 1042 union tgsi_exec_channel index2; 1043 union tgsi_exec_channel indir_index; 1044 1045 index2.i[0] = 1046 index2.i[1] = 1047 index2.i[2] = 1048 index2.i[3] = reg->SrcRegisterInd.Index; 1049 1050 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1051 fetch_src_file_channel( 1052 mach, 1053 reg->SrcRegisterInd.File, 1054 swizzle, 1055 &index2, 1056 &indir_index ); 1057 1058 index.i[0] += indir_index.i[0]; 1059 index.i[1] += indir_index.i[1]; 1060 index.i[2] += indir_index.i[2]; 1061 index.i[3] += indir_index.i[3]; 1062 } 1063 1064 if( reg->SrcRegister.Dimension ) { 1065 switch( reg->SrcRegister.File ) { 1066 case TGSI_FILE_INPUT: 1067 index.i[0] *= 17; 1068 index.i[1] *= 17; 1069 index.i[2] *= 17; 1070 index.i[3] *= 17; 1071 break; 1072 case TGSI_FILE_CONSTANT: 1073 index.i[0] *= 4096; 1074 index.i[1] *= 4096; 1075 index.i[2] *= 4096; 1076 index.i[3] *= 4096; 1077 break; 1078 default: 1079 assert( 0 ); 1080 } 1081 1082 index.i[0] += reg->SrcRegisterDim.Index; 1083 index.i[1] += reg->SrcRegisterDim.Index; 1084 index.i[2] += reg->SrcRegisterDim.Index; 1085 index.i[3] += reg->SrcRegisterDim.Index; 1086 1087 if (reg->SrcRegisterDim.Indirect) { 1088 union tgsi_exec_channel index2; 1089 union tgsi_exec_channel indir_index; 1090 1091 index2.i[0] = 1092 index2.i[1] = 1093 index2.i[2] = 1094 index2.i[3] = reg->SrcRegisterDimInd.Index; 1095 1096 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1097 fetch_src_file_channel( 1098 mach, 1099 reg->SrcRegisterDimInd.File, 1100 swizzle, 1101 &index2, 1102 &indir_index ); 1103 1104 index.i[0] += indir_index.i[0]; 1105 index.i[1] += indir_index.i[1]; 1106 index.i[2] += indir_index.i[2]; 1107 index.i[3] += indir_index.i[3]; 1108 } 1109 } 1110 1111 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1112 fetch_src_file_channel( 1113 mach, 1114 reg->SrcRegister.File, 1115 swizzle, 1116 &index, 
1117 chan ); 1118 1119 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1120 case TGSI_UTIL_SIGN_CLEAR: 1121 micro_abs( chan, chan ); 1122 break; 1123 1124 case TGSI_UTIL_SIGN_SET: 1125 micro_abs( chan, chan ); 1126 micro_neg( chan, chan ); 1127 break; 1128 1129 case TGSI_UTIL_SIGN_TOGGLE: 1130 micro_neg( chan, chan ); 1131 break; 1132 1133 case TGSI_UTIL_SIGN_KEEP: 1134 break; 1135 } 1136 1137 if (reg->SrcRegisterExtMod.Complement) { 1138 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1139 } 1140} 1141 1142static void 1143store_dest( 1144 struct tgsi_exec_machine *mach, 1145 const union tgsi_exec_channel *chan, 1146 const struct tgsi_full_dst_register *reg, 1147 const struct tgsi_full_instruction *inst, 1148 uint chan_index ) 1149{ 1150 uint i; 1151 union tgsi_exec_channel null; 1152 union tgsi_exec_channel *dst; 1153 uint execmask = mach->ExecMask; 1154 1155 switch (reg->DstRegister.File) { 1156 case TGSI_FILE_NULL: 1157 dst = &null; 1158 break; 1159 1160 case TGSI_FILE_OUTPUT: 1161 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1162 + reg->DstRegister.Index].xyzw[chan_index]; 1163 break; 1164 1165 case TGSI_FILE_TEMPORARY: 1166 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); 1167 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 1168 break; 1169 1170 case TGSI_FILE_ADDRESS: 1171 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 1172 break; 1173 1174 default: 1175 assert( 0 ); 1176 return; 1177 } 1178 1179 if (inst->InstructionExtNv.CondFlowEnable) { 1180 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1181 uint swizzle; 1182 uint shift; 1183 uint mask; 1184 uint test; 1185 1186 /* Only CC0 supported. 
1187 */ 1188 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1189 1190 switch (chan_index) { 1191 case CHAN_X: 1192 swizzle = inst->InstructionExtNv.CondSwizzleX; 1193 break; 1194 case CHAN_Y: 1195 swizzle = inst->InstructionExtNv.CondSwizzleY; 1196 break; 1197 case CHAN_Z: 1198 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1199 break; 1200 case CHAN_W: 1201 swizzle = inst->InstructionExtNv.CondSwizzleW; 1202 break; 1203 default: 1204 assert( 0 ); 1205 return; 1206 } 1207 1208 switch (swizzle) { 1209 case TGSI_SWIZZLE_X: 1210 shift = TGSI_EXEC_CC_X_SHIFT; 1211 mask = TGSI_EXEC_CC_X_MASK; 1212 break; 1213 case TGSI_SWIZZLE_Y: 1214 shift = TGSI_EXEC_CC_Y_SHIFT; 1215 mask = TGSI_EXEC_CC_Y_MASK; 1216 break; 1217 case TGSI_SWIZZLE_Z: 1218 shift = TGSI_EXEC_CC_Z_SHIFT; 1219 mask = TGSI_EXEC_CC_Z_MASK; 1220 break; 1221 case TGSI_SWIZZLE_W: 1222 shift = TGSI_EXEC_CC_W_SHIFT; 1223 mask = TGSI_EXEC_CC_W_MASK; 1224 break; 1225 default: 1226 assert( 0 ); 1227 return; 1228 } 1229 1230 switch (inst->InstructionExtNv.CondMask) { 1231 case TGSI_CC_GT: 1232 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1233 for (i = 0; i < QUAD_SIZE; i++) 1234 if (cc->u[i] & test) 1235 execmask &= ~(1 << i); 1236 break; 1237 1238 case TGSI_CC_EQ: 1239 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1240 for (i = 0; i < QUAD_SIZE; i++) 1241 if (cc->u[i] & test) 1242 execmask &= ~(1 << i); 1243 break; 1244 1245 case TGSI_CC_LT: 1246 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1247 for (i = 0; i < QUAD_SIZE; i++) 1248 if (cc->u[i] & test) 1249 execmask &= ~(1 << i); 1250 break; 1251 1252 case TGSI_CC_GE: 1253 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1254 for (i = 0; i < QUAD_SIZE; i++) 1255 if (cc->u[i] & test) 1256 execmask &= ~(1 << i); 1257 break; 1258 1259 case TGSI_CC_LE: 1260 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1261 for (i = 0; i < QUAD_SIZE; i++) 1262 if (cc->u[i] & test) 1263 execmask &= ~(1 << i); 1264 break; 1265 1266 case TGSI_CC_NE: 1267 test = 
~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1268 for (i = 0; i < QUAD_SIZE; i++) 1269 if (cc->u[i] & test) 1270 execmask &= ~(1 << i); 1271 break; 1272 1273 case TGSI_CC_TR: 1274 break; 1275 1276 case TGSI_CC_FL: 1277 for (i = 0; i < QUAD_SIZE; i++) 1278 execmask &= ~(1 << i); 1279 break; 1280 1281 default: 1282 assert( 0 ); 1283 return; 1284 } 1285 } 1286 1287 switch (inst->Instruction.Saturate) { 1288 case TGSI_SAT_NONE: 1289 for (i = 0; i < QUAD_SIZE; i++) 1290 if (execmask & (1 << i)) 1291 dst->i[i] = chan->i[i]; 1292 break; 1293 1294 case TGSI_SAT_ZERO_ONE: 1295 for (i = 0; i < QUAD_SIZE; i++) 1296 if (execmask & (1 << i)) { 1297 if (chan->f[i] < 0.0f) 1298 dst->f[i] = 0.0f; 1299 else if (chan->f[i] > 1.0f) 1300 dst->f[i] = 1.0f; 1301 else 1302 dst->i[i] = chan->i[i]; 1303 } 1304 break; 1305 1306 case TGSI_SAT_MINUS_PLUS_ONE: 1307 for (i = 0; i < QUAD_SIZE; i++) 1308 if (execmask & (1 << i)) { 1309 if (chan->f[i] < -1.0f) 1310 dst->f[i] = -1.0f; 1311 else if (chan->f[i] > 1.0f) 1312 dst->f[i] = 1.0f; 1313 else 1314 dst->i[i] = chan->i[i]; 1315 } 1316 break; 1317 1318 default: 1319 assert( 0 ); 1320 } 1321 1322 if (inst->InstructionExtNv.CondDstUpdate) { 1323 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1324 uint shift; 1325 uint mask; 1326 1327 /* Only CC0 supported. 
1328 */ 1329 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1330 1331 switch (chan_index) { 1332 case CHAN_X: 1333 shift = TGSI_EXEC_CC_X_SHIFT; 1334 mask = ~TGSI_EXEC_CC_X_MASK; 1335 break; 1336 case CHAN_Y: 1337 shift = TGSI_EXEC_CC_Y_SHIFT; 1338 mask = ~TGSI_EXEC_CC_Y_MASK; 1339 break; 1340 case CHAN_Z: 1341 shift = TGSI_EXEC_CC_Z_SHIFT; 1342 mask = ~TGSI_EXEC_CC_Z_MASK; 1343 break; 1344 case CHAN_W: 1345 shift = TGSI_EXEC_CC_W_SHIFT; 1346 mask = ~TGSI_EXEC_CC_W_MASK; 1347 break; 1348 default: 1349 assert( 0 ); 1350 return; 1351 } 1352 1353 for (i = 0; i < QUAD_SIZE; i++) 1354 if (execmask & (1 << i)) { 1355 cc->u[i] &= mask; 1356 if (dst->f[i] < 0.0f) 1357 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1358 else if (dst->f[i] > 0.0f) 1359 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1360 else if (dst->f[i] == 0.0f) 1361 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1362 else 1363 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1364 } 1365 } 1366} 1367 1368#define FETCH(VAL,INDEX,CHAN)\ 1369 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1370 1371#define STORE(VAL,INDEX,CHAN)\ 1372 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1373 1374 1375/** 1376 * Execute ARB-style KIL which is predicated by a src register. 1377 * Kill fragment if any of the four values is less than zero. 1378 */ 1379static void 1380exec_kil(struct tgsi_exec_machine *mach, 1381 const struct tgsi_full_instruction *inst) 1382{ 1383 uint uniquemask; 1384 uint chan_index; 1385 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1386 union tgsi_exec_channel r[1]; 1387 1388 /* This mask stores component bits that were already tested. Note that 1389 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1390 * tested. 
*/ 1391 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1392 1393 for (chan_index = 0; chan_index < 4; chan_index++) 1394 { 1395 uint swizzle; 1396 uint i; 1397 1398 /* unswizzle channel */ 1399 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1400 &inst->FullSrcRegisters[0], 1401 chan_index); 1402 1403 /* check if the component has not been already tested */ 1404 if (uniquemask & (1 << swizzle)) 1405 continue; 1406 uniquemask |= 1 << swizzle; 1407 1408 FETCH(&r[0], 0, chan_index); 1409 for (i = 0; i < 4; i++) 1410 if (r[0].f[i] < 0.0f) 1411 kilmask |= 1 << i; 1412 } 1413 1414 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1415} 1416 1417/** 1418 * Execute NVIDIA-style KIL which is predicated by a condition code. 1419 * Kill fragment if the condition code is TRUE. 1420 */ 1421static void 1422exec_kilp(struct tgsi_exec_machine *mach, 1423 const struct tgsi_full_instruction *inst) 1424{ 1425 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1426 1427 if (inst->InstructionExtNv.CondFlowEnable) { 1428 uint swizzle[4]; 1429 uint chan_index; 1430 1431 kilmask = 0x0; 1432 1433 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1434 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1435 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1436 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1437 1438 for (chan_index = 0; chan_index < 4; chan_index++) 1439 { 1440 uint i; 1441 1442 for (i = 0; i < 4; i++) { 1443 /* TODO: evaluate the condition code */ 1444 if (0) 1445 kilmask |= 1 << i; 1446 } 1447 } 1448 } 1449 else { 1450 /* "unconditional" kil */ 1451 kilmask = mach->ExecMask; 1452 } 1453 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1454} 1455 1456 1457/* 1458 * Fetch a texel using STR texture coordinates. 
1459 */ 1460static void 1461fetch_texel( struct tgsi_sampler *sampler, 1462 const union tgsi_exec_channel *s, 1463 const union tgsi_exec_channel *t, 1464 const union tgsi_exec_channel *p, 1465 float lodbias, /* XXX should be float[4] */ 1466 union tgsi_exec_channel *r, 1467 union tgsi_exec_channel *g, 1468 union tgsi_exec_channel *b, 1469 union tgsi_exec_channel *a ) 1470{ 1471 uint j; 1472 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1473 1474 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1475 1476 for (j = 0; j < 4; j++) { 1477 r->f[j] = rgba[0][j]; 1478 g->f[j] = rgba[1][j]; 1479 b->f[j] = rgba[2][j]; 1480 a->f[j] = rgba[3][j]; 1481 } 1482} 1483 1484 1485static void 1486exec_tex(struct tgsi_exec_machine *mach, 1487 const struct tgsi_full_instruction *inst, 1488 boolean biasLod, 1489 boolean projected) 1490{ 1491 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1492 union tgsi_exec_channel r[8]; 1493 uint chan_index; 1494 float lodBias; 1495 1496 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1497 1498 switch (inst->InstructionExtTexture.Texture) { 1499 case TGSI_TEXTURE_1D: 1500 1501 FETCH(&r[0], 0, CHAN_X); 1502 1503 if (projected) { 1504 FETCH(&r[1], 0, CHAN_W); 1505 micro_div( &r[0], &r[0], &r[1] ); 1506 } 1507 1508 if (biasLod) { 1509 FETCH(&r[1], 0, CHAN_W); 1510 lodBias = r[2].f[0]; 1511 } 1512 else 1513 lodBias = 0.0; 1514 1515 fetch_texel(&mach->Samplers[unit], 1516 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ 1517 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1518 break; 1519 1520 case TGSI_TEXTURE_2D: 1521 case TGSI_TEXTURE_RECT: 1522 1523 FETCH(&r[0], 0, CHAN_X); 1524 FETCH(&r[1], 0, CHAN_Y); 1525 FETCH(&r[2], 0, CHAN_Z); 1526 1527 if (projected) { 1528 FETCH(&r[3], 0, CHAN_W); 1529 micro_div( &r[0], &r[0], &r[3] ); 1530 micro_div( &r[1], &r[1], &r[3] ); 1531 micro_div( &r[2], &r[2], &r[3] ); 1532 } 1533 1534 if (biasLod) { 1535 FETCH(&r[3], 0, CHAN_W); 1536 lodBias = r[3].f[0]; 1537 } 1538 else 1539 lodBias = 
0.0; 1540 1541 fetch_texel(&mach->Samplers[unit], 1542 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1543 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1544 break; 1545 1546 case TGSI_TEXTURE_3D: 1547 case TGSI_TEXTURE_CUBE: 1548 1549 FETCH(&r[0], 0, CHAN_X); 1550 FETCH(&r[1], 0, CHAN_Y); 1551 FETCH(&r[2], 0, CHAN_Z); 1552 1553 if (projected) { 1554 FETCH(&r[3], 0, CHAN_W); 1555 micro_div( &r[0], &r[0], &r[3] ); 1556 micro_div( &r[1], &r[1], &r[3] ); 1557 micro_div( &r[2], &r[2], &r[3] ); 1558 } 1559 1560 if (biasLod) { 1561 FETCH(&r[3], 0, CHAN_W); 1562 lodBias = r[3].f[0]; 1563 } 1564 else 1565 lodBias = 0.0; 1566 1567 fetch_texel(&mach->Samplers[unit], 1568 &r[0], &r[1], &r[2], lodBias, 1569 &r[0], &r[1], &r[2], &r[3]); 1570 break; 1571 1572 default: 1573 assert (0); 1574 } 1575 1576 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1577 STORE( &r[chan_index], 0, chan_index ); 1578 } 1579} 1580 1581 1582/** 1583 * Evaluate a constant-valued coefficient at the position of the 1584 * current quad. 1585 */ 1586static void 1587eval_constant_coef( 1588 struct tgsi_exec_machine *mach, 1589 unsigned attrib, 1590 unsigned chan ) 1591{ 1592 unsigned i; 1593 1594 for( i = 0; i < QUAD_SIZE; i++ ) { 1595 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1596 } 1597} 1598 1599/** 1600 * Evaluate a linear-valued coefficient at the position of the 1601 * current quad. 
1602 */ 1603static void 1604eval_linear_coef( 1605 struct tgsi_exec_machine *mach, 1606 unsigned attrib, 1607 unsigned chan ) 1608{ 1609 const float x = mach->QuadPos.xyzw[0].f[0]; 1610 const float y = mach->QuadPos.xyzw[1].f[0]; 1611 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1612 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1613 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1614 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1615 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1616 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1617 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1618} 1619 1620/** 1621 * Evaluate a perspective-valued coefficient at the position of the 1622 * current quad. 1623 */ 1624static void 1625eval_perspective_coef( 1626 struct tgsi_exec_machine *mach, 1627 unsigned attrib, 1628 unsigned chan ) 1629{ 1630 const float x = mach->QuadPos.xyzw[0].f[0]; 1631 const float y = mach->QuadPos.xyzw[1].f[0]; 1632 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1633 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1634 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1635 const float *w = mach->QuadPos.xyzw[3].f; 1636 /* divide by W here */ 1637 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1638 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1639 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1640 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1641} 1642 1643 1644typedef void (* eval_coef_func)( 1645 struct tgsi_exec_machine *mach, 1646 unsigned attrib, 1647 unsigned chan ); 1648 1649static void 1650exec_declaration( 1651 struct tgsi_exec_machine *mach, 1652 const struct tgsi_full_declaration *decl ) 1653{ 1654 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1655 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1656 unsigned first, last, mask; 1657 eval_coef_func eval; 1658 1659 first = 
decl->DeclarationRange.First; 1660 last = decl->DeclarationRange.Last; 1661 mask = decl->Declaration.UsageMask; 1662 1663 switch( decl->Declaration.Interpolate ) { 1664 case TGSI_INTERPOLATE_CONSTANT: 1665 eval = eval_constant_coef; 1666 break; 1667 1668 case TGSI_INTERPOLATE_LINEAR: 1669 eval = eval_linear_coef; 1670 break; 1671 1672 case TGSI_INTERPOLATE_PERSPECTIVE: 1673 eval = eval_perspective_coef; 1674 break; 1675 1676 default: 1677 eval = NULL; 1678 assert( 0 ); 1679 } 1680 1681 if( mask == TGSI_WRITEMASK_XYZW ) { 1682 unsigned i, j; 1683 1684 for( i = first; i <= last; i++ ) { 1685 for( j = 0; j < NUM_CHANNELS; j++ ) { 1686 eval( mach, i, j ); 1687 } 1688 } 1689 } 1690 else { 1691 unsigned i, j; 1692 1693 for( j = 0; j < NUM_CHANNELS; j++ ) { 1694 if( mask & (1 << j) ) { 1695 for( i = first; i <= last; i++ ) { 1696 eval( mach, i, j ); 1697 } 1698 } 1699 } 1700 } 1701 } 1702 } 1703} 1704 1705static void 1706exec_instruction( 1707 struct tgsi_exec_machine *mach, 1708 const struct tgsi_full_instruction *inst, 1709 int *pc ) 1710{ 1711 uint chan_index; 1712 union tgsi_exec_channel r[8]; 1713 1714 (*pc)++; 1715 1716 switch (inst->Instruction.Opcode) { 1717 case TGSI_OPCODE_ARL: 1718 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1719 FETCH( &r[0], 0, chan_index ); 1720 micro_f2it( &r[0], &r[0] ); 1721 STORE( &r[0], 0, chan_index ); 1722 } 1723 break; 1724 1725 case TGSI_OPCODE_MOV: 1726 case TGSI_OPCODE_SWZ: 1727 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1728 FETCH( &r[0], 0, chan_index ); 1729 STORE( &r[0], 0, chan_index ); 1730 } 1731 break; 1732 1733 case TGSI_OPCODE_LIT: 1734 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1735 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1736 } 1737 1738 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1739 FETCH( &r[0], 0, CHAN_X ); 1740 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1741 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1742 STORE( &r[0], 0, 
CHAN_Y ); 1743 } 1744 1745 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1746 FETCH( &r[1], 0, CHAN_Y ); 1747 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1748 1749 FETCH( &r[2], 0, CHAN_W ); 1750 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1751 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1752 micro_pow( &r[1], &r[1], &r[2] ); 1753 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1754 STORE( &r[0], 0, CHAN_Z ); 1755 } 1756 } 1757 1758 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1759 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1760 } 1761 break; 1762 1763 case TGSI_OPCODE_RCP: 1764 /* TGSI_OPCODE_RECIP */ 1765 FETCH( &r[0], 0, CHAN_X ); 1766 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1767 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1768 STORE( &r[0], 0, chan_index ); 1769 } 1770 break; 1771 1772 case TGSI_OPCODE_RSQ: 1773 /* TGSI_OPCODE_RECIPSQRT */ 1774 FETCH( &r[0], 0, CHAN_X ); 1775 micro_sqrt( &r[0], &r[0] ); 1776 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1777 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1778 STORE( &r[0], 0, chan_index ); 1779 } 1780 break; 1781 1782 case TGSI_OPCODE_EXP: 1783 FETCH( &r[0], 0, CHAN_X ); 1784 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1785 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1786 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1787 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1788 } 1789 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1790 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1791 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1792 } 1793 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1794 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1795 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1796 } 1797 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1798 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1799 } 1800 break; 1801 
1802 case TGSI_OPCODE_LOG: 1803 FETCH( &r[0], 0, CHAN_X ); 1804 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1805 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1806 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1807 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1808 STORE( &r[0], 0, CHAN_X ); 1809 } 1810 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1811 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1812 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 1813 STORE( &r[0], 0, CHAN_Y ); 1814 } 1815 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1816 STORE( &r[1], 0, CHAN_Z ); 1817 } 1818 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1819 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1820 } 1821 break; 1822 1823 case TGSI_OPCODE_MUL: 1824 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1825 { 1826 FETCH(&r[0], 0, chan_index); 1827 FETCH(&r[1], 1, chan_index); 1828 1829 micro_mul( &r[0], &r[0], &r[1] ); 1830 1831 STORE(&r[0], 0, chan_index); 1832 } 1833 break; 1834 1835 case TGSI_OPCODE_ADD: 1836 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1837 FETCH( &r[0], 0, chan_index ); 1838 FETCH( &r[1], 1, chan_index ); 1839 micro_add( &r[0], &r[0], &r[1] ); 1840 STORE( &r[0], 0, chan_index ); 1841 } 1842 break; 1843 1844 case TGSI_OPCODE_DP3: 1845 /* TGSI_OPCODE_DOT3 */ 1846 FETCH( &r[0], 0, CHAN_X ); 1847 FETCH( &r[1], 1, CHAN_X ); 1848 micro_mul( &r[0], &r[0], &r[1] ); 1849 1850 FETCH( &r[1], 0, CHAN_Y ); 1851 FETCH( &r[2], 1, CHAN_Y ); 1852 micro_mul( &r[1], &r[1], &r[2] ); 1853 micro_add( &r[0], &r[0], &r[1] ); 1854 1855 FETCH( &r[1], 0, CHAN_Z ); 1856 FETCH( &r[2], 1, CHAN_Z ); 1857 micro_mul( &r[1], &r[1], &r[2] ); 1858 micro_add( &r[0], &r[0], &r[1] ); 1859 1860 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1861 STORE( &r[0], 0, chan_index ); 1862 } 1863 break; 1864 1865 case TGSI_OPCODE_DP4: 1866 /* TGSI_OPCODE_DOT4 */ 1867 FETCH(&r[0], 0, CHAN_X); 1868 FETCH(&r[1], 1, CHAN_X); 1869 1870 micro_mul( &r[0], &r[0], &r[1] ); 1871 1872 FETCH(&r[1], 0, CHAN_Y); 1873 
FETCH(&r[2], 1, CHAN_Y); 1874 1875 micro_mul( &r[1], &r[1], &r[2] ); 1876 micro_add( &r[0], &r[0], &r[1] ); 1877 1878 FETCH(&r[1], 0, CHAN_Z); 1879 FETCH(&r[2], 1, CHAN_Z); 1880 1881 micro_mul( &r[1], &r[1], &r[2] ); 1882 micro_add( &r[0], &r[0], &r[1] ); 1883 1884 FETCH(&r[1], 0, CHAN_W); 1885 FETCH(&r[2], 1, CHAN_W); 1886 1887 micro_mul( &r[1], &r[1], &r[2] ); 1888 micro_add( &r[0], &r[0], &r[1] ); 1889 1890 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1891 STORE( &r[0], 0, chan_index ); 1892 } 1893 break; 1894 1895 case TGSI_OPCODE_DST: 1896 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1897 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1898 } 1899 1900 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1901 FETCH( &r[0], 0, CHAN_Y ); 1902 FETCH( &r[1], 1, CHAN_Y); 1903 micro_mul( &r[0], &r[0], &r[1] ); 1904 STORE( &r[0], 0, CHAN_Y ); 1905 } 1906 1907 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1908 FETCH( &r[0], 0, CHAN_Z ); 1909 STORE( &r[0], 0, CHAN_Z ); 1910 } 1911 1912 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1913 FETCH( &r[0], 1, CHAN_W ); 1914 STORE( &r[0], 0, CHAN_W ); 1915 } 1916 break; 1917 1918 case TGSI_OPCODE_MIN: 1919 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1920 FETCH(&r[0], 0, chan_index); 1921 FETCH(&r[1], 1, chan_index); 1922 1923 /* XXX use micro_min()?? */ 1924 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 1925 1926 STORE(&r[0], 0, chan_index); 1927 } 1928 break; 1929 1930 case TGSI_OPCODE_MAX: 1931 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1932 FETCH(&r[0], 0, chan_index); 1933 FETCH(&r[1], 1, chan_index); 1934 1935 /* XXX use micro_max()?? 
*/ 1936 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 1937 1938 STORE(&r[0], 0, chan_index ); 1939 } 1940 break; 1941 1942 case TGSI_OPCODE_SLT: 1943 /* TGSI_OPCODE_SETLT */ 1944 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1945 FETCH( &r[0], 0, chan_index ); 1946 FETCH( &r[1], 1, chan_index ); 1947 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1948 STORE( &r[0], 0, chan_index ); 1949 } 1950 break; 1951 1952 case TGSI_OPCODE_SGE: 1953 /* TGSI_OPCODE_SETGE */ 1954 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1955 FETCH( &r[0], 0, chan_index ); 1956 FETCH( &r[1], 1, chan_index ); 1957 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1958 STORE( &r[0], 0, chan_index ); 1959 } 1960 break; 1961 1962 case TGSI_OPCODE_MAD: 1963 /* TGSI_OPCODE_MADD */ 1964 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1965 FETCH( &r[0], 0, chan_index ); 1966 FETCH( &r[1], 1, chan_index ); 1967 micro_mul( &r[0], &r[0], &r[1] ); 1968 FETCH( &r[1], 2, chan_index ); 1969 micro_add( &r[0], &r[0], &r[1] ); 1970 STORE( &r[0], 0, chan_index ); 1971 } 1972 break; 1973 1974 case TGSI_OPCODE_SUB: 1975 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1976 FETCH(&r[0], 0, chan_index); 1977 FETCH(&r[1], 1, chan_index); 1978 1979 micro_sub( &r[0], &r[0], &r[1] ); 1980 1981 STORE(&r[0], 0, chan_index); 1982 } 1983 break; 1984 1985 case TGSI_OPCODE_LERP: 1986 /* TGSI_OPCODE_LRP */ 1987 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1988 FETCH(&r[0], 0, chan_index); 1989 FETCH(&r[1], 1, chan_index); 1990 FETCH(&r[2], 2, chan_index); 1991 1992 micro_sub( &r[1], &r[1], &r[2] ); 1993 micro_mul( &r[0], &r[0], &r[1] ); 1994 micro_add( &r[0], &r[0], &r[2] ); 1995 1996 STORE(&r[0], 0, chan_index); 1997 } 1998 break; 1999 2000 case TGSI_OPCODE_CND: 2001 assert (0); 2002 break; 2003 2004 case TGSI_OPCODE_CND0: 2005 assert (0); 2006 break; 2007 2008 case TGSI_OPCODE_DOT2ADD: 2009 /* 
TGSI_OPCODE_DP2A */ 2010 assert (0); 2011 break; 2012 2013 case TGSI_OPCODE_INDEX: 2014 assert (0); 2015 break; 2016 2017 case TGSI_OPCODE_NEGATE: 2018 assert (0); 2019 break; 2020 2021 case TGSI_OPCODE_FRAC: 2022 /* TGSI_OPCODE_FRC */ 2023 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2024 FETCH( &r[0], 0, chan_index ); 2025 micro_frc( &r[0], &r[0] ); 2026 STORE( &r[0], 0, chan_index ); 2027 } 2028 break; 2029 2030 case TGSI_OPCODE_CLAMP: 2031 assert (0); 2032 break; 2033 2034 case TGSI_OPCODE_FLOOR: 2035 /* TGSI_OPCODE_FLR */ 2036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2037 FETCH( &r[0], 0, chan_index ); 2038 micro_flr( &r[0], &r[0] ); 2039 STORE( &r[0], 0, chan_index ); 2040 } 2041 break; 2042 2043 case TGSI_OPCODE_ROUND: 2044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2045 FETCH( &r[0], 0, chan_index ); 2046 micro_rnd( &r[0], &r[0] ); 2047 STORE( &r[0], 0, chan_index ); 2048 } 2049 break; 2050 2051 case TGSI_OPCODE_EXPBASE2: 2052 /* TGSI_OPCODE_EX2 */ 2053 FETCH(&r[0], 0, CHAN_X); 2054 2055#if FAST_MATH 2056 micro_exp2( &r[0], &r[0] ); 2057#else 2058 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2059#endif 2060 2061 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2062 STORE( &r[0], 0, chan_index ); 2063 } 2064 break; 2065 2066 case TGSI_OPCODE_LOGBASE2: 2067 /* TGSI_OPCODE_LG2 */ 2068 FETCH( &r[0], 0, CHAN_X ); 2069 micro_lg2( &r[0], &r[0] ); 2070 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2071 STORE( &r[0], 0, chan_index ); 2072 } 2073 break; 2074 2075 case TGSI_OPCODE_POWER: 2076 /* TGSI_OPCODE_POW */ 2077 FETCH(&r[0], 0, CHAN_X); 2078 FETCH(&r[1], 1, CHAN_X); 2079 2080 micro_pow( &r[0], &r[0], &r[1] ); 2081 2082 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2083 STORE( &r[0], 0, chan_index ); 2084 } 2085 break; 2086 2087 case TGSI_OPCODE_CROSSPRODUCT: 2088 /* TGSI_OPCODE_XPD */ 2089 FETCH(&r[0], 0, CHAN_Y); 2090 FETCH(&r[1], 1, CHAN_Z); 2091 2092 micro_mul( &r[2], &r[0], &r[1] ); 2093 2094 FETCH(&r[3], 0, CHAN_Z); 
2095 FETCH(&r[4], 1, CHAN_Y); 2096 2097 micro_mul( &r[5], &r[3], &r[4] ); 2098 micro_sub( &r[2], &r[2], &r[5] ); 2099 2100 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2101 STORE( &r[2], 0, CHAN_X ); 2102 } 2103 2104 FETCH(&r[2], 1, CHAN_X); 2105 2106 micro_mul( &r[3], &r[3], &r[2] ); 2107 2108 FETCH(&r[5], 0, CHAN_X); 2109 2110 micro_mul( &r[1], &r[1], &r[5] ); 2111 micro_sub( &r[3], &r[3], &r[1] ); 2112 2113 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2114 STORE( &r[3], 0, CHAN_Y ); 2115 } 2116 2117 micro_mul( &r[5], &r[5], &r[4] ); 2118 micro_mul( &r[0], &r[0], &r[2] ); 2119 micro_sub( &r[5], &r[5], &r[0] ); 2120 2121 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2122 STORE( &r[5], 0, CHAN_Z ); 2123 } 2124 2125 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2126 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2127 } 2128 break; 2129 2130 case TGSI_OPCODE_MULTIPLYMATRIX: 2131 assert (0); 2132 break; 2133 2134 case TGSI_OPCODE_ABS: 2135 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2136 FETCH(&r[0], 0, chan_index); 2137 2138 micro_abs( &r[0], &r[0] ); 2139 2140 STORE(&r[0], 0, chan_index); 2141 } 2142 break; 2143 2144 case TGSI_OPCODE_RCC: 2145 assert (0); 2146 break; 2147 2148 case TGSI_OPCODE_DPH: 2149 FETCH(&r[0], 0, CHAN_X); 2150 FETCH(&r[1], 1, CHAN_X); 2151 2152 micro_mul( &r[0], &r[0], &r[1] ); 2153 2154 FETCH(&r[1], 0, CHAN_Y); 2155 FETCH(&r[2], 1, CHAN_Y); 2156 2157 micro_mul( &r[1], &r[1], &r[2] ); 2158 micro_add( &r[0], &r[0], &r[1] ); 2159 2160 FETCH(&r[1], 0, CHAN_Z); 2161 FETCH(&r[2], 1, CHAN_Z); 2162 2163 micro_mul( &r[1], &r[1], &r[2] ); 2164 micro_add( &r[0], &r[0], &r[1] ); 2165 2166 FETCH(&r[1], 1, CHAN_W); 2167 2168 micro_add( &r[0], &r[0], &r[1] ); 2169 2170 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2171 STORE( &r[0], 0, chan_index ); 2172 } 2173 break; 2174 2175 case TGSI_OPCODE_COS: 2176 FETCH(&r[0], 0, CHAN_X); 2177 2178 micro_cos( &r[0], &r[0] ); 2179 2180 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2181 STORE( &r[0], 0, 
chan_index ); 2182 } 2183 break; 2184 2185 case TGSI_OPCODE_DDX: 2186 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2187 FETCH( &r[0], 0, chan_index ); 2188 micro_ddx( &r[0], &r[0] ); 2189 STORE( &r[0], 0, chan_index ); 2190 } 2191 break; 2192 2193 case TGSI_OPCODE_DDY: 2194 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2195 FETCH( &r[0], 0, chan_index ); 2196 micro_ddy( &r[0], &r[0] ); 2197 STORE( &r[0], 0, chan_index ); 2198 } 2199 break; 2200 2201 case TGSI_OPCODE_KILP: 2202 exec_kilp (mach, inst); 2203 break; 2204 2205 case TGSI_OPCODE_KIL: 2206 exec_kil (mach, inst); 2207 break; 2208 2209 case TGSI_OPCODE_PK2H: 2210 assert (0); 2211 break; 2212 2213 case TGSI_OPCODE_PK2US: 2214 assert (0); 2215 break; 2216 2217 case TGSI_OPCODE_PK4B: 2218 assert (0); 2219 break; 2220 2221 case TGSI_OPCODE_PK4UB: 2222 assert (0); 2223 break; 2224 2225 case TGSI_OPCODE_RFL: 2226 assert (0); 2227 break; 2228 2229 case TGSI_OPCODE_SEQ: 2230 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2231 FETCH( &r[0], 0, chan_index ); 2232 FETCH( &r[1], 1, chan_index ); 2233 micro_eq( &r[0], &r[0], &r[1], 2234 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2235 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2236 STORE( &r[0], 0, chan_index ); 2237 } 2238 break; 2239 2240 case TGSI_OPCODE_SFL: 2241 assert (0); 2242 break; 2243 2244 case TGSI_OPCODE_SGT: 2245 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2246 FETCH( &r[0], 0, chan_index ); 2247 FETCH( &r[1], 1, chan_index ); 2248 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2249 STORE( &r[0], 0, chan_index ); 2250 } 2251 break; 2252 2253 case TGSI_OPCODE_SIN: 2254 FETCH( &r[0], 0, CHAN_X ); 2255 micro_sin( &r[0], &r[0] ); 2256 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2257 STORE( &r[0], 0, chan_index ); 2258 } 2259 break; 2260 2261 case TGSI_OPCODE_SLE: 2262 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2263 FETCH( &r[0], 0, chan_index ); 2264 FETCH( &r[1], 1, chan_index ); 2265 
micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2266 STORE( &r[0], 0, chan_index ); 2267 } 2268 break; 2269 2270 case TGSI_OPCODE_SNE: 2271 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2272 FETCH( &r[0], 0, chan_index ); 2273 FETCH( &r[1], 1, chan_index ); 2274 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2275 STORE( &r[0], 0, chan_index ); 2276 } 2277 break; 2278 2279 case TGSI_OPCODE_STR: 2280 assert (0); 2281 break; 2282 2283 case TGSI_OPCODE_TEX: 2284 /* simple texture lookup */ 2285 /* src[0] = texcoord */ 2286 /* src[1] = sampler unit */ 2287 exec_tex(mach, inst, FALSE, FALSE); 2288 break; 2289 2290 case TGSI_OPCODE_TXB: 2291 /* Texture lookup with lod bias */ 2292 /* src[0] = texcoord (src[0].w = LOD bias) */ 2293 /* src[1] = sampler unit */ 2294 exec_tex(mach, inst, TRUE, FALSE); 2295 break; 2296 2297 case TGSI_OPCODE_TXD: 2298 /* Texture lookup with explict partial derivatives */ 2299 /* src[0] = texcoord */ 2300 /* src[1] = d[strq]/dx */ 2301 /* src[2] = d[strq]/dy */ 2302 /* src[3] = sampler unit */ 2303 assert (0); 2304 break; 2305 2306 case TGSI_OPCODE_TXL: 2307 /* Texture lookup with explit LOD */ 2308 /* src[0] = texcoord (src[0].w = LOD) */ 2309 /* src[1] = sampler unit */ 2310 exec_tex(mach, inst, TRUE, FALSE); 2311 break; 2312 2313 case TGSI_OPCODE_TXP: 2314 /* Texture lookup with projection */ 2315 /* src[0] = texcoord (src[0].w = projection) */ 2316 /* src[1] = sampler unit */ 2317 exec_tex(mach, inst, FALSE, TRUE); 2318 break; 2319 2320 case TGSI_OPCODE_UP2H: 2321 assert (0); 2322 break; 2323 2324 case TGSI_OPCODE_UP2US: 2325 assert (0); 2326 break; 2327 2328 case TGSI_OPCODE_UP4B: 2329 assert (0); 2330 break; 2331 2332 case TGSI_OPCODE_UP4UB: 2333 assert (0); 2334 break; 2335 2336 case TGSI_OPCODE_X2D: 2337 assert (0); 2338 break; 2339 2340 case TGSI_OPCODE_ARA: 2341 assert (0); 2342 break; 2343 2344 case 
TGSI_OPCODE_ARR: 2345 assert (0); 2346 break; 2347 2348 case TGSI_OPCODE_BRA: 2349 assert (0); 2350 break; 2351 2352 case TGSI_OPCODE_CAL: 2353 /* skip the call if no execution channels are enabled */ 2354 if (mach->ExecMask) { 2355 /* do the call */ 2356 2357 /* push the Cond, Loop, Cont stacks */ 2358 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2359 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2360 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2361 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2362 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2363 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2364 2365 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2366 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2367 2368 /* note that PC was already incremented above */ 2369 mach->CallStack[mach->CallStackTop++] = *pc; 2370 *pc = inst->InstructionExtLabel.Label; 2371 } 2372 break; 2373 2374 case TGSI_OPCODE_RET: 2375 mach->FuncMask &= ~mach->ExecMask; 2376 UPDATE_EXEC_MASK(mach); 2377 2378 if (mach->FuncMask == 0x0) { 2379 /* really return now (otherwise, keep executing */ 2380 2381 if (mach->CallStackTop == 0) { 2382 /* returning from main() */ 2383 *pc = -1; 2384 return; 2385 } 2386 *pc = mach->CallStack[--mach->CallStackTop]; 2387 2388 /* pop the Cond, Loop, Cont stacks */ 2389 assert(mach->CondStackTop > 0); 2390 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2391 assert(mach->LoopStackTop > 0); 2392 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2393 assert(mach->ContStackTop > 0); 2394 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2395 assert(mach->FuncStackTop > 0); 2396 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2397 2398 UPDATE_EXEC_MASK(mach); 2399 } 2400 break; 2401 2402 case TGSI_OPCODE_SSG: 2403 assert (0); 2404 break; 2405 2406 case TGSI_OPCODE_CMP: 2407 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2408 FETCH(&r[0], 0, chan_index); 
2409 FETCH(&r[1], 1, chan_index); 2410 FETCH(&r[2], 2, chan_index); 2411 2412 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2413 2414 STORE(&r[0], 0, chan_index); 2415 } 2416 break; 2417 2418 case TGSI_OPCODE_SCS: 2419 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2420 FETCH( &r[0], 0, CHAN_X ); 2421 } 2422 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { 2423 micro_cos( &r[1], &r[0] ); 2424 STORE( &r[1], 0, CHAN_X ); 2425 } 2426 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2427 micro_sin( &r[1], &r[0] ); 2428 STORE( &r[1], 0, CHAN_Y ); 2429 } 2430 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2431 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2432 } 2433 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2434 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2435 } 2436 break; 2437 2438 case TGSI_OPCODE_NRM: 2439 assert (0); 2440 break; 2441 2442 case TGSI_OPCODE_DIV: 2443 assert( 0 ); 2444 break; 2445 2446 case TGSI_OPCODE_DP2: 2447 FETCH( &r[0], 0, CHAN_X ); 2448 FETCH( &r[1], 1, CHAN_X ); 2449 micro_mul( &r[0], &r[0], &r[1] ); 2450 2451 FETCH( &r[1], 0, CHAN_Y ); 2452 FETCH( &r[2], 1, CHAN_Y ); 2453 micro_mul( &r[1], &r[1], &r[2] ); 2454 micro_add( &r[0], &r[0], &r[1] ); 2455 2456 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2457 STORE( &r[0], 0, chan_index ); 2458 } 2459 break; 2460 2461 case TGSI_OPCODE_IF: 2462 /* push CondMask */ 2463 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2464 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2465 FETCH( &r[0], 0, CHAN_X ); 2466 /* update CondMask */ 2467 if( ! r[0].u[0] ) { 2468 mach->CondMask &= ~0x1; 2469 } 2470 if( ! r[0].u[1] ) { 2471 mach->CondMask &= ~0x2; 2472 } 2473 if( ! r[0].u[2] ) { 2474 mach->CondMask &= ~0x4; 2475 } 2476 if( ! 
r[0].u[3] ) { 2477 mach->CondMask &= ~0x8; 2478 } 2479 UPDATE_EXEC_MASK(mach); 2480 /* Todo: If CondMask==0, jump to ELSE */ 2481 break; 2482 2483 case TGSI_OPCODE_ELSE: 2484 /* invert CondMask wrt previous mask */ 2485 { 2486 uint prevMask; 2487 assert(mach->CondStackTop > 0); 2488 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2489 mach->CondMask = ~mach->CondMask & prevMask; 2490 UPDATE_EXEC_MASK(mach); 2491 /* Todo: If CondMask==0, jump to ENDIF */ 2492 } 2493 break; 2494 2495 case TGSI_OPCODE_ENDIF: 2496 /* pop CondMask */ 2497 assert(mach->CondStackTop > 0); 2498 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2499 UPDATE_EXEC_MASK(mach); 2500 break; 2501 2502 case TGSI_OPCODE_END: 2503 /* halt execution */ 2504 *pc = -1; 2505 break; 2506 2507 case TGSI_OPCODE_REP: 2508 assert (0); 2509 break; 2510 2511 case TGSI_OPCODE_ENDREP: 2512 assert (0); 2513 break; 2514 2515 case TGSI_OPCODE_PUSHA: 2516 assert (0); 2517 break; 2518 2519 case TGSI_OPCODE_POPA: 2520 assert (0); 2521 break; 2522 2523 case TGSI_OPCODE_CEIL: 2524 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2525 FETCH( &r[0], 0, chan_index ); 2526 micro_ceil( &r[0], &r[0] ); 2527 STORE( &r[0], 0, chan_index ); 2528 } 2529 break; 2530 2531 case TGSI_OPCODE_I2F: 2532 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2533 FETCH( &r[0], 0, chan_index ); 2534 micro_i2f( &r[0], &r[0] ); 2535 STORE( &r[0], 0, chan_index ); 2536 } 2537 break; 2538 2539 case TGSI_OPCODE_NOT: 2540 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2541 FETCH( &r[0], 0, chan_index ); 2542 micro_not( &r[0], &r[0] ); 2543 STORE( &r[0], 0, chan_index ); 2544 } 2545 break; 2546 2547 case TGSI_OPCODE_TRUNC: 2548 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2549 FETCH( &r[0], 0, chan_index ); 2550 micro_trunc( &r[0], &r[0] ); 2551 STORE( &r[0], 0, chan_index ); 2552 } 2553 break; 2554 2555 case TGSI_OPCODE_SHL: 2556 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2557 FETCH( &r[0], 0, chan_index ); 2558 FETCH( &r[1], 1, 
chan_index ); 2559 micro_shl( &r[0], &r[0], &r[1] ); 2560 STORE( &r[0], 0, chan_index ); 2561 } 2562 break; 2563 2564 case TGSI_OPCODE_SHR: 2565 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2566 FETCH( &r[0], 0, chan_index ); 2567 FETCH( &r[1], 1, chan_index ); 2568 micro_ishr( &r[0], &r[0], &r[1] ); 2569 STORE( &r[0], 0, chan_index ); 2570 } 2571 break; 2572 2573 case TGSI_OPCODE_AND: 2574 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2575 FETCH( &r[0], 0, chan_index ); 2576 FETCH( &r[1], 1, chan_index ); 2577 micro_and( &r[0], &r[0], &r[1] ); 2578 STORE( &r[0], 0, chan_index ); 2579 } 2580 break; 2581 2582 case TGSI_OPCODE_OR: 2583 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2584 FETCH( &r[0], 0, chan_index ); 2585 FETCH( &r[1], 1, chan_index ); 2586 micro_or( &r[0], &r[0], &r[1] ); 2587 STORE( &r[0], 0, chan_index ); 2588 } 2589 break; 2590 2591 case TGSI_OPCODE_MOD: 2592 assert (0); 2593 break; 2594 2595 case TGSI_OPCODE_XOR: 2596 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2597 FETCH( &r[0], 0, chan_index ); 2598 FETCH( &r[1], 1, chan_index ); 2599 micro_xor( &r[0], &r[0], &r[1] ); 2600 STORE( &r[0], 0, chan_index ); 2601 } 2602 break; 2603 2604 case TGSI_OPCODE_SAD: 2605 assert (0); 2606 break; 2607 2608 case TGSI_OPCODE_TXF: 2609 assert (0); 2610 break; 2611 2612 case TGSI_OPCODE_TXQ: 2613 assert (0); 2614 break; 2615 2616 case TGSI_OPCODE_EMIT: 2617 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 2618 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2619 break; 2620 2621 case TGSI_OPCODE_ENDPRIM: 2622 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 2623 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 2624 break; 2625 2626 case TGSI_OPCODE_LOOP: 2627 /* fall-through (for now) */ 2628 case TGSI_OPCODE_BGNLOOP2: 2629 /* push LoopMask and ContMasks */ 2630 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2631 mach->LoopStack[mach->LoopStackTop++] = 
mach->LoopMask; 2632 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2633 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2634 break; 2635 2636 case TGSI_OPCODE_ENDLOOP: 2637 /* fall-through (for now at least) */ 2638 case TGSI_OPCODE_ENDLOOP2: 2639 /* Restore ContMask, but don't pop */ 2640 assert(mach->ContStackTop > 0); 2641 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 2642 UPDATE_EXEC_MASK(mach); 2643 if (mach->ExecMask) { 2644 /* repeat loop: jump to instruction just past BGNLOOP */ 2645 *pc = inst->InstructionExtLabel.Label + 1; 2646 } 2647 else { 2648 /* exit loop: pop LoopMask */ 2649 assert(mach->LoopStackTop > 0); 2650 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2651 /* pop ContMask */ 2652 assert(mach->ContStackTop > 0); 2653 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2654 } 2655 UPDATE_EXEC_MASK(mach); 2656 break; 2657 2658 case TGSI_OPCODE_BRK: 2659 /* turn off loop channels for each enabled exec channel */ 2660 mach->LoopMask &= ~mach->ExecMask; 2661 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2662 UPDATE_EXEC_MASK(mach); 2663 break; 2664 2665 case TGSI_OPCODE_CONT: 2666 /* turn off cont channels for each enabled exec channel */ 2667 mach->ContMask &= ~mach->ExecMask; 2668 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2669 UPDATE_EXEC_MASK(mach); 2670 break; 2671 2672 case TGSI_OPCODE_BGNSUB: 2673 /* no-op */ 2674 break; 2675 2676 case TGSI_OPCODE_ENDSUB: 2677 /* no-op */ 2678 break; 2679 2680 case TGSI_OPCODE_NOISE1: 2681 assert( 0 ); 2682 break; 2683 2684 case TGSI_OPCODE_NOISE2: 2685 assert( 0 ); 2686 break; 2687 2688 case TGSI_OPCODE_NOISE3: 2689 assert( 0 ); 2690 break; 2691 2692 case TGSI_OPCODE_NOISE4: 2693 assert( 0 ); 2694 break; 2695 2696 case TGSI_OPCODE_NOP: 2697 break; 2698 2699 default: 2700 assert( 0 ); 2701 } 2702} 2703 2704 2705/** 2706 * Run TGSI interpreter. 
2707 * \return bitmask of "alive" quad components 2708 */ 2709uint 2710tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 2711{ 2712 uint i; 2713 int pc = 0; 2714 2715 mach->CondMask = 0xf; 2716 mach->LoopMask = 0xf; 2717 mach->ContMask = 0xf; 2718 mach->FuncMask = 0xf; 2719 mach->ExecMask = 0xf; 2720 2721 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 2722 assert(mach->CondStackTop == 0); 2723 assert(mach->LoopStackTop == 0); 2724 assert(mach->ContStackTop == 0); 2725 assert(mach->CallStackTop == 0); 2726 2727 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 2728 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 2729 2730 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 2731 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 2732 mach->Primitives[0] = 0; 2733 } 2734 2735 for (i = 0; i < QUAD_SIZE; i++) { 2736 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 2737 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 2738 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 2739 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 2740 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 2741 } 2742 2743 /* execute declarations (interpolants) */ 2744 for (i = 0; i < mach->NumDeclarations; i++) { 2745 exec_declaration( mach, mach->Declarations+i ); 2746 } 2747 2748 /* execute instructions, until pc is set to -1 */ 2749 while (pc != -1) { 2750 assert(pc < (int) mach->NumInstructions); 2751 exec_instruction( mach, mach->Instructions + pc, &pc ); 2752 } 2753 2754#if 0 2755 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 2756 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 2757 /* 2758 * Scale back depth component. 2759 */ 2760 for (i = 0; i < 4; i++) 2761 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 2762 } 2763#endif 2764 2765 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 2766} 2767 2768 2769