tgsi_exec.c revision dd55083ac1c13723dba6be71f161e2ca7cac7c66
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_parse.h" 57#include "tgsi/tgsi_util.h" 58#include "tgsi_exec.h" 59#include "util/u_memory.h" 60#include "util/u_math.h" 61 62#define FAST_MATH 1 63 64#define TILE_TOP_LEFT 0 65#define TILE_TOP_RIGHT 1 66#define TILE_BOTTOM_LEFT 2 67#define TILE_BOTTOM_RIGHT 3 68 69#define CHAN_X 0 70#define CHAN_Y 1 71#define CHAN_Z 2 72#define CHAN_W 3 73 74/* 75 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 76 */ 77#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 78#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 79#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 80#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 81#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 82#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 83#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 84#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 85#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 86#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 87#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 88#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 89#define TEMP_128_I TGSI_EXEC_TEMP_128_I 90#define TEMP_128_C TGSI_EXEC_TEMP_128_C 91#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 92#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 93#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 94#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 95#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 96#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 97#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 98#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 99#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 100#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 101#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 102#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 103#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 104#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 105#define TEMP_R0 TGSI_EXEC_TEMP_R0 106 107#define IS_CHANNEL_ENABLED(INST, CHAN)\ 108 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 109 110#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 111 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 112 113#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 114 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 115 if (IS_CHANNEL_ENABLED( INST, CHAN )) 116 117#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 118 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 119 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 120 121 122/** The execution mask depends on the conditional mask and the loop mask */ 123#define UPDATE_EXEC_MASK(MACH) \ 124 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 125 126/** 127 * Initialize machine state by expanding tokens to full instructions, 128 * allocating temporary storage, setting up constants, etc. 129 * After this, we can call tgsi_exec_machine_run() many times. 130 */ 131void 132tgsi_exec_machine_bind_shader( 133 struct tgsi_exec_machine *mach, 134 const struct tgsi_token *tokens, 135 uint numSamplers, 136 struct tgsi_sampler **samplers) 137{ 138 uint k; 139 struct tgsi_parse_context parse; 140 struct tgsi_exec_labels *labels = &mach->Labels; 141 struct tgsi_full_instruction *instructions; 142 struct tgsi_full_declaration *declarations; 143 uint maxInstructions = 10, numInstructions = 0; 144 uint maxDeclarations = 10, numDeclarations = 0; 145 uint instno = 0; 146 147#if 0 148 tgsi_dump(tokens, 0); 149#endif 150 151 util_init_math(); 152 153 mach->Tokens = tokens; 154 mach->Samplers = samplers; 155 156 k = tgsi_parse_init (&parse, mach->Tokens); 157 if (k != TGSI_PARSE_OK) { 158 debug_printf( "Problem parsing!\n" ); 159 return; 160 } 161 162 mach->Processor = parse.FullHeader.Processor.Processor; 163 mach->ImmLimit = 0; 164 labels->count = 0; 165 166 declarations = (struct tgsi_full_declaration *) 167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 168 169 if (!declarations) { 170 return; 171 } 172 173 instructions = (struct tgsi_full_instruction *) 174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 175 176 if (!instructions) { 177 FREE( declarations ); 178 return; 179 } 180 181 while( !tgsi_parse_end_of_tokens( &parse ) ) { 182 uint pointer = parse.Position; 183 uint i; 184 185 tgsi_parse_token( &parse ); 186 switch( parse.FullToken.Token.Type ) { 187 case TGSI_TOKEN_TYPE_DECLARATION: 188 /* save expanded declaration */ 189 if (numDeclarations == maxDeclarations) { 190 declarations = REALLOC(declarations, 191 maxDeclarations 192 * sizeof(struct tgsi_full_declaration), 193 (maxDeclarations + 10) 194 * sizeof(struct tgsi_full_declaration)); 195 maxDeclarations += 10; 196 } 197 memcpy(declarations + numDeclarations, 198 &parse.FullToken.FullDeclaration, 199 sizeof(declarations[0])); 200 numDeclarations++; 201 break; 202 203 case TGSI_TOKEN_TYPE_IMMEDIATE: 204 { 205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; 206 assert( size % 4 == 0 ); 207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); 208 209 for( i = 0; i < size; i++ ) { 210 mach->Imms[mach->ImmLimit + i / 4][i % 4] = 211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 212 } 213 mach->ImmLimit += size / 4; 214 } 215 break; 216 217 case TGSI_TOKEN_TYPE_INSTRUCTION: 218 assert( labels->count < MAX_LABELS ); 219 220 labels->labels[labels->count][0] = instno; 221 labels->labels[labels->count][1] = pointer; 222 labels->count++; 223 224 /* save expanded instruction */ 225 if (numInstructions == maxInstructions) { 226 instructions = REALLOC(instructions, 227 maxInstructions 228 * sizeof(struct tgsi_full_instruction), 229 (maxInstructions + 10) 230 * sizeof(struct tgsi_full_instruction)); 231 maxInstructions += 10; 232 } 233 memcpy(instructions + numInstructions, 234 &parse.FullToken.FullInstruction, 235 sizeof(instructions[0])); 236 numInstructions++; 237 break; 238 239 default: 240 assert( 0 ); 241 } 242 } 243 tgsi_parse_free (&parse); 244 245 if (mach->Declarations) { 246 FREE( mach->Declarations ); 247 } 248 mach->Declarations = declarations; 249 mach->NumDeclarations = numDeclarations; 250 251 if (mach->Instructions) { 252 FREE( mach->Instructions ); 253 } 254 mach->Instructions = instructions; 255 mach->NumInstructions = numInstructions; 256} 257 258 259void 260tgsi_exec_machine_init( 261 struct tgsi_exec_machine *mach ) 262{ 263 uint i; 264 265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); 266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 267 268 /* Setup constants. */ 269 for( i = 0; i < 4; i++ ) { 270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 280 } 281} 282 283 284void 285tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) 286{ 287 if (mach->Instructions) { 288 FREE(mach->Instructions); 289 mach->Instructions = NULL; 290 mach->NumInstructions = 0; 291 } 292 if (mach->Declarations) { 293 FREE(mach->Declarations); 294 mach->Declarations = NULL; 295 mach->NumDeclarations = 0; 296 } 297} 298 299 300static void 301micro_abs( 302 union tgsi_exec_channel *dst, 303 const union tgsi_exec_channel *src ) 304{ 305 dst->f[0] = fabsf( src->f[0] ); 306 dst->f[1] = fabsf( src->f[1] ); 307 dst->f[2] = fabsf( src->f[2] ); 308 dst->f[3] = fabsf( src->f[3] ); 309} 310 311static void 312micro_add( 313 union tgsi_exec_channel *dst, 314 const union tgsi_exec_channel *src0, 315 const union tgsi_exec_channel *src1 ) 316{ 317 dst->f[0] = src0->f[0] + src1->f[0]; 318 dst->f[1] = src0->f[1] + src1->f[1]; 319 dst->f[2] = src0->f[2] + src1->f[2]; 320 dst->f[3] = src0->f[3] + src1->f[3]; 321} 322 323static void 324micro_iadd( 325 union tgsi_exec_channel *dst, 326 const union tgsi_exec_channel *src0, 327 const union tgsi_exec_channel *src1 ) 328{ 329 dst->i[0] = src0->i[0] + src1->i[0]; 330 dst->i[1] = src0->i[1] + src1->i[1]; 331 dst->i[2] = src0->i[2] + src1->i[2]; 332 dst->i[3] = src0->i[3] + src1->i[3]; 333} 334 335static void 336micro_and( 337 union tgsi_exec_channel *dst, 338 const union tgsi_exec_channel *src0, 339 const union tgsi_exec_channel *src1 ) 340{ 341 dst->u[0] = src0->u[0] & src1->u[0]; 342 dst->u[1] = src0->u[1] & src1->u[1]; 343 dst->u[2] = src0->u[2] & src1->u[2]; 344 dst->u[3] = src0->u[3] & src1->u[3]; 345} 346 347static void 348micro_ceil( 349 union tgsi_exec_channel *dst, 350 const union tgsi_exec_channel *src ) 351{ 352 dst->f[0] = ceilf( src->f[0] ); 353 dst->f[1] = ceilf( src->f[1] ); 354 dst->f[2] = ceilf( src->f[2] ); 355 dst->f[3] = ceilf( src->f[3] ); 356} 357 358static void 359micro_cos( 360 union tgsi_exec_channel *dst, 361 const union tgsi_exec_channel *src ) 362{ 363 dst->f[0] = cosf( src->f[0] ); 364 dst->f[1] = cosf( src->f[1] ); 365 dst->f[2] = cosf( src->f[2] ); 366 dst->f[3] = cosf( src->f[3] ); 367} 368 369static void 370micro_ddx( 371 union tgsi_exec_channel *dst, 372 const union tgsi_exec_channel *src ) 373{ 374 dst->f[0] = 375 dst->f[1] = 376 dst->f[2] = 377 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 378} 379 380static void 381micro_ddy( 382 union tgsi_exec_channel *dst, 383 const union tgsi_exec_channel *src ) 384{ 385 dst->f[0] = 386 dst->f[1] = 387 dst->f[2] = 388 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 389} 390 391static void 392micro_div( 393 union tgsi_exec_channel *dst, 394 const union tgsi_exec_channel *src0, 395 const union tgsi_exec_channel *src1 ) 396{ 397 if (src1->f[0] != 0) { 398 dst->f[0] = src0->f[0] / src1->f[0]; 399 } 400 if (src1->f[1] != 0) { 401 dst->f[1] = src0->f[1] / src1->f[1]; 402 } 403 if (src1->f[2] != 0) { 404 dst->f[2] = src0->f[2] / src1->f[2]; 405 } 406 if (src1->f[3] != 0) { 407 dst->f[3] = src0->f[3] / src1->f[3]; 408 } 409} 410 411static void 412micro_udiv( 413 union tgsi_exec_channel *dst, 414 const union tgsi_exec_channel *src0, 415 const union tgsi_exec_channel *src1 ) 416{ 417 dst->u[0] = src0->u[0] / src1->u[0]; 418 dst->u[1] = src0->u[1] / src1->u[1]; 419 dst->u[2] = src0->u[2] / src1->u[2]; 420 dst->u[3] = src0->u[3] / src1->u[3]; 421} 422 423static void 424micro_eq( 425 union tgsi_exec_channel *dst, 426 const union tgsi_exec_channel *src0, 427 const union tgsi_exec_channel *src1, 428 const union tgsi_exec_channel *src2, 429 const union tgsi_exec_channel *src3 ) 430{ 431 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 432 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 433 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 434 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 435} 436 437static void 438micro_ieq( 439 union tgsi_exec_channel *dst, 440 const union tgsi_exec_channel *src0, 441 const union tgsi_exec_channel *src1, 442 const union tgsi_exec_channel *src2, 443 const union tgsi_exec_channel *src3 ) 444{ 445 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 446 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 447 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 448 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 449} 450 451static void 452micro_exp2( 453 union tgsi_exec_channel *dst, 454 const union tgsi_exec_channel *src) 455{ 456#if FAST_MATH 457 dst->f[0] = util_fast_exp2( src->f[0] ); 458 dst->f[1] = util_fast_exp2( src->f[1] ); 459 dst->f[2] = util_fast_exp2( src->f[2] ); 460 dst->f[3] = util_fast_exp2( src->f[3] ); 461#else 462 dst->f[0] = powf( 2.0f, src->f[0] ); 463 dst->f[1] = powf( 2.0f, src->f[1] ); 464 dst->f[2] = powf( 2.0f, src->f[2] ); 465 dst->f[3] = powf( 2.0f, src->f[3] ); 466#endif 467} 468 469static void 470micro_f2ut( 471 union tgsi_exec_channel *dst, 472 const union tgsi_exec_channel *src ) 473{ 474 dst->u[0] = (uint) src->f[0]; 475 dst->u[1] = (uint) src->f[1]; 476 dst->u[2] = (uint) src->f[2]; 477 dst->u[3] = (uint) src->f[3]; 478} 479 480static void 481micro_flr( 482 union tgsi_exec_channel *dst, 483 const union tgsi_exec_channel *src ) 484{ 485 dst->f[0] = floorf( src->f[0] ); 486 dst->f[1] = floorf( src->f[1] ); 487 dst->f[2] = floorf( src->f[2] ); 488 dst->f[3] = floorf( src->f[3] ); 489} 490 491static void 492micro_frc( 493 union tgsi_exec_channel *dst, 494 const union tgsi_exec_channel *src ) 495{ 496 dst->f[0] = src->f[0] - floorf( src->f[0] ); 497 dst->f[1] = src->f[1] - floorf( src->f[1] ); 498 dst->f[2] = src->f[2] - floorf( src->f[2] ); 499 dst->f[3] = src->f[3] - floorf( src->f[3] ); 500} 501 502static void 503micro_ge( 504 union tgsi_exec_channel *dst, 505 const union tgsi_exec_channel *src0, 506 const union tgsi_exec_channel *src1, 507 const union tgsi_exec_channel *src2, 508 const union tgsi_exec_channel *src3 ) 509{ 510 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; 511 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; 512 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; 513 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; 514} 515 516static void 517micro_i2f( 518 union tgsi_exec_channel *dst, 519 const union tgsi_exec_channel *src ) 520{ 521 dst->f[0] = (float) src->i[0]; 522 dst->f[1] = (float) src->i[1]; 523 dst->f[2] = (float) src->i[2]; 524 dst->f[3] = (float) src->i[3]; 525} 526 527static void 528micro_lg2( 529 union tgsi_exec_channel *dst, 530 const union tgsi_exec_channel *src ) 531{ 532#if FAST_MATH 533 dst->f[0] = util_fast_log2( src->f[0] ); 534 dst->f[1] = util_fast_log2( src->f[1] ); 535 dst->f[2] = util_fast_log2( src->f[2] ); 536 dst->f[3] = util_fast_log2( src->f[3] ); 537#else 538 dst->f[0] = logf( src->f[0] ) * 1.442695f; 539 dst->f[1] = logf( src->f[1] ) * 1.442695f; 540 dst->f[2] = logf( src->f[2] ) * 1.442695f; 541 dst->f[3] = logf( src->f[3] ) * 1.442695f; 542#endif 543} 544 545static void 546micro_le( 547 union tgsi_exec_channel *dst, 548 const union tgsi_exec_channel *src0, 549 const union tgsi_exec_channel *src1, 550 const union tgsi_exec_channel *src2, 551 const union tgsi_exec_channel *src3 ) 552{ 553 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 554 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 555 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 556 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 557} 558 559static void 560micro_lt( 561 union tgsi_exec_channel *dst, 562 const union tgsi_exec_channel *src0, 563 const union tgsi_exec_channel *src1, 564 const union tgsi_exec_channel *src2, 565 const union tgsi_exec_channel *src3 ) 566{ 567 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 568 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 569 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 570 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 571} 572 573static void 574micro_ilt( 575 union tgsi_exec_channel *dst, 576 const union tgsi_exec_channel *src0, 577 const union tgsi_exec_channel *src1, 578 const union tgsi_exec_channel *src2, 579 const union tgsi_exec_channel *src3 ) 580{ 581 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 582 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 583 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 584 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 585} 586 587static void 588micro_ult( 589 union tgsi_exec_channel *dst, 590 const union tgsi_exec_channel *src0, 591 const union tgsi_exec_channel *src1, 592 const union tgsi_exec_channel *src2, 593 const union tgsi_exec_channel *src3 ) 594{ 595 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 596 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 597 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 598 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 599} 600 601static void 602micro_max( 603 union tgsi_exec_channel *dst, 604 const union tgsi_exec_channel *src0, 605 const union tgsi_exec_channel *src1 ) 606{ 607 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 608 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 609 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 610 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 611} 612 613static void 614micro_imax( 615 union tgsi_exec_channel *dst, 616 const union tgsi_exec_channel *src0, 617 const union tgsi_exec_channel *src1 ) 618{ 619 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 620 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 621 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 622 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 623} 624 625static void 626micro_umax( 627 union tgsi_exec_channel *dst, 628 const union tgsi_exec_channel *src0, 629 const union tgsi_exec_channel *src1 ) 630{ 631 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 632 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 633 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 634 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 635} 636 637static void 638micro_min( 639 union tgsi_exec_channel *dst, 640 const union tgsi_exec_channel *src0, 641 const union tgsi_exec_channel *src1 ) 642{ 643 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 644 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 645 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 646 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 647} 648 649static void 650micro_imin( 651 union tgsi_exec_channel *dst, 652 const union tgsi_exec_channel *src0, 653 const union tgsi_exec_channel *src1 ) 654{ 655 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 656 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 657 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 658 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 659} 660 661static void 662micro_umin( 663 union tgsi_exec_channel *dst, 664 const union tgsi_exec_channel *src0, 665 const union tgsi_exec_channel *src1 ) 666{ 667 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 668 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 669 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 670 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 671} 672 673static void 674micro_umod( 675 union tgsi_exec_channel *dst, 676 const union tgsi_exec_channel *src0, 677 const union tgsi_exec_channel *src1 ) 678{ 679 dst->u[0] = src0->u[0] % src1->u[0]; 680 dst->u[1] = src0->u[1] % src1->u[1]; 681 dst->u[2] = src0->u[2] % src1->u[2]; 682 dst->u[3] = src0->u[3] % src1->u[3]; 683} 684 685static void 686micro_mul( 687 union tgsi_exec_channel *dst, 688 const union tgsi_exec_channel *src0, 689 const union tgsi_exec_channel *src1 ) 690{ 691 dst->f[0] = src0->f[0] * src1->f[0]; 692 dst->f[1] = src0->f[1] * src1->f[1]; 693 dst->f[2] = src0->f[2] * src1->f[2]; 694 dst->f[3] = src0->f[3] * src1->f[3]; 695} 696 697static void 698micro_imul( 699 union tgsi_exec_channel *dst, 700 const union tgsi_exec_channel *src0, 701 const union tgsi_exec_channel *src1 ) 702{ 703 dst->i[0] = src0->i[0] * src1->i[0]; 704 dst->i[1] = src0->i[1] * src1->i[1]; 705 dst->i[2] = src0->i[2] * src1->i[2]; 706 dst->i[3] = src0->i[3] * src1->i[3]; 707} 708 709static void 710micro_imul64( 711 union tgsi_exec_channel *dst0, 712 union tgsi_exec_channel *dst1, 713 const union tgsi_exec_channel *src0, 714 const union tgsi_exec_channel *src1 ) 715{ 716 dst1->i[0] = src0->i[0] * src1->i[0]; 717 dst1->i[1] = src0->i[1] * src1->i[1]; 718 dst1->i[2] = src0->i[2] * src1->i[2]; 719 dst1->i[3] = src0->i[3] * src1->i[3]; 720 dst0->i[0] = 0; 721 dst0->i[1] = 0; 722 dst0->i[2] = 0; 723 dst0->i[3] = 0; 724} 725 726static void 727micro_umul64( 728 union tgsi_exec_channel *dst0, 729 union tgsi_exec_channel *dst1, 730 const union tgsi_exec_channel *src0, 731 const union tgsi_exec_channel *src1 ) 732{ 733 dst1->u[0] = src0->u[0] * src1->u[0]; 734 dst1->u[1] = src0->u[1] * src1->u[1]; 735 dst1->u[2] = src0->u[2] * src1->u[2]; 736 dst1->u[3] = src0->u[3] * src1->u[3]; 737 dst0->u[0] = 0; 738 dst0->u[1] = 0; 739 dst0->u[2] = 0; 740 dst0->u[3] = 0; 741} 742 743static void 744micro_movc( 745 union tgsi_exec_channel *dst, 746 const union tgsi_exec_channel *src0, 747 const union tgsi_exec_channel *src1, 748 const union tgsi_exec_channel *src2 ) 749{ 750 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 751 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 752 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 753 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 754} 755 756static void 757micro_neg( 758 union tgsi_exec_channel *dst, 759 const union tgsi_exec_channel *src ) 760{ 761 dst->f[0] = -src->f[0]; 762 dst->f[1] = -src->f[1]; 763 dst->f[2] = -src->f[2]; 764 dst->f[3] = -src->f[3]; 765} 766 767static void 768micro_ineg( 769 union tgsi_exec_channel *dst, 770 const union tgsi_exec_channel *src ) 771{ 772 dst->i[0] = -src->i[0]; 773 dst->i[1] = -src->i[1]; 774 dst->i[2] = -src->i[2]; 775 dst->i[3] = -src->i[3]; 776} 777 778static void 779micro_not( 780 union tgsi_exec_channel *dst, 781 const union tgsi_exec_channel *src ) 782{ 783 dst->u[0] = ~src->u[0]; 784 dst->u[1] = ~src->u[1]; 785 dst->u[2] = ~src->u[2]; 786 dst->u[3] = ~src->u[3]; 787} 788 789static void 790micro_or( 791 union tgsi_exec_channel *dst, 792 const union tgsi_exec_channel *src0, 793 const union tgsi_exec_channel *src1 ) 794{ 795 dst->u[0] = src0->u[0] | src1->u[0]; 796 dst->u[1] = src0->u[1] | src1->u[1]; 797 dst->u[2] = src0->u[2] | src1->u[2]; 798 dst->u[3] = src0->u[3] | src1->u[3]; 799} 800 801static void 802micro_pow( 803 union tgsi_exec_channel *dst, 804 const union tgsi_exec_channel *src0, 805 const union tgsi_exec_channel *src1 ) 806{ 807#if FAST_MATH 808 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 809 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 810 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 811 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 812#else 813 dst->f[0] = powf( src0->f[0], src1->f[0] ); 814 dst->f[1] = powf( src0->f[1], src1->f[1] ); 815 dst->f[2] = powf( src0->f[2], src1->f[2] ); 816 dst->f[3] = powf( src0->f[3], src1->f[3] ); 817#endif 818} 819 820static void 821micro_rnd( 822 union tgsi_exec_channel *dst, 823 const union tgsi_exec_channel *src ) 824{ 825 dst->f[0] = floorf( src->f[0] + 0.5f ); 826 dst->f[1] = floorf( src->f[1] + 0.5f ); 827 dst->f[2] = floorf( src->f[2] + 0.5f ); 828 dst->f[3] = floorf( src->f[3] + 0.5f ); 829} 830 831static void 832micro_sgn( 833 union tgsi_exec_channel *dst, 834 const union tgsi_exec_channel *src ) 835{ 836 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 837 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 838 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 839 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 840} 841 842static void 843micro_shl( 844 union tgsi_exec_channel *dst, 845 const union tgsi_exec_channel *src0, 846 const union tgsi_exec_channel *src1 ) 847{ 848 dst->i[0] = src0->i[0] << src1->i[0]; 849 dst->i[1] = src0->i[1] << src1->i[1]; 850 dst->i[2] = src0->i[2] << src1->i[2]; 851 dst->i[3] = src0->i[3] << src1->i[3]; 852} 853 854static void 855micro_ishr( 856 union tgsi_exec_channel *dst, 857 const union tgsi_exec_channel *src0, 858 const union tgsi_exec_channel *src1 ) 859{ 860 dst->i[0] = src0->i[0] >> src1->i[0]; 861 dst->i[1] = src0->i[1] >> src1->i[1]; 862 dst->i[2] = src0->i[2] >> src1->i[2]; 863 dst->i[3] = src0->i[3] >> src1->i[3]; 864} 865 866static void 867micro_trunc( 868 union tgsi_exec_channel *dst, 869 const union tgsi_exec_channel *src0 ) 870{ 871 dst->f[0] = (float) (int) src0->f[0]; 872 dst->f[1] = (float) (int) src0->f[1]; 873 dst->f[2] = (float) (int) src0->f[2]; 874 dst->f[3] = (float) (int) src0->f[3]; 875} 876 877static void 878micro_ushr( 879 union tgsi_exec_channel *dst, 880 const union tgsi_exec_channel *src0, 881 const union tgsi_exec_channel *src1 ) 882{ 883 dst->u[0] = src0->u[0] >> src1->u[0]; 884 dst->u[1] = src0->u[1] >> src1->u[1]; 885 dst->u[2] = src0->u[2] >> src1->u[2]; 886 dst->u[3] = src0->u[3] >> src1->u[3]; 887} 888 889static void 890micro_sin( 891 union tgsi_exec_channel *dst, 892 const union tgsi_exec_channel *src ) 893{ 894 dst->f[0] = sinf( src->f[0] ); 895 dst->f[1] = sinf( src->f[1] ); 896 dst->f[2] = sinf( src->f[2] ); 897 dst->f[3] = sinf( src->f[3] ); 898} 899 900static void 901micro_sqrt( union tgsi_exec_channel *dst, 902 const union tgsi_exec_channel *src ) 903{ 904 dst->f[0] = sqrtf( src->f[0] ); 905 dst->f[1] = sqrtf( src->f[1] ); 906 dst->f[2] = sqrtf( src->f[2] ); 907 dst->f[3] = sqrtf( src->f[3] ); 908} 909 910static void 911micro_sub( 912 union tgsi_exec_channel *dst, 913 const union tgsi_exec_channel *src0, 914 const union tgsi_exec_channel *src1 ) 915{ 916 dst->f[0] = src0->f[0] - src1->f[0]; 917 dst->f[1] = src0->f[1] - src1->f[1]; 918 dst->f[2] = src0->f[2] - src1->f[2]; 919 dst->f[3] = src0->f[3] - src1->f[3]; 920} 921 922static void 923micro_u2f( 924 union tgsi_exec_channel *dst, 925 const union tgsi_exec_channel *src ) 926{ 927 dst->f[0] = (float) src->u[0]; 928 dst->f[1] = (float) src->u[1]; 929 dst->f[2] = (float) src->u[2]; 930 dst->f[3] = (float) src->u[3]; 931} 932 933static void 934micro_xor( 935 union tgsi_exec_channel *dst, 936 const union tgsi_exec_channel *src0, 937 const union tgsi_exec_channel *src1 ) 938{ 939 dst->u[0] = src0->u[0] ^ src1->u[0]; 940 dst->u[1] = src0->u[1] ^ src1->u[1]; 941 dst->u[2] = src0->u[2] ^ src1->u[2]; 942 dst->u[3] = src0->u[3] ^ src1->u[3]; 943} 944 945static void 946fetch_src_file_channel( 947 const struct tgsi_exec_machine *mach, 948 const uint file, 949 const uint swizzle, 950 const union tgsi_exec_channel *index, 951 union tgsi_exec_channel *chan ) 952{ 953 switch( swizzle ) { 954 case TGSI_EXTSWIZZLE_X: 955 case TGSI_EXTSWIZZLE_Y: 956 case TGSI_EXTSWIZZLE_Z: 957 case TGSI_EXTSWIZZLE_W: 958 switch( file ) { 959 case TGSI_FILE_CONSTANT: 960 assert(mach->Consts); 961 if (index->i[0] < 0) 962 chan->f[0] = 0.0f; 963 else 964 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 965 if (index->i[1] < 0) 966 chan->f[1] = 0.0f; 967 else 968 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 969 if (index->i[2] < 0) 970 chan->f[2] = 0.0f; 971 else 972 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 973 if (index->i[3] < 0) 974 chan->f[3] = 0.0f; 975 else 976 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 977 break; 978 979 case TGSI_FILE_INPUT: 980 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 981 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 982 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 983 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 984 break; 985 986 case TGSI_FILE_TEMPORARY: 987 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 988 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 989 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 990 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 991 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 992 break; 993 994 case TGSI_FILE_IMMEDIATE: 995 assert( index->i[0] < (int) mach->ImmLimit ); 996 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 997 assert( index->i[1] < (int) mach->ImmLimit ); 998 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 999 assert( index->i[2] < (int) mach->ImmLimit ); 1000 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1001 assert( index->i[3] < (int) mach->ImmLimit ); 1002 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1003 break; 1004 1005 case TGSI_FILE_ADDRESS: 1006 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1007 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1008 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1009 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1010 break; 1011 1012 case TGSI_FILE_OUTPUT: 1013 /* vertex/fragment output vars can be read too */ 1014 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1015 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1016 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1017 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1018 break; 1019 1020 default: 1021 assert( 0 ); 1022 } 1023 break; 1024 1025 case TGSI_EXTSWIZZLE_ZERO: 1026 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1027 break; 1028 1029 case TGSI_EXTSWIZZLE_ONE: 1030 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1031 break; 1032 1033 default: 1034 assert( 0 ); 1035 } 1036} 1037 1038static void 1039fetch_source( 1040 const struct tgsi_exec_machine *mach, 1041 union tgsi_exec_channel *chan, 1042 const struct tgsi_full_src_register *reg, 1043 const uint chan_index ) 1044{ 1045 union tgsi_exec_channel index; 1046 uint swizzle; 1047 1048 /* We start with a direct index into a register file. 1049 * 1050 * file[1], 1051 * where: 1052 * file = SrcRegister.File 1053 * [1] = SrcRegister.Index 1054 */ 1055 index.i[0] = 1056 index.i[1] = 1057 index.i[2] = 1058 index.i[3] = reg->SrcRegister.Index; 1059 1060 /* There is an extra source register that indirectly subscripts 1061 * a register file. The direct index now becomes an offset 1062 * that is being added to the indirect register. 1063 * 1064 * file[ind[2].x+1], 1065 * where: 1066 * ind = SrcRegisterInd.File 1067 * [2] = SrcRegisterInd.Index 1068 * .x = SrcRegisterInd.SwizzleX 1069 */ 1070 if (reg->SrcRegister.Indirect) { 1071 union tgsi_exec_channel index2; 1072 union tgsi_exec_channel indir_index; 1073 const uint execmask = mach->ExecMask; 1074 uint i; 1075 1076 /* which address register (always zero now) */ 1077 index2.i[0] = 1078 index2.i[1] = 1079 index2.i[2] = 1080 index2.i[3] = reg->SrcRegisterInd.Index; 1081 1082 /* get current value of address register[swizzle] */ 1083 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1084 fetch_src_file_channel( 1085 mach, 1086 reg->SrcRegisterInd.File, 1087 swizzle, 1088 &index2, 1089 &indir_index ); 1090 1091 /* add value of address register to the offset */ 1092 index.i[0] += (int) indir_index.f[0]; 1093 index.i[1] += (int) indir_index.f[1]; 1094 index.i[2] += (int) indir_index.f[2]; 1095 index.i[3] += (int) indir_index.f[3]; 1096 1097 /* for disabled execution channels, zero-out the index to 1098 * avoid using a potential garbage value. 1099 */ 1100 for (i = 0; i < QUAD_SIZE; i++) { 1101 if ((execmask & (1 << i)) == 0) 1102 index.i[i] = 0; 1103 } 1104 } 1105 1106 /* There is an extra source register that is a second 1107 * subscript to a register file. Effectively it means that 1108 * the register file is actually a 2D array of registers. 1109 * 1110 * file[1][3] == file[1*sizeof(file[1])+3], 1111 * where: 1112 * [3] = SrcRegisterDim.Index 1113 */ 1114 if (reg->SrcRegister.Dimension) { 1115 /* The size of the first-order array depends on the register file type. 1116 * We need to multiply the index to the first array to get an effective, 1117 * "flat" index that points to the beginning of the second-order array. 1118 */ 1119 switch (reg->SrcRegister.File) { 1120 case TGSI_FILE_INPUT: 1121 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1122 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1123 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1124 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1125 break; 1126 case TGSI_FILE_CONSTANT: 1127 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1128 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1129 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1130 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1131 break; 1132 default: 1133 assert( 0 ); 1134 } 1135 1136 index.i[0] += reg->SrcRegisterDim.Index; 1137 index.i[1] += reg->SrcRegisterDim.Index; 1138 index.i[2] += reg->SrcRegisterDim.Index; 1139 index.i[3] += reg->SrcRegisterDim.Index; 1140 1141 /* Again, the second subscript index can be addressed indirectly 1142 * identically to the first one. 1143 * Nothing stops us from indirectly addressing the indirect register, 1144 * but there is no need for that, so we won't exercise it. 1145 * 1146 * file[1][ind[4].y+3], 1147 * where: 1148 * ind = SrcRegisterDimInd.File 1149 * [4] = SrcRegisterDimInd.Index 1150 * .y = SrcRegisterDimInd.SwizzleX 1151 */ 1152 if (reg->SrcRegisterDim.Indirect) { 1153 union tgsi_exec_channel index2; 1154 union tgsi_exec_channel indir_index; 1155 const uint execmask = mach->ExecMask; 1156 uint i; 1157 1158 index2.i[0] = 1159 index2.i[1] = 1160 index2.i[2] = 1161 index2.i[3] = reg->SrcRegisterDimInd.Index; 1162 1163 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1164 fetch_src_file_channel( 1165 mach, 1166 reg->SrcRegisterDimInd.File, 1167 swizzle, 1168 &index2, 1169 &indir_index ); 1170 1171 index.i[0] += (int) indir_index.f[0]; 1172 index.i[1] += (int) indir_index.f[1]; 1173 index.i[2] += (int) indir_index.f[2]; 1174 index.i[3] += (int) indir_index.f[3]; 1175 1176 /* for disabled execution channels, zero-out the index to 1177 * avoid using a potential garbage value. 1178 */ 1179 for (i = 0; i < QUAD_SIZE; i++) { 1180 if ((execmask & (1 << i)) == 0) 1181 index.i[i] = 0; 1182 } 1183 } 1184 1185 /* If by any chance there was a need for a 3D array of register 1186 * files, we would have to check whether SrcRegisterDim is followed 1187 * by a dimension register and continue the saga. 1188 */ 1189 } 1190 1191 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1192 fetch_src_file_channel( 1193 mach, 1194 reg->SrcRegister.File, 1195 swizzle, 1196 &index, 1197 chan ); 1198 1199 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1200 case TGSI_UTIL_SIGN_CLEAR: 1201 micro_abs( chan, chan ); 1202 break; 1203 1204 case TGSI_UTIL_SIGN_SET: 1205 micro_abs( chan, chan ); 1206 micro_neg( chan, chan ); 1207 break; 1208 1209 case TGSI_UTIL_SIGN_TOGGLE: 1210 micro_neg( chan, chan ); 1211 break; 1212 1213 case TGSI_UTIL_SIGN_KEEP: 1214 break; 1215 } 1216 1217 if (reg->SrcRegisterExtMod.Complement) { 1218 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1219 } 1220} 1221 1222static void 1223store_dest( 1224 struct tgsi_exec_machine *mach, 1225 const union tgsi_exec_channel *chan, 1226 const struct tgsi_full_dst_register *reg, 1227 const struct tgsi_full_instruction *inst, 1228 uint chan_index ) 1229{ 1230 uint i; 1231 union tgsi_exec_channel null; 1232 union tgsi_exec_channel *dst; 1233 uint execmask = mach->ExecMask; 1234 1235 switch (reg->DstRegister.File) { 1236 case TGSI_FILE_NULL: 1237 dst = &null; 1238 break; 1239 1240 case TGSI_FILE_OUTPUT: 1241 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1242 + reg->DstRegister.Index].xyzw[chan_index]; 1243 break; 1244 1245 case TGSI_FILE_TEMPORARY: 1246 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); 1247 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 1248 break; 1249 1250 case TGSI_FILE_ADDRESS: 1251 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 1252 break; 1253 1254 default: 1255 assert( 0 ); 1256 return; 1257 } 1258 1259 if (inst->InstructionExtNv.CondFlowEnable) { 1260 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1261 uint swizzle; 1262 uint shift; 1263 uint mask; 1264 uint test; 1265 1266 /* Only CC0 supported. 1267 */ 1268 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1269 1270 switch (chan_index) { 1271 case CHAN_X: 1272 swizzle = inst->InstructionExtNv.CondSwizzleX; 1273 break; 1274 case CHAN_Y: 1275 swizzle = inst->InstructionExtNv.CondSwizzleY; 1276 break; 1277 case CHAN_Z: 1278 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1279 break; 1280 case CHAN_W: 1281 swizzle = inst->InstructionExtNv.CondSwizzleW; 1282 break; 1283 default: 1284 assert( 0 ); 1285 return; 1286 } 1287 1288 switch (swizzle) { 1289 case TGSI_SWIZZLE_X: 1290 shift = TGSI_EXEC_CC_X_SHIFT; 1291 mask = TGSI_EXEC_CC_X_MASK; 1292 break; 1293 case TGSI_SWIZZLE_Y: 1294 shift = TGSI_EXEC_CC_Y_SHIFT; 1295 mask = TGSI_EXEC_CC_Y_MASK; 1296 break; 1297 case TGSI_SWIZZLE_Z: 1298 shift = TGSI_EXEC_CC_Z_SHIFT; 1299 mask = TGSI_EXEC_CC_Z_MASK; 1300 break; 1301 case TGSI_SWIZZLE_W: 1302 shift = TGSI_EXEC_CC_W_SHIFT; 1303 mask = TGSI_EXEC_CC_W_MASK; 1304 break; 1305 default: 1306 assert( 0 ); 1307 return; 1308 } 1309 1310 switch (inst->InstructionExtNv.CondMask) { 1311 case TGSI_CC_GT: 1312 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1313 for (i = 0; i < QUAD_SIZE; i++) 1314 if (cc->u[i] & test) 1315 execmask &= ~(1 << i); 1316 break; 1317 1318 case TGSI_CC_EQ: 1319 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1320 for (i = 0; i < QUAD_SIZE; i++) 1321 if (cc->u[i] & test) 1322 execmask &= ~(1 << i); 1323 break; 1324 1325 case TGSI_CC_LT: 1326 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1327 for (i = 0; i < QUAD_SIZE; i++) 1328 if (cc->u[i] & test) 1329 execmask &= ~(1 << i); 1330 break; 1331 1332 case TGSI_CC_GE: 1333 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1334 for (i = 0; i < QUAD_SIZE; i++) 1335 if (cc->u[i] & test) 1336 execmask &= ~(1 << i); 1337 break; 1338 1339 case TGSI_CC_LE: 1340 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1341 for (i = 0; i < QUAD_SIZE; i++) 1342 if (cc->u[i] & test) 1343 execmask &= ~(1 << i); 1344 break; 1345 1346 case TGSI_CC_NE: 1347 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1348 for (i = 0; i < QUAD_SIZE; i++) 1349 if (cc->u[i] & test) 1350 execmask &= ~(1 << i); 1351 break; 1352 1353 case TGSI_CC_TR: 1354 break; 1355 1356 case TGSI_CC_FL: 1357 for (i = 0; i < QUAD_SIZE; i++) 1358 execmask &= ~(1 << i); 1359 break; 1360 1361 default: 1362 assert( 0 ); 1363 return; 1364 } 1365 } 1366 1367 switch (inst->Instruction.Saturate) { 1368 case TGSI_SAT_NONE: 1369 for (i = 0; i < QUAD_SIZE; i++) 1370 if (execmask & (1 << i)) 1371 dst->i[i] = chan->i[i]; 1372 break; 1373 1374 case TGSI_SAT_ZERO_ONE: 1375 for (i = 0; i < QUAD_SIZE; i++) 1376 if (execmask & (1 << i)) { 1377 if (chan->f[i] < 0.0f) 1378 dst->f[i] = 0.0f; 1379 else if (chan->f[i] > 1.0f) 1380 dst->f[i] = 1.0f; 1381 else 1382 dst->i[i] = chan->i[i]; 1383 } 1384 break; 1385 1386 case TGSI_SAT_MINUS_PLUS_ONE: 1387 for (i = 0; i < QUAD_SIZE; i++) 1388 if (execmask & (1 << i)) { 1389 if (chan->f[i] < -1.0f) 1390 dst->f[i] = -1.0f; 1391 else if (chan->f[i] > 1.0f) 1392 dst->f[i] = 1.0f; 1393 else 1394 dst->i[i] = chan->i[i]; 1395 } 1396 break; 1397 1398 default: 1399 assert( 0 ); 1400 } 1401 1402 if (inst->InstructionExtNv.CondDstUpdate) { 1403 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1404 uint shift; 1405 uint mask; 1406 1407 /* Only CC0 supported. 1408 */ 1409 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1410 1411 switch (chan_index) { 1412 case CHAN_X: 1413 shift = TGSI_EXEC_CC_X_SHIFT; 1414 mask = ~TGSI_EXEC_CC_X_MASK; 1415 break; 1416 case CHAN_Y: 1417 shift = TGSI_EXEC_CC_Y_SHIFT; 1418 mask = ~TGSI_EXEC_CC_Y_MASK; 1419 break; 1420 case CHAN_Z: 1421 shift = TGSI_EXEC_CC_Z_SHIFT; 1422 mask = ~TGSI_EXEC_CC_Z_MASK; 1423 break; 1424 case CHAN_W: 1425 shift = TGSI_EXEC_CC_W_SHIFT; 1426 mask = ~TGSI_EXEC_CC_W_MASK; 1427 break; 1428 default: 1429 assert( 0 ); 1430 return; 1431 } 1432 1433 for (i = 0; i < QUAD_SIZE; i++) 1434 if (execmask & (1 << i)) { 1435 cc->u[i] &= mask; 1436 if (dst->f[i] < 0.0f) 1437 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1438 else if (dst->f[i] > 0.0f) 1439 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1440 else if (dst->f[i] == 0.0f) 1441 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1442 else 1443 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1444 } 1445 } 1446} 1447 1448#define FETCH(VAL,INDEX,CHAN)\ 1449 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1450 1451#define STORE(VAL,INDEX,CHAN)\ 1452 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1453 1454 1455/** 1456 * Execute ARB-style KIL which is predicated by a src register. 1457 * Kill fragment if any of the four values is less than zero. 1458 */ 1459static void 1460exec_kil(struct tgsi_exec_machine *mach, 1461 const struct tgsi_full_instruction *inst) 1462{ 1463 uint uniquemask; 1464 uint chan_index; 1465 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1466 union tgsi_exec_channel r[1]; 1467 1468 /* This mask stores component bits that were already tested. Note that 1469 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1470 * tested. */ 1471 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1472 1473 for (chan_index = 0; chan_index < 4; chan_index++) 1474 { 1475 uint swizzle; 1476 uint i; 1477 1478 /* unswizzle channel */ 1479 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1480 &inst->FullSrcRegisters[0], 1481 chan_index); 1482 1483 /* check if the component has not been already tested */ 1484 if (uniquemask & (1 << swizzle)) 1485 continue; 1486 uniquemask |= 1 << swizzle; 1487 1488 FETCH(&r[0], 0, chan_index); 1489 for (i = 0; i < 4; i++) 1490 if (r[0].f[i] < 0.0f) 1491 kilmask |= 1 << i; 1492 } 1493 1494 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1495} 1496 1497/** 1498 * Execute NVIDIA-style KIL which is predicated by a condition code. 1499 * Kill fragment if the condition code is TRUE. 1500 */ 1501static void 1502exec_kilp(struct tgsi_exec_machine *mach, 1503 const struct tgsi_full_instruction *inst) 1504{ 1505 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1506 1507 if (inst->InstructionExtNv.CondFlowEnable) { 1508 uint swizzle[4]; 1509 uint chan_index; 1510 1511 kilmask = 0x0; 1512 1513 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1514 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1515 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1516 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1517 1518 for (chan_index = 0; chan_index < 4; chan_index++) 1519 { 1520 uint i; 1521 1522 for (i = 0; i < 4; i++) { 1523 /* TODO: evaluate the condition code */ 1524 if (0) 1525 kilmask |= 1 << i; 1526 } 1527 } 1528 } 1529 else { 1530 /* "unconditional" kil */ 1531 kilmask = mach->ExecMask; 1532 } 1533 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1534} 1535 1536 1537/* 1538 * Fetch a four texture samples using STR texture coordinates. 1539 */ 1540static void 1541fetch_texel( struct tgsi_sampler *sampler, 1542 const union tgsi_exec_channel *s, 1543 const union tgsi_exec_channel *t, 1544 const union tgsi_exec_channel *p, 1545 float lodbias, /* XXX should be float[4] */ 1546 union tgsi_exec_channel *r, 1547 union tgsi_exec_channel *g, 1548 union tgsi_exec_channel *b, 1549 union tgsi_exec_channel *a ) 1550{ 1551 uint j; 1552 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1553 1554 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1555 1556 for (j = 0; j < 4; j++) { 1557 r->f[j] = rgba[0][j]; 1558 g->f[j] = rgba[1][j]; 1559 b->f[j] = rgba[2][j]; 1560 a->f[j] = rgba[3][j]; 1561 } 1562} 1563 1564 1565static void 1566exec_tex(struct tgsi_exec_machine *mach, 1567 const struct tgsi_full_instruction *inst, 1568 boolean biasLod, 1569 boolean projected) 1570{ 1571 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1572 union tgsi_exec_channel r[4]; 1573 uint chan_index; 1574 float lodBias; 1575 1576 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1577 1578 switch (inst->InstructionExtTexture.Texture) { 1579 case TGSI_TEXTURE_1D: 1580 1581 FETCH(&r[0], 0, CHAN_X); 1582 1583 if (projected) { 1584 FETCH(&r[1], 0, CHAN_W); 1585 micro_div( &r[0], &r[0], &r[1] ); 1586 } 1587 1588 if (biasLod) { 1589 FETCH(&r[1], 0, CHAN_W); 1590 lodBias = r[2].f[0]; 1591 } 1592 else 1593 lodBias = 0.0; 1594 1595 fetch_texel(mach->Samplers[unit], 1596 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ 1597 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1598 break; 1599 1600 case TGSI_TEXTURE_2D: 1601 case TGSI_TEXTURE_RECT: 1602 1603 FETCH(&r[0], 0, CHAN_X); 1604 FETCH(&r[1], 0, CHAN_Y); 1605 FETCH(&r[2], 0, CHAN_Z); 1606 1607 if (projected) { 1608 FETCH(&r[3], 0, CHAN_W); 1609 micro_div( &r[0], &r[0], &r[3] ); 1610 micro_div( &r[1], &r[1], &r[3] ); 1611 micro_div( &r[2], &r[2], &r[3] ); 1612 } 1613 1614 if (biasLod) { 1615 FETCH(&r[3], 0, CHAN_W); 1616 lodBias = r[3].f[0]; 1617 } 1618 else 1619 lodBias = 0.0; 1620 1621 fetch_texel(mach->Samplers[unit], 1622 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1623 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1624 break; 1625 1626 case TGSI_TEXTURE_3D: 1627 case TGSI_TEXTURE_CUBE: 1628 1629 FETCH(&r[0], 0, CHAN_X); 1630 FETCH(&r[1], 0, CHAN_Y); 1631 FETCH(&r[2], 0, CHAN_Z); 1632 1633 if (projected) { 1634 FETCH(&r[3], 0, CHAN_W); 1635 micro_div( &r[0], &r[0], &r[3] ); 1636 micro_div( &r[1], &r[1], &r[3] ); 1637 micro_div( &r[2], &r[2], &r[3] ); 1638 } 1639 1640 if (biasLod) { 1641 FETCH(&r[3], 0, CHAN_W); 1642 lodBias = r[3].f[0]; 1643 } 1644 else 1645 lodBias = 0.0; 1646 1647 fetch_texel(mach->Samplers[unit], 1648 &r[0], &r[1], &r[2], lodBias, 1649 &r[0], &r[1], &r[2], &r[3]); 1650 break; 1651 1652 default: 1653 assert (0); 1654 } 1655 1656 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1657 STORE( &r[chan_index], 0, chan_index ); 1658 } 1659} 1660 1661 1662/** 1663 * Evaluate a constant-valued coefficient at the position of the 1664 * current quad. 1665 */ 1666static void 1667eval_constant_coef( 1668 struct tgsi_exec_machine *mach, 1669 unsigned attrib, 1670 unsigned chan ) 1671{ 1672 unsigned i; 1673 1674 for( i = 0; i < QUAD_SIZE; i++ ) { 1675 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1676 } 1677} 1678 1679/** 1680 * Evaluate a linear-valued coefficient at the position of the 1681 * current quad. 1682 */ 1683static void 1684eval_linear_coef( 1685 struct tgsi_exec_machine *mach, 1686 unsigned attrib, 1687 unsigned chan ) 1688{ 1689 const float x = mach->QuadPos.xyzw[0].f[0]; 1690 const float y = mach->QuadPos.xyzw[1].f[0]; 1691 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1692 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1693 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1694 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1695 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1696 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1697 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1698} 1699 1700/** 1701 * Evaluate a perspective-valued coefficient at the position of the 1702 * current quad. 1703 */ 1704static void 1705eval_perspective_coef( 1706 struct tgsi_exec_machine *mach, 1707 unsigned attrib, 1708 unsigned chan ) 1709{ 1710 const float x = mach->QuadPos.xyzw[0].f[0]; 1711 const float y = mach->QuadPos.xyzw[1].f[0]; 1712 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1713 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1714 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1715 const float *w = mach->QuadPos.xyzw[3].f; 1716 /* divide by W here */ 1717 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1718 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1719 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1720 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1721} 1722 1723 1724typedef void (* eval_coef_func)( 1725 struct tgsi_exec_machine *mach, 1726 unsigned attrib, 1727 unsigned chan ); 1728 1729static void 1730exec_declaration( 1731 struct tgsi_exec_machine *mach, 1732 const struct tgsi_full_declaration *decl ) 1733{ 1734 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1735 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1736 unsigned first, last, mask; 1737 eval_coef_func eval; 1738 1739 first = decl->DeclarationRange.First; 1740 last = decl->DeclarationRange.Last; 1741 mask = decl->Declaration.UsageMask; 1742 1743 switch( decl->Declaration.Interpolate ) { 1744 case TGSI_INTERPOLATE_CONSTANT: 1745 eval = eval_constant_coef; 1746 break; 1747 1748 case TGSI_INTERPOLATE_LINEAR: 1749 eval = eval_linear_coef; 1750 break; 1751 1752 case TGSI_INTERPOLATE_PERSPECTIVE: 1753 eval = eval_perspective_coef; 1754 break; 1755 1756 default: 1757 eval = NULL; 1758 assert( 0 ); 1759 } 1760 1761 if( mask == TGSI_WRITEMASK_XYZW ) { 1762 unsigned i, j; 1763 1764 for( i = first; i <= last; i++ ) { 1765 for( j = 0; j < NUM_CHANNELS; j++ ) { 1766 eval( mach, i, j ); 1767 } 1768 } 1769 } 1770 else { 1771 unsigned i, j; 1772 1773 for( j = 0; j < NUM_CHANNELS; j++ ) { 1774 if( mask & (1 << j) ) { 1775 for( i = first; i <= last; i++ ) { 1776 eval( mach, i, j ); 1777 } 1778 } 1779 } 1780 } 1781 } 1782 } 1783} 1784 1785static void 1786exec_instruction( 1787 struct tgsi_exec_machine *mach, 1788 const struct tgsi_full_instruction *inst, 1789 int *pc ) 1790{ 1791 uint chan_index; 1792 union tgsi_exec_channel r[8]; 1793 1794 (*pc)++; 1795 1796 switch (inst->Instruction.Opcode) { 1797 case TGSI_OPCODE_ARL: 1798 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1799 FETCH( &r[0], 0, chan_index ); 1800 micro_trunc( &r[0], &r[0] ); 1801 STORE( &r[0], 0, chan_index ); 1802 } 1803 break; 1804 1805 case TGSI_OPCODE_MOV: 1806 case TGSI_OPCODE_SWZ: 1807 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1808 FETCH( &r[0], 0, chan_index ); 1809 STORE( &r[0], 0, chan_index ); 1810 } 1811 break; 1812 1813 case TGSI_OPCODE_LIT: 1814 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1815 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1816 } 1817 1818 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1819 FETCH( &r[0], 0, CHAN_X ); 1820 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1821 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1822 STORE( &r[0], 0, CHAN_Y ); 1823 } 1824 1825 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1826 FETCH( &r[1], 0, CHAN_Y ); 1827 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1828 1829 FETCH( &r[2], 0, CHAN_W ); 1830 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1831 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1832 micro_pow( &r[1], &r[1], &r[2] ); 1833 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1834 STORE( &r[0], 0, CHAN_Z ); 1835 } 1836 } 1837 1838 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1839 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1840 } 1841 break; 1842 1843 case TGSI_OPCODE_RCP: 1844 /* TGSI_OPCODE_RECIP */ 1845 FETCH( &r[0], 0, CHAN_X ); 1846 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1847 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1848 STORE( &r[0], 0, chan_index ); 1849 } 1850 break; 1851 1852 case TGSI_OPCODE_RSQ: 1853 /* TGSI_OPCODE_RECIPSQRT */ 1854 FETCH( &r[0], 0, CHAN_X ); 1855 micro_sqrt( &r[0], &r[0] ); 1856 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1857 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1858 STORE( &r[0], 0, chan_index ); 1859 } 1860 break; 1861 1862 case TGSI_OPCODE_EXP: 1863 FETCH( &r[0], 0, CHAN_X ); 1864 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1865 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1866 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1867 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1868 } 1869 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1870 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1871 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1872 } 1873 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1874 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1875 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1876 } 1877 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1878 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1879 } 1880 break; 1881 1882 case TGSI_OPCODE_LOG: 1883 FETCH( &r[0], 0, CHAN_X ); 1884 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1885 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1886 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1887 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1888 STORE( &r[0], 0, CHAN_X ); 1889 } 1890 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1891 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1892 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 1893 STORE( &r[0], 0, CHAN_Y ); 1894 } 1895 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1896 STORE( &r[1], 0, CHAN_Z ); 1897 } 1898 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1899 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1900 } 1901 break; 1902 1903 case TGSI_OPCODE_MUL: 1904 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1905 { 1906 FETCH(&r[0], 0, chan_index); 1907 FETCH(&r[1], 1, chan_index); 1908 1909 micro_mul( &r[0], &r[0], &r[1] ); 1910 1911 STORE(&r[0], 0, chan_index); 1912 } 1913 break; 1914 1915 case TGSI_OPCODE_ADD: 1916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1917 FETCH( &r[0], 0, chan_index ); 1918 FETCH( &r[1], 1, chan_index ); 1919 micro_add( &r[0], &r[0], &r[1] ); 1920 STORE( &r[0], 0, chan_index ); 1921 } 1922 break; 1923 1924 case TGSI_OPCODE_DP3: 1925 /* TGSI_OPCODE_DOT3 */ 1926 FETCH( &r[0], 0, CHAN_X ); 1927 FETCH( &r[1], 1, CHAN_X ); 1928 micro_mul( &r[0], &r[0], &r[1] ); 1929 1930 FETCH( &r[1], 0, CHAN_Y ); 1931 FETCH( &r[2], 1, CHAN_Y ); 1932 micro_mul( &r[1], &r[1], &r[2] ); 1933 micro_add( &r[0], &r[0], &r[1] ); 1934 1935 FETCH( &r[1], 0, CHAN_Z ); 1936 FETCH( &r[2], 1, CHAN_Z ); 1937 micro_mul( &r[1], &r[1], &r[2] ); 1938 micro_add( &r[0], &r[0], &r[1] ); 1939 1940 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1941 STORE( &r[0], 0, chan_index ); 1942 } 1943 break; 1944 1945 case TGSI_OPCODE_DP4: 1946 /* TGSI_OPCODE_DOT4 */ 1947 FETCH(&r[0], 0, CHAN_X); 1948 FETCH(&r[1], 1, CHAN_X); 1949 1950 micro_mul( &r[0], &r[0], &r[1] ); 1951 1952 FETCH(&r[1], 0, CHAN_Y); 1953 FETCH(&r[2], 1, CHAN_Y); 1954 1955 micro_mul( &r[1], &r[1], &r[2] ); 1956 micro_add( &r[0], &r[0], &r[1] ); 1957 1958 FETCH(&r[1], 0, CHAN_Z); 1959 FETCH(&r[2], 1, CHAN_Z); 1960 1961 micro_mul( &r[1], &r[1], &r[2] ); 1962 micro_add( &r[0], &r[0], &r[1] ); 1963 1964 FETCH(&r[1], 0, CHAN_W); 1965 FETCH(&r[2], 1, CHAN_W); 1966 1967 micro_mul( &r[1], &r[1], &r[2] ); 1968 micro_add( &r[0], &r[0], &r[1] ); 1969 1970 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1971 STORE( &r[0], 0, chan_index ); 1972 } 1973 break; 1974 1975 case TGSI_OPCODE_DST: 1976 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1977 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1978 } 1979 1980 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1981 FETCH( &r[0], 0, CHAN_Y ); 1982 FETCH( &r[1], 1, CHAN_Y); 1983 micro_mul( &r[0], &r[0], &r[1] ); 1984 STORE( &r[0], 0, CHAN_Y ); 1985 } 1986 1987 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1988 FETCH( &r[0], 0, CHAN_Z ); 1989 STORE( &r[0], 0, CHAN_Z ); 1990 } 1991 1992 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1993 FETCH( &r[0], 1, CHAN_W ); 1994 STORE( &r[0], 0, CHAN_W ); 1995 } 1996 break; 1997 1998 case TGSI_OPCODE_MIN: 1999 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2000 FETCH(&r[0], 0, chan_index); 2001 FETCH(&r[1], 1, chan_index); 2002 2003 /* XXX use micro_min()?? */ 2004 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2005 2006 STORE(&r[0], 0, chan_index); 2007 } 2008 break; 2009 2010 case TGSI_OPCODE_MAX: 2011 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2012 FETCH(&r[0], 0, chan_index); 2013 FETCH(&r[1], 1, chan_index); 2014 2015 /* XXX use micro_max()?? */ 2016 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2017 2018 STORE(&r[0], 0, chan_index ); 2019 } 2020 break; 2021 2022 case TGSI_OPCODE_SLT: 2023 /* TGSI_OPCODE_SETLT */ 2024 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2025 FETCH( &r[0], 0, chan_index ); 2026 FETCH( &r[1], 1, chan_index ); 2027 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2028 STORE( &r[0], 0, chan_index ); 2029 } 2030 break; 2031 2032 case TGSI_OPCODE_SGE: 2033 /* TGSI_OPCODE_SETGE */ 2034 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2035 FETCH( &r[0], 0, chan_index ); 2036 FETCH( &r[1], 1, chan_index ); 2037 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2038 STORE( &r[0], 0, chan_index ); 2039 } 2040 break; 2041 2042 case TGSI_OPCODE_MAD: 2043 /* TGSI_OPCODE_MADD */ 2044 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2045 FETCH( &r[0], 0, chan_index ); 2046 FETCH( &r[1], 1, chan_index ); 2047 micro_mul( &r[0], &r[0], &r[1] ); 2048 FETCH( &r[1], 2, chan_index ); 2049 micro_add( &r[0], &r[0], &r[1] ); 2050 STORE( &r[0], 0, chan_index ); 2051 } 2052 break; 2053 2054 case TGSI_OPCODE_SUB: 2055 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2056 FETCH(&r[0], 0, chan_index); 2057 FETCH(&r[1], 1, chan_index); 2058 2059 micro_sub( &r[0], &r[0], &r[1] ); 2060 2061 STORE(&r[0], 0, chan_index); 2062 } 2063 break; 2064 2065 case TGSI_OPCODE_LERP: 2066 /* TGSI_OPCODE_LRP */ 2067 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2068 FETCH(&r[0], 0, chan_index); 2069 FETCH(&r[1], 1, chan_index); 2070 FETCH(&r[2], 2, chan_index); 2071 2072 micro_sub( &r[1], &r[1], &r[2] ); 2073 micro_mul( &r[0], &r[0], &r[1] ); 2074 micro_add( &r[0], &r[0], &r[2] ); 2075 2076 STORE(&r[0], 0, chan_index); 2077 } 2078 break; 2079 2080 case TGSI_OPCODE_CND: 2081 assert (0); 2082 break; 2083 2084 case TGSI_OPCODE_CND0: 2085 assert (0); 2086 break; 2087 2088 case TGSI_OPCODE_DOT2ADD: 2089 /* TGSI_OPCODE_DP2A */ 2090 FETCH( &r[0], 0, CHAN_X ); 2091 FETCH( &r[1], 1, CHAN_X ); 2092 micro_mul( &r[0], &r[0], &r[1] ); 2093 2094 FETCH( &r[1], 0, CHAN_Y ); 2095 FETCH( &r[2], 1, CHAN_Y ); 2096 micro_mul( &r[1], &r[1], &r[2] ); 2097 micro_add( &r[0], &r[0], &r[1] ); 2098 2099 FETCH( &r[2], 2, CHAN_X ); 2100 micro_add( &r[0], &r[0], &r[2] ); 2101 2102 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2103 STORE( &r[0], 0, chan_index ); 2104 } 2105 break; 2106 2107 case TGSI_OPCODE_INDEX: 2108 assert (0); 2109 break; 2110 2111 case TGSI_OPCODE_NEGATE: 2112 assert (0); 2113 break; 2114 2115 case TGSI_OPCODE_FRAC: 2116 /* TGSI_OPCODE_FRC */ 2117 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2118 FETCH( &r[0], 0, chan_index ); 2119 micro_frc( &r[0], &r[0] ); 2120 STORE( &r[0], 0, chan_index ); 2121 } 2122 break; 2123 2124 case TGSI_OPCODE_CLAMP: 2125 assert (0); 2126 break; 2127 2128 case TGSI_OPCODE_FLOOR: 2129 /* TGSI_OPCODE_FLR */ 2130 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2131 FETCH( &r[0], 0, chan_index ); 2132 micro_flr( &r[0], &r[0] ); 2133 STORE( &r[0], 0, chan_index ); 2134 } 2135 break; 2136 2137 case TGSI_OPCODE_ROUND: 2138 case TGSI_OPCODE_ARR: 2139 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2140 FETCH( &r[0], 0, chan_index ); 2141 micro_rnd( &r[0], &r[0] ); 2142 STORE( &r[0], 0, chan_index ); 2143 } 2144 break; 2145 2146 case TGSI_OPCODE_EXPBASE2: 2147 /* TGSI_OPCODE_EX2 */ 2148 FETCH(&r[0], 0, CHAN_X); 2149 2150#if FAST_MATH 2151 micro_exp2( &r[0], &r[0] ); 2152#else 2153 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2154#endif 2155 2156 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2157 STORE( &r[0], 0, chan_index ); 2158 } 2159 break; 2160 2161 case TGSI_OPCODE_LOGBASE2: 2162 /* TGSI_OPCODE_LG2 */ 2163 FETCH( &r[0], 0, CHAN_X ); 2164 micro_lg2( &r[0], &r[0] ); 2165 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2166 STORE( &r[0], 0, chan_index ); 2167 } 2168 break; 2169 2170 case TGSI_OPCODE_POWER: 2171 /* TGSI_OPCODE_POW */ 2172 FETCH(&r[0], 0, CHAN_X); 2173 FETCH(&r[1], 1, CHAN_X); 2174 2175 micro_pow( &r[0], &r[0], &r[1] ); 2176 2177 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2178 STORE( &r[0], 0, chan_index ); 2179 } 2180 break; 2181 2182 case TGSI_OPCODE_CROSSPRODUCT: 2183 /* TGSI_OPCODE_XPD */ 2184 FETCH(&r[0], 0, CHAN_Y); 2185 FETCH(&r[1], 1, CHAN_Z); 2186 2187 micro_mul( &r[2], &r[0], &r[1] ); 2188 2189 FETCH(&r[3], 0, CHAN_Z); 2190 FETCH(&r[4], 1, CHAN_Y); 2191 2192 micro_mul( &r[5], &r[3], &r[4] ); 2193 micro_sub( &r[2], &r[2], &r[5] ); 2194 2195 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2196 STORE( &r[2], 0, CHAN_X ); 2197 } 2198 2199 FETCH(&r[2], 1, CHAN_X); 2200 2201 micro_mul( &r[3], &r[3], &r[2] ); 2202 2203 FETCH(&r[5], 0, CHAN_X); 2204 2205 micro_mul( &r[1], &r[1], &r[5] ); 2206 micro_sub( &r[3], &r[3], &r[1] ); 2207 2208 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2209 STORE( &r[3], 0, CHAN_Y ); 2210 } 2211 2212 micro_mul( &r[5], &r[5], &r[4] ); 2213 micro_mul( &r[0], &r[0], &r[2] ); 2214 micro_sub( &r[5], &r[5], &r[0] ); 2215 2216 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2217 STORE( &r[5], 0, CHAN_Z ); 2218 } 2219 2220 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2221 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2222 } 2223 break; 2224 2225 case TGSI_OPCODE_MULTIPLYMATRIX: 2226 assert (0); 2227 break; 2228 2229 case TGSI_OPCODE_ABS: 2230 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2231 FETCH(&r[0], 0, chan_index); 2232 2233 micro_abs( &r[0], &r[0] ); 2234 2235 STORE(&r[0], 0, chan_index); 2236 } 2237 break; 2238 2239 case TGSI_OPCODE_RCC: 2240 assert (0); 2241 break; 2242 2243 case TGSI_OPCODE_DPH: 2244 FETCH(&r[0], 0, CHAN_X); 2245 FETCH(&r[1], 1, CHAN_X); 2246 2247 micro_mul( &r[0], &r[0], &r[1] ); 2248 2249 FETCH(&r[1], 0, CHAN_Y); 2250 FETCH(&r[2], 1, CHAN_Y); 2251 2252 micro_mul( &r[1], &r[1], &r[2] ); 2253 micro_add( &r[0], &r[0], &r[1] ); 2254 2255 FETCH(&r[1], 0, CHAN_Z); 2256 FETCH(&r[2], 1, CHAN_Z); 2257 2258 micro_mul( &r[1], &r[1], &r[2] ); 2259 micro_add( &r[0], &r[0], &r[1] ); 2260 2261 FETCH(&r[1], 1, CHAN_W); 2262 2263 micro_add( &r[0], &r[0], &r[1] ); 2264 2265 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2266 STORE( &r[0], 0, chan_index ); 2267 } 2268 break; 2269 2270 case TGSI_OPCODE_COS: 2271 FETCH(&r[0], 0, CHAN_X); 2272 2273 micro_cos( &r[0], &r[0] ); 2274 2275 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2276 STORE( &r[0], 0, chan_index ); 2277 } 2278 break; 2279 2280 case TGSI_OPCODE_DDX: 2281 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2282 FETCH( &r[0], 0, chan_index ); 2283 micro_ddx( &r[0], &r[0] ); 2284 STORE( &r[0], 0, chan_index ); 2285 } 2286 break; 2287 2288 case TGSI_OPCODE_DDY: 2289 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2290 FETCH( &r[0], 0, chan_index ); 2291 micro_ddy( &r[0], &r[0] ); 2292 STORE( &r[0], 0, chan_index ); 2293 } 2294 break; 2295 2296 case TGSI_OPCODE_KILP: 2297 exec_kilp (mach, inst); 2298 break; 2299 2300 case TGSI_OPCODE_KIL: 2301 exec_kil (mach, inst); 2302 break; 2303 2304 case TGSI_OPCODE_PK2H: 2305 assert (0); 2306 break; 2307 2308 case TGSI_OPCODE_PK2US: 2309 assert (0); 2310 break; 2311 2312 case TGSI_OPCODE_PK4B: 2313 assert (0); 2314 break; 2315 2316 case TGSI_OPCODE_PK4UB: 2317 assert (0); 2318 break; 2319 2320 case TGSI_OPCODE_RFL: 2321 assert (0); 2322 break; 2323 2324 case TGSI_OPCODE_SEQ: 2325 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2326 FETCH( &r[0], 0, chan_index ); 2327 FETCH( &r[1], 1, chan_index ); 2328 micro_eq( &r[0], &r[0], &r[1], 2329 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2330 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2331 STORE( &r[0], 0, chan_index ); 2332 } 2333 break; 2334 2335 case TGSI_OPCODE_SFL: 2336 assert (0); 2337 break; 2338 2339 case TGSI_OPCODE_SGT: 2340 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2341 FETCH( &r[0], 0, chan_index ); 2342 FETCH( &r[1], 1, chan_index ); 2343 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2344 STORE( &r[0], 0, chan_index ); 2345 } 2346 break; 2347 2348 case TGSI_OPCODE_SIN: 2349 FETCH( &r[0], 0, CHAN_X ); 2350 micro_sin( &r[0], &r[0] ); 2351 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2352 STORE( &r[0], 0, chan_index ); 2353 } 2354 break; 2355 2356 case TGSI_OPCODE_SLE: 2357 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2358 FETCH( &r[0], 0, chan_index ); 2359 FETCH( &r[1], 1, chan_index ); 2360 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2361 STORE( &r[0], 0, chan_index ); 2362 } 2363 break; 2364 2365 case TGSI_OPCODE_SNE: 2366 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2367 FETCH( &r[0], 0, chan_index ); 2368 FETCH( &r[1], 1, chan_index ); 2369 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2370 STORE( &r[0], 0, chan_index ); 2371 } 2372 break; 2373 2374 case TGSI_OPCODE_STR: 2375 assert (0); 2376 break; 2377 2378 case TGSI_OPCODE_TEX: 2379 /* simple texture lookup */ 2380 /* src[0] = texcoord */ 2381 /* src[1] = sampler unit */ 2382 exec_tex(mach, inst, FALSE, FALSE); 2383 break; 2384 2385 case TGSI_OPCODE_TXB: 2386 /* Texture lookup with lod bias */ 2387 /* src[0] = texcoord (src[0].w = LOD bias) */ 2388 /* src[1] = sampler unit */ 2389 exec_tex(mach, inst, TRUE, FALSE); 2390 break; 2391 2392 case TGSI_OPCODE_TXD: 2393 /* Texture lookup with explict partial derivatives */ 2394 /* src[0] = texcoord */ 2395 /* src[1] = d[strq]/dx */ 2396 /* src[2] = d[strq]/dy */ 2397 /* src[3] = sampler unit */ 2398 assert (0); 2399 break; 2400 2401 case TGSI_OPCODE_TXL: 2402 /* Texture lookup with explit LOD */ 2403 /* src[0] = texcoord (src[0].w = LOD) */ 2404 /* src[1] = sampler unit */ 2405 exec_tex(mach, inst, TRUE, FALSE); 2406 break; 2407 2408 case TGSI_OPCODE_TXP: 2409 /* Texture lookup with projection */ 2410 /* src[0] = texcoord (src[0].w = projection) */ 2411 /* src[1] = sampler unit */ 2412 exec_tex(mach, inst, FALSE, TRUE); 2413 break; 2414 2415 case TGSI_OPCODE_UP2H: 2416 assert (0); 2417 break; 2418 2419 case TGSI_OPCODE_UP2US: 2420 assert (0); 2421 break; 2422 2423 case TGSI_OPCODE_UP4B: 2424 assert (0); 2425 break; 2426 2427 case TGSI_OPCODE_UP4UB: 2428 assert (0); 2429 break; 2430 2431 case TGSI_OPCODE_X2D: 2432 assert (0); 2433 break; 2434 2435 case TGSI_OPCODE_ARA: 2436 assert (0); 2437 break; 2438 2439 case TGSI_OPCODE_BRA: 2440 assert (0); 2441 break; 2442 2443 case TGSI_OPCODE_CAL: 2444 /* skip the call if no execution channels are enabled */ 2445 if (mach->ExecMask) { 2446 /* do the call */ 2447 2448 /* push the Cond, Loop, Cont stacks */ 2449 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2450 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2451 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2452 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2453 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2454 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2455 2456 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2457 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2458 2459 /* note that PC was already incremented above */ 2460 mach->CallStack[mach->CallStackTop++] = *pc; 2461 *pc = inst->InstructionExtLabel.Label; 2462 } 2463 break; 2464 2465 case TGSI_OPCODE_RET: 2466 mach->FuncMask &= ~mach->ExecMask; 2467 UPDATE_EXEC_MASK(mach); 2468 2469 if (mach->FuncMask == 0x0) { 2470 /* really return now (otherwise, keep executing */ 2471 2472 if (mach->CallStackTop == 0) { 2473 /* returning from main() */ 2474 *pc = -1; 2475 return; 2476 } 2477 *pc = mach->CallStack[--mach->CallStackTop]; 2478 2479 /* pop the Cond, Loop, Cont stacks */ 2480 assert(mach->CondStackTop > 0); 2481 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2482 assert(mach->LoopStackTop > 0); 2483 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2484 assert(mach->ContStackTop > 0); 2485 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2486 assert(mach->FuncStackTop > 0); 2487 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2488 2489 UPDATE_EXEC_MASK(mach); 2490 } 2491 break; 2492 2493 case TGSI_OPCODE_SSG: 2494 /* TGSI_OPCODE_SGN */ 2495 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2496 FETCH( &r[0], 0, chan_index ); 2497 micro_sgn( &r[0], &r[0] ); 2498 STORE( &r[0], 0, chan_index ); 2499 } 2500 break; 2501 2502 case TGSI_OPCODE_CMP: 2503 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2504 FETCH(&r[0], 0, chan_index); 2505 FETCH(&r[1], 1, chan_index); 2506 FETCH(&r[2], 2, chan_index); 2507 2508 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2509 2510 STORE(&r[0], 0, chan_index); 2511 } 2512 break; 2513 2514 case TGSI_OPCODE_SCS: 2515 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2516 FETCH( &r[0], 0, CHAN_X ); 2517 } 2518 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { 2519 micro_cos( &r[1], &r[0] ); 2520 STORE( &r[1], 0, CHAN_X ); 2521 } 2522 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2523 micro_sin( &r[1], &r[0] ); 2524 STORE( &r[1], 0, CHAN_Y ); 2525 } 2526 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2527 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2528 } 2529 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2530 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2531 } 2532 break; 2533 2534 case TGSI_OPCODE_NRM: 2535 /* 3-component vector normalize */ 2536 { 2537 union tgsi_exec_channel tmp, dot; 2538 2539 /* tmp = dp3(src0, src0): */ 2540 FETCH( &r[0], 0, CHAN_X ); 2541 micro_mul( &tmp, &r[0], &r[0] ); 2542 2543 FETCH( &r[1], 0, CHAN_Y ); 2544 micro_mul( &dot, &r[1], &r[1] ); 2545 micro_add( &tmp, &tmp, &dot ); 2546 2547 FETCH( &r[2], 0, CHAN_Z ); 2548 micro_mul( &dot, &r[2], &r[2] ); 2549 micro_add( &tmp, &tmp, &dot ); 2550 2551 /* tmp = 1 / sqrt(tmp) */ 2552 micro_sqrt( &tmp, &tmp ); 2553 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2554 2555 /* note: w channel is undefined */ 2556 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2557 /* chan = chan * tmp */ 2558 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2559 STORE( &r[chan_index], 0, chan_index ); 2560 } 2561 } 2562 break; 2563 2564 case TGSI_OPCODE_NRM4: 2565 /* 4-component vector normalize */ 2566 { 2567 union tgsi_exec_channel tmp, dot; 2568 2569 /* tmp = dp4(src0, src0): */ 2570 FETCH( &r[0], 0, CHAN_X ); 2571 micro_mul( &tmp, &r[0], &r[0] ); 2572 2573 FETCH( &r[1], 0, CHAN_Y ); 2574 micro_mul( &dot, &r[1], &r[1] ); 2575 micro_add( &tmp, &tmp, &dot ); 2576 2577 FETCH( &r[2], 0, CHAN_Z ); 2578 micro_mul( &dot, &r[2], &r[2] ); 2579 micro_add( &tmp, &tmp, &dot ); 2580 2581 FETCH( &r[3], 0, CHAN_W ); 2582 micro_mul( &dot, &r[3], &r[3] ); 2583 micro_add( &tmp, &tmp, &dot ); 2584 2585 /* tmp = 1 / sqrt(tmp) */ 2586 micro_sqrt( &tmp, &tmp ); 2587 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2588 2589 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2590 /* chan = chan * tmp */ 2591 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2592 STORE( &r[chan_index], 0, chan_index ); 2593 } 2594 } 2595 break; 2596 2597 case TGSI_OPCODE_DIV: 2598 assert( 0 ); 2599 break; 2600 2601 case TGSI_OPCODE_DP2: 2602 FETCH( &r[0], 0, CHAN_X ); 2603 FETCH( &r[1], 1, CHAN_X ); 2604 micro_mul( &r[0], &r[0], &r[1] ); 2605 2606 FETCH( &r[1], 0, CHAN_Y ); 2607 FETCH( &r[2], 1, CHAN_Y ); 2608 micro_mul( &r[1], &r[1], &r[2] ); 2609 micro_add( &r[0], &r[0], &r[1] ); 2610 2611 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2612 STORE( &r[0], 0, chan_index ); 2613 } 2614 break; 2615 2616 case TGSI_OPCODE_IF: 2617 /* push CondMask */ 2618 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2619 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2620 FETCH( &r[0], 0, CHAN_X ); 2621 /* update CondMask */ 2622 if( ! r[0].u[0] ) { 2623 mach->CondMask &= ~0x1; 2624 } 2625 if( ! r[0].u[1] ) { 2626 mach->CondMask &= ~0x2; 2627 } 2628 if( ! r[0].u[2] ) { 2629 mach->CondMask &= ~0x4; 2630 } 2631 if( ! r[0].u[3] ) { 2632 mach->CondMask &= ~0x8; 2633 } 2634 UPDATE_EXEC_MASK(mach); 2635 /* Todo: If CondMask==0, jump to ELSE */ 2636 break; 2637 2638 case TGSI_OPCODE_ELSE: 2639 /* invert CondMask wrt previous mask */ 2640 { 2641 uint prevMask; 2642 assert(mach->CondStackTop > 0); 2643 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2644 mach->CondMask = ~mach->CondMask & prevMask; 2645 UPDATE_EXEC_MASK(mach); 2646 /* Todo: If CondMask==0, jump to ENDIF */ 2647 } 2648 break; 2649 2650 case TGSI_OPCODE_ENDIF: 2651 /* pop CondMask */ 2652 assert(mach->CondStackTop > 0); 2653 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2654 UPDATE_EXEC_MASK(mach); 2655 break; 2656 2657 case TGSI_OPCODE_END: 2658 /* halt execution */ 2659 *pc = -1; 2660 break; 2661 2662 case TGSI_OPCODE_REP: 2663 assert (0); 2664 break; 2665 2666 case TGSI_OPCODE_ENDREP: 2667 assert (0); 2668 break; 2669 2670 case TGSI_OPCODE_PUSHA: 2671 assert (0); 2672 break; 2673 2674 case TGSI_OPCODE_POPA: 2675 assert (0); 2676 break; 2677 2678 case TGSI_OPCODE_CEIL: 2679 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2680 FETCH( &r[0], 0, chan_index ); 2681 micro_ceil( &r[0], &r[0] ); 2682 STORE( &r[0], 0, chan_index ); 2683 } 2684 break; 2685 2686 case TGSI_OPCODE_I2F: 2687 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2688 FETCH( &r[0], 0, chan_index ); 2689 micro_i2f( &r[0], &r[0] ); 2690 STORE( &r[0], 0, chan_index ); 2691 } 2692 break; 2693 2694 case TGSI_OPCODE_NOT: 2695 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2696 FETCH( &r[0], 0, chan_index ); 2697 micro_not( &r[0], &r[0] ); 2698 STORE( &r[0], 0, chan_index ); 2699 } 2700 break; 2701 2702 case TGSI_OPCODE_TRUNC: 2703 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2704 FETCH( &r[0], 0, chan_index ); 2705 micro_trunc( &r[0], &r[0] ); 2706 STORE( &r[0], 0, chan_index ); 2707 } 2708 break; 2709 2710 case TGSI_OPCODE_SHL: 2711 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2712 FETCH( &r[0], 0, chan_index ); 2713 FETCH( &r[1], 1, chan_index ); 2714 micro_shl( &r[0], &r[0], &r[1] ); 2715 STORE( &r[0], 0, chan_index ); 2716 } 2717 break; 2718 2719 case TGSI_OPCODE_SHR: 2720 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2721 FETCH( &r[0], 0, chan_index ); 2722 FETCH( &r[1], 1, chan_index ); 2723 micro_ishr( &r[0], &r[0], &r[1] ); 2724 STORE( &r[0], 0, chan_index ); 2725 } 2726 break; 2727 2728 case TGSI_OPCODE_AND: 2729 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2730 FETCH( &r[0], 0, chan_index ); 2731 FETCH( &r[1], 1, chan_index ); 2732 micro_and( &r[0], &r[0], &r[1] ); 2733 STORE( &r[0], 0, chan_index ); 2734 } 2735 break; 2736 2737 case TGSI_OPCODE_OR: 2738 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2739 FETCH( &r[0], 0, chan_index ); 2740 FETCH( &r[1], 1, chan_index ); 2741 micro_or( &r[0], &r[0], &r[1] ); 2742 STORE( &r[0], 0, chan_index ); 2743 } 2744 break; 2745 2746 case TGSI_OPCODE_MOD: 2747 assert (0); 2748 break; 2749 2750 case TGSI_OPCODE_XOR: 2751 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2752 FETCH( &r[0], 0, chan_index ); 2753 FETCH( &r[1], 1, chan_index ); 2754 micro_xor( &r[0], &r[0], &r[1] ); 2755 STORE( &r[0], 0, chan_index ); 2756 } 2757 break; 2758 2759 case TGSI_OPCODE_SAD: 2760 assert (0); 2761 break; 2762 2763 case TGSI_OPCODE_TXF: 2764 assert (0); 2765 break; 2766 2767 case TGSI_OPCODE_TXQ: 2768 assert (0); 2769 break; 2770 2771 case TGSI_OPCODE_EMIT: 2772 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 2773 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2774 break; 2775 2776 case TGSI_OPCODE_ENDPRIM: 2777 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 2778 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 2779 break; 2780 2781 case TGSI_OPCODE_LOOP: 2782 /* fall-through (for now) */ 2783 case TGSI_OPCODE_BGNLOOP2: 2784 /* push LoopMask and ContMasks */ 2785 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2786 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2787 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2788 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2789 break; 2790 2791 case TGSI_OPCODE_ENDLOOP: 2792 /* fall-through (for now at least) */ 2793 case TGSI_OPCODE_ENDLOOP2: 2794 /* Restore ContMask, but don't pop */ 2795 assert(mach->ContStackTop > 0); 2796 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 2797 UPDATE_EXEC_MASK(mach); 2798 if (mach->ExecMask) { 2799 /* repeat loop: jump to instruction just past BGNLOOP */ 2800 *pc = inst->InstructionExtLabel.Label + 1; 2801 } 2802 else { 2803 /* exit loop: pop LoopMask */ 2804 assert(mach->LoopStackTop > 0); 2805 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2806 /* pop ContMask */ 2807 assert(mach->ContStackTop > 0); 2808 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2809 } 2810 UPDATE_EXEC_MASK(mach); 2811 break; 2812 2813 case TGSI_OPCODE_BRK: 2814 /* turn off loop channels for each enabled exec channel */ 2815 mach->LoopMask &= ~mach->ExecMask; 2816 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2817 UPDATE_EXEC_MASK(mach); 2818 break; 2819 2820 case TGSI_OPCODE_CONT: 2821 /* turn off cont channels for each enabled exec channel */ 2822 mach->ContMask &= ~mach->ExecMask; 2823 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2824 UPDATE_EXEC_MASK(mach); 2825 break; 2826 2827 case TGSI_OPCODE_BGNSUB: 2828 /* no-op */ 2829 break; 2830 2831 case TGSI_OPCODE_ENDSUB: 2832 /* no-op */ 2833 break; 2834 2835 case TGSI_OPCODE_NOISE1: 2836 assert( 0 ); 2837 break; 2838 2839 case TGSI_OPCODE_NOISE2: 2840 assert( 0 ); 2841 break; 2842 2843 case TGSI_OPCODE_NOISE3: 2844 assert( 0 ); 2845 break; 2846 2847 case TGSI_OPCODE_NOISE4: 2848 assert( 0 ); 2849 break; 2850 2851 case TGSI_OPCODE_NOP: 2852 break; 2853 2854 default: 2855 assert( 0 ); 2856 } 2857} 2858 2859 2860/** 2861 * Run TGSI interpreter. 2862 * \return bitmask of "alive" quad components 2863 */ 2864uint 2865tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 2866{ 2867 uint i; 2868 int pc = 0; 2869 2870 mach->CondMask = 0xf; 2871 mach->LoopMask = 0xf; 2872 mach->ContMask = 0xf; 2873 mach->FuncMask = 0xf; 2874 mach->ExecMask = 0xf; 2875 2876 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 2877 assert(mach->CondStackTop == 0); 2878 assert(mach->LoopStackTop == 0); 2879 assert(mach->ContStackTop == 0); 2880 assert(mach->CallStackTop == 0); 2881 2882 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 2883 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 2884 2885 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 2886 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 2887 mach->Primitives[0] = 0; 2888 } 2889 2890 for (i = 0; i < QUAD_SIZE; i++) { 2891 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 2892 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 2893 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 2894 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 2895 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 2896 } 2897 2898 /* execute declarations (interpolants) */ 2899 for (i = 0; i < mach->NumDeclarations; i++) { 2900 exec_declaration( mach, mach->Declarations+i ); 2901 } 2902 2903 /* execute instructions, until pc is set to -1 */ 2904 while (pc != -1) { 2905 assert(pc < (int) mach->NumInstructions); 2906 exec_instruction( mach, mach->Instructions + pc, &pc ); 2907 } 2908 2909#if 0 2910 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 2911 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 2912 /* 2913 * Scale back depth component. 2914 */ 2915 for (i = 0; i < 4; i++) 2916 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 2917 } 2918#endif 2919 2920 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 2921} 2922 2923 2924