tgsi_exec.c revision 4d710dd3cf3187e94e5765b46e4dd6899a7a41d6
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_parse.h" 57#include "tgsi/tgsi_util.h" 58#include "tgsi_exec.h" 59#include "util/u_memory.h" 60#include "util/u_math.h" 61 62#define FAST_MATH 1 63 64#define TILE_TOP_LEFT 0 65#define TILE_TOP_RIGHT 1 66#define TILE_BOTTOM_LEFT 2 67#define TILE_BOTTOM_RIGHT 3 68 69#define CHAN_X 0 70#define CHAN_Y 1 71#define CHAN_Z 2 72#define CHAN_W 3 73 74/* 75 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 76 */ 77#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 78#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 79#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 80#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 81#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 82#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 83#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 84#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 85#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 86#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 87#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 88#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 89#define TEMP_128_I TGSI_EXEC_TEMP_128_I 90#define TEMP_128_C TGSI_EXEC_TEMP_128_C 91#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 92#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 93#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 94#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 95#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 96#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 97#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 98#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 99#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 100#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 101#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 102#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 103#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 104#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 105#define TEMP_R0 TGSI_EXEC_TEMP_R0 106 107#define IS_CHANNEL_ENABLED(INST, CHAN)\ 108 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 109 110#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 111 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 112 113#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 114 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 115 if (IS_CHANNEL_ENABLED( INST, CHAN )) 116 117#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 118 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 119 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 120 121 122/** The execution mask depends on the conditional mask and the loop mask */ 123#define UPDATE_EXEC_MASK(MACH) \ 124 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 125 126/** 127 * Initialize machine state by expanding tokens to full instructions, 128 * allocating temporary storage, setting up constants, etc. 129 * After this, we can call tgsi_exec_machine_run() many times. 130 */ 131void 132tgsi_exec_machine_bind_shader( 133 struct tgsi_exec_machine *mach, 134 const struct tgsi_token *tokens, 135 uint numSamplers, 136 struct tgsi_sampler **samplers) 137{ 138 uint k; 139 struct tgsi_parse_context parse; 140 struct tgsi_exec_labels *labels = &mach->Labels; 141 struct tgsi_full_instruction *instructions; 142 struct tgsi_full_declaration *declarations; 143 uint maxInstructions = 10, numInstructions = 0; 144 uint maxDeclarations = 10, numDeclarations = 0; 145 uint instno = 0; 146 147#if 0 148 tgsi_dump(tokens, 0); 149#endif 150 151 util_init_math(); 152 153 mach->Tokens = tokens; 154 mach->Samplers = samplers; 155 156 k = tgsi_parse_init (&parse, mach->Tokens); 157 if (k != TGSI_PARSE_OK) { 158 debug_printf( "Problem parsing!\n" ); 159 return; 160 } 161 162 mach->Processor = parse.FullHeader.Processor.Processor; 163 mach->ImmLimit = 0; 164 labels->count = 0; 165 166 declarations = (struct tgsi_full_declaration *) 167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 168 169 if (!declarations) { 170 return; 171 } 172 173 instructions = (struct tgsi_full_instruction *) 174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 175 176 if (!instructions) { 177 FREE( declarations ); 178 return; 179 } 180 181 while( !tgsi_parse_end_of_tokens( &parse ) ) { 182 uint pointer = parse.Position; 183 uint i; 184 185 tgsi_parse_token( &parse ); 186 switch( parse.FullToken.Token.Type ) { 187 case TGSI_TOKEN_TYPE_DECLARATION: 188 /* save expanded declaration */ 189 if (numDeclarations == maxDeclarations) { 190 declarations = REALLOC(declarations, 191 maxDeclarations 192 * sizeof(struct tgsi_full_declaration), 193 (maxDeclarations + 10) 194 * sizeof(struct tgsi_full_declaration)); 195 maxDeclarations += 10; 196 } 197 memcpy(declarations + numDeclarations, 198 &parse.FullToken.FullDeclaration, 199 sizeof(declarations[0])); 200 numDeclarations++; 201 break; 202 203 case TGSI_TOKEN_TYPE_IMMEDIATE: 204 { 205 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; 206 assert( size % 4 == 0 ); 207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); 208 209 for( i = 0; i < size; i++ ) { 210 mach->Imms[mach->ImmLimit + i / 4][i % 4] = 211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 212 } 213 mach->ImmLimit += size / 4; 214 } 215 break; 216 217 case TGSI_TOKEN_TYPE_INSTRUCTION: 218 assert( labels->count < MAX_LABELS ); 219 220 labels->labels[labels->count][0] = instno; 221 labels->labels[labels->count][1] = pointer; 222 labels->count++; 223 224 /* save expanded instruction */ 225 if (numInstructions == maxInstructions) { 226 instructions = REALLOC(instructions, 227 maxInstructions 228 * sizeof(struct tgsi_full_instruction), 229 (maxInstructions + 10) 230 * sizeof(struct tgsi_full_instruction)); 231 maxInstructions += 10; 232 } 233 memcpy(instructions + numInstructions, 234 &parse.FullToken.FullInstruction, 235 sizeof(instructions[0])); 236 numInstructions++; 237 break; 238 239 default: 240 assert( 0 ); 241 } 242 } 243 tgsi_parse_free (&parse); 244 245 if (mach->Declarations) { 246 FREE( mach->Declarations ); 247 } 248 mach->Declarations = declarations; 249 mach->NumDeclarations = numDeclarations; 250 251 if (mach->Instructions) { 252 FREE( mach->Instructions ); 253 } 254 mach->Instructions = instructions; 255 mach->NumInstructions = numInstructions; 256} 257 258 259void 260tgsi_exec_machine_init( 261 struct tgsi_exec_machine *mach ) 262{ 263 uint i; 264 265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); 266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 267 268 /* Setup constants. */ 269 for( i = 0; i < 4; i++ ) { 270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 280 } 281} 282 283 284void 285tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) 286{ 287 if (mach->Instructions) { 288 FREE(mach->Instructions); 289 mach->Instructions = NULL; 290 mach->NumInstructions = 0; 291 } 292 if (mach->Declarations) { 293 FREE(mach->Declarations); 294 mach->Declarations = NULL; 295 mach->NumDeclarations = 0; 296 } 297} 298 299 300static void 301micro_abs( 302 union tgsi_exec_channel *dst, 303 const union tgsi_exec_channel *src ) 304{ 305 dst->f[0] = fabsf( src->f[0] ); 306 dst->f[1] = fabsf( src->f[1] ); 307 dst->f[2] = fabsf( src->f[2] ); 308 dst->f[3] = fabsf( src->f[3] ); 309} 310 311static void 312micro_add( 313 union tgsi_exec_channel *dst, 314 const union tgsi_exec_channel *src0, 315 const union tgsi_exec_channel *src1 ) 316{ 317 dst->f[0] = src0->f[0] + src1->f[0]; 318 dst->f[1] = src0->f[1] + src1->f[1]; 319 dst->f[2] = src0->f[2] + src1->f[2]; 320 dst->f[3] = src0->f[3] + src1->f[3]; 321} 322 323#if 0 324static void 325micro_iadd( 326 union tgsi_exec_channel *dst, 327 const union tgsi_exec_channel *src0, 328 const union tgsi_exec_channel *src1 ) 329{ 330 dst->i[0] = src0->i[0] + src1->i[0]; 331 dst->i[1] = src0->i[1] + src1->i[1]; 332 dst->i[2] = src0->i[2] + src1->i[2]; 333 dst->i[3] = src0->i[3] + src1->i[3]; 334} 335#endif 336 337static void 338micro_and( 339 union tgsi_exec_channel *dst, 340 const union tgsi_exec_channel *src0, 341 const union tgsi_exec_channel *src1 ) 342{ 343 dst->u[0] = src0->u[0] & src1->u[0]; 344 dst->u[1] = src0->u[1] & src1->u[1]; 345 dst->u[2] = src0->u[2] & src1->u[2]; 346 dst->u[3] = src0->u[3] & src1->u[3]; 347} 348 349static void 350micro_ceil( 351 union tgsi_exec_channel *dst, 352 const union tgsi_exec_channel *src ) 353{ 354 dst->f[0] = ceilf( src->f[0] ); 355 dst->f[1] = ceilf( src->f[1] ); 356 dst->f[2] = ceilf( src->f[2] ); 357 dst->f[3] = ceilf( src->f[3] ); 358} 359 360static void 361micro_cos( 362 union tgsi_exec_channel *dst, 363 const union tgsi_exec_channel *src ) 364{ 365 dst->f[0] = cosf( src->f[0] ); 366 dst->f[1] = cosf( src->f[1] ); 367 dst->f[2] = cosf( src->f[2] ); 368 dst->f[3] = cosf( src->f[3] ); 369} 370 371static void 372micro_ddx( 373 union tgsi_exec_channel *dst, 374 const union tgsi_exec_channel *src ) 375{ 376 dst->f[0] = 377 dst->f[1] = 378 dst->f[2] = 379 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 380} 381 382static void 383micro_ddy( 384 union tgsi_exec_channel *dst, 385 const union tgsi_exec_channel *src ) 386{ 387 dst->f[0] = 388 dst->f[1] = 389 dst->f[2] = 390 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 391} 392 393static void 394micro_div( 395 union tgsi_exec_channel *dst, 396 const union tgsi_exec_channel *src0, 397 const union tgsi_exec_channel *src1 ) 398{ 399 if (src1->f[0] != 0) { 400 dst->f[0] = src0->f[0] / src1->f[0]; 401 } 402 if (src1->f[1] != 0) { 403 dst->f[1] = src0->f[1] / src1->f[1]; 404 } 405 if (src1->f[2] != 0) { 406 dst->f[2] = src0->f[2] / src1->f[2]; 407 } 408 if (src1->f[3] != 0) { 409 dst->f[3] = src0->f[3] / src1->f[3]; 410 } 411} 412 413#if 0 414static void 415micro_udiv( 416 union tgsi_exec_channel *dst, 417 const union tgsi_exec_channel *src0, 418 const union tgsi_exec_channel *src1 ) 419{ 420 dst->u[0] = src0->u[0] / src1->u[0]; 421 dst->u[1] = src0->u[1] / src1->u[1]; 422 dst->u[2] = src0->u[2] / src1->u[2]; 423 dst->u[3] = src0->u[3] / src1->u[3]; 424} 425#endif 426 427static void 428micro_eq( 429 union tgsi_exec_channel *dst, 430 const union tgsi_exec_channel *src0, 431 const union tgsi_exec_channel *src1, 432 const union tgsi_exec_channel *src2, 433 const union tgsi_exec_channel *src3 ) 434{ 435 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 436 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 437 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 438 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 439} 440 441#if 0 442static void 443micro_ieq( 444 union tgsi_exec_channel *dst, 445 const union tgsi_exec_channel *src0, 446 const union tgsi_exec_channel *src1, 447 const union tgsi_exec_channel *src2, 448 const union tgsi_exec_channel *src3 ) 449{ 450 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 451 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 452 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 453 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 454} 455#endif 456 457static void 458micro_exp2( 459 union tgsi_exec_channel *dst, 460 const union tgsi_exec_channel *src) 461{ 462#if FAST_MATH 463 dst->f[0] = util_fast_exp2( src->f[0] ); 464 dst->f[1] = util_fast_exp2( src->f[1] ); 465 dst->f[2] = util_fast_exp2( src->f[2] ); 466 dst->f[3] = util_fast_exp2( src->f[3] ); 467#else 468 dst->f[0] = powf( 2.0f, src->f[0] ); 469 dst->f[1] = powf( 2.0f, src->f[1] ); 470 dst->f[2] = powf( 2.0f, src->f[2] ); 471 dst->f[3] = powf( 2.0f, src->f[3] ); 472#endif 473} 474 475#if 0 476static void 477micro_f2ut( 478 union tgsi_exec_channel *dst, 479 const union tgsi_exec_channel *src ) 480{ 481 dst->u[0] = (uint) src->f[0]; 482 dst->u[1] = (uint) src->f[1]; 483 dst->u[2] = (uint) src->f[2]; 484 dst->u[3] = (uint) src->f[3]; 485} 486#endif 487 488static void 489micro_flr( 490 union tgsi_exec_channel *dst, 491 const union tgsi_exec_channel *src ) 492{ 493 dst->f[0] = floorf( src->f[0] ); 494 dst->f[1] = floorf( src->f[1] ); 495 dst->f[2] = floorf( src->f[2] ); 496 dst->f[3] = floorf( src->f[3] ); 497} 498 499static void 500micro_frc( 501 union tgsi_exec_channel *dst, 502 const union tgsi_exec_channel *src ) 503{ 504 dst->f[0] = src->f[0] - floorf( src->f[0] ); 505 dst->f[1] = src->f[1] - floorf( src->f[1] ); 506 dst->f[2] = src->f[2] - floorf( src->f[2] ); 507 dst->f[3] = src->f[3] - floorf( src->f[3] ); 508} 509 510static void 511micro_ge( 512 union tgsi_exec_channel *dst, 513 const union tgsi_exec_channel *src0, 514 const union tgsi_exec_channel *src1, 515 const union tgsi_exec_channel *src2, 516 const union tgsi_exec_channel *src3 ) 517{ 518 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; 519 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; 520 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; 521 dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; 522} 523 524static void 525micro_i2f( 526 union tgsi_exec_channel *dst, 527 const union tgsi_exec_channel *src ) 528{ 529 dst->f[0] = (float) src->i[0]; 530 dst->f[1] = (float) src->i[1]; 531 dst->f[2] = (float) src->i[2]; 532 dst->f[3] = (float) src->i[3]; 533} 534 535static void 536micro_lg2( 537 union tgsi_exec_channel *dst, 538 const union tgsi_exec_channel *src ) 539{ 540#if FAST_MATH 541 dst->f[0] = util_fast_log2( src->f[0] ); 542 dst->f[1] = util_fast_log2( src->f[1] ); 543 dst->f[2] = util_fast_log2( src->f[2] ); 544 dst->f[3] = util_fast_log2( src->f[3] ); 545#else 546 dst->f[0] = logf( src->f[0] ) * 1.442695f; 547 dst->f[1] = logf( src->f[1] ) * 1.442695f; 548 dst->f[2] = logf( src->f[2] ) * 1.442695f; 549 dst->f[3] = logf( src->f[3] ) * 1.442695f; 550#endif 551} 552 553static void 554micro_le( 555 union tgsi_exec_channel *dst, 556 const union tgsi_exec_channel *src0, 557 const union tgsi_exec_channel *src1, 558 const union tgsi_exec_channel *src2, 559 const union tgsi_exec_channel *src3 ) 560{ 561 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 562 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 563 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 564 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 565} 566 567static void 568micro_lt( 569 union tgsi_exec_channel *dst, 570 const union tgsi_exec_channel *src0, 571 const union tgsi_exec_channel *src1, 572 const union tgsi_exec_channel *src2, 573 const union tgsi_exec_channel *src3 ) 574{ 575 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 576 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 577 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 578 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 579} 580 581#if 0 582static void 583micro_ilt( 584 union tgsi_exec_channel *dst, 585 const union tgsi_exec_channel *src0, 586 const union tgsi_exec_channel *src1, 587 const union tgsi_exec_channel *src2, 588 const union tgsi_exec_channel *src3 ) 589{ 590 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 591 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 592 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 593 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 594} 595#endif 596 597#if 0 598static void 599micro_ult( 600 union tgsi_exec_channel *dst, 601 const union tgsi_exec_channel *src0, 602 const union tgsi_exec_channel *src1, 603 const union tgsi_exec_channel *src2, 604 const union tgsi_exec_channel *src3 ) 605{ 606 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 607 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 608 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 609 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 610} 611#endif 612 613static void 614micro_max( 615 union tgsi_exec_channel *dst, 616 const union tgsi_exec_channel *src0, 617 const union tgsi_exec_channel *src1 ) 618{ 619 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 620 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 621 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 622 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 623} 624 625#if 0 626static void 627micro_imax( 628 union tgsi_exec_channel *dst, 629 const union tgsi_exec_channel *src0, 630 const union tgsi_exec_channel *src1 ) 631{ 632 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 633 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 634 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 635 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 636} 637#endif 638 639#if 0 640static void 641micro_umax( 642 union tgsi_exec_channel *dst, 643 const union tgsi_exec_channel *src0, 644 const union tgsi_exec_channel *src1 ) 645{ 646 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 647 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 648 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 649 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 650} 651#endif 652 653static void 654micro_min( 655 union tgsi_exec_channel *dst, 656 const union tgsi_exec_channel *src0, 657 const union tgsi_exec_channel *src1 ) 658{ 659 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 660 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 661 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 662 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 663} 664 665#if 0 666static void 667micro_imin( 668 union tgsi_exec_channel *dst, 669 const union tgsi_exec_channel *src0, 670 const union tgsi_exec_channel *src1 ) 671{ 672 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 673 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 674 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 675 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 676} 677#endif 678 679#if 0 680static void 681micro_umin( 682 union tgsi_exec_channel *dst, 683 const union tgsi_exec_channel *src0, 684 const union tgsi_exec_channel *src1 ) 685{ 686 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 687 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 688 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 689 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 690} 691#endif 692 693#if 0 694static void 695micro_umod( 696 union tgsi_exec_channel *dst, 697 const union tgsi_exec_channel *src0, 698 const union tgsi_exec_channel *src1 ) 699{ 700 dst->u[0] = src0->u[0] % src1->u[0]; 701 dst->u[1] = src0->u[1] % src1->u[1]; 702 dst->u[2] = src0->u[2] % src1->u[2]; 703 dst->u[3] = src0->u[3] % src1->u[3]; 704} 705#endif 706 707static void 708micro_mul( 709 union tgsi_exec_channel *dst, 710 const union tgsi_exec_channel *src0, 711 const union tgsi_exec_channel *src1 ) 712{ 713 dst->f[0] = src0->f[0] * src1->f[0]; 714 dst->f[1] = src0->f[1] * src1->f[1]; 715 dst->f[2] = src0->f[2] * src1->f[2]; 716 dst->f[3] = src0->f[3] * src1->f[3]; 717} 718 719#if 0 720static void 721micro_imul( 722 union tgsi_exec_channel *dst, 723 const union tgsi_exec_channel *src0, 724 const union tgsi_exec_channel *src1 ) 725{ 726 dst->i[0] = src0->i[0] * src1->i[0]; 727 dst->i[1] = src0->i[1] * src1->i[1]; 728 dst->i[2] = src0->i[2] * src1->i[2]; 729 dst->i[3] = src0->i[3] * src1->i[3]; 730} 731#endif 732 733#if 0 734static void 735micro_imul64( 736 union tgsi_exec_channel *dst0, 737 union tgsi_exec_channel *dst1, 738 const union tgsi_exec_channel *src0, 739 const union tgsi_exec_channel *src1 ) 740{ 741 dst1->i[0] = src0->i[0] * src1->i[0]; 742 dst1->i[1] = src0->i[1] * src1->i[1]; 743 dst1->i[2] = src0->i[2] * src1->i[2]; 744 dst1->i[3] = src0->i[3] * src1->i[3]; 745 dst0->i[0] = 0; 746 dst0->i[1] = 0; 747 dst0->i[2] = 0; 748 dst0->i[3] = 0; 749} 750#endif 751 752#if 0 753static void 754micro_umul64( 755 union tgsi_exec_channel *dst0, 756 union tgsi_exec_channel *dst1, 757 const union tgsi_exec_channel *src0, 758 const union tgsi_exec_channel *src1 ) 759{ 760 dst1->u[0] = src0->u[0] * src1->u[0]; 761 dst1->u[1] = src0->u[1] * src1->u[1]; 762 dst1->u[2] = src0->u[2] * src1->u[2]; 763 dst1->u[3] = src0->u[3] * src1->u[3]; 764 dst0->u[0] = 0; 765 dst0->u[1] = 0; 766 dst0->u[2] = 0; 767 dst0->u[3] = 0; 768} 769#endif 770 771 772#if 0 773static void 774micro_movc( 775 union tgsi_exec_channel *dst, 776 const union tgsi_exec_channel *src0, 777 const union tgsi_exec_channel *src1, 778 const union tgsi_exec_channel *src2 ) 779{ 780 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 781 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 782 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 783 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 784} 785#endif 786 787static void 788micro_neg( 789 union tgsi_exec_channel *dst, 790 const union tgsi_exec_channel *src ) 791{ 792 dst->f[0] = -src->f[0]; 793 dst->f[1] = -src->f[1]; 794 dst->f[2] = -src->f[2]; 795 dst->f[3] = -src->f[3]; 796} 797 798#if 0 799static void 800micro_ineg( 801 union tgsi_exec_channel *dst, 802 const union tgsi_exec_channel *src ) 803{ 804 dst->i[0] = -src->i[0]; 805 dst->i[1] = -src->i[1]; 806 dst->i[2] = -src->i[2]; 807 dst->i[3] = -src->i[3]; 808} 809#endif 810 811static void 812micro_not( 813 union tgsi_exec_channel *dst, 814 const union tgsi_exec_channel *src ) 815{ 816 dst->u[0] = ~src->u[0]; 817 dst->u[1] = ~src->u[1]; 818 dst->u[2] = ~src->u[2]; 819 dst->u[3] = ~src->u[3]; 820} 821 822static void 823micro_or( 824 union tgsi_exec_channel *dst, 825 const union tgsi_exec_channel *src0, 826 const union tgsi_exec_channel *src1 ) 827{ 828 dst->u[0] = src0->u[0] | src1->u[0]; 829 dst->u[1] = src0->u[1] | src1->u[1]; 830 dst->u[2] = src0->u[2] | src1->u[2]; 831 dst->u[3] = src0->u[3] | src1->u[3]; 832} 833 834static void 835micro_pow( 836 union tgsi_exec_channel *dst, 837 const union tgsi_exec_channel *src0, 838 const union tgsi_exec_channel *src1 ) 839{ 840#if FAST_MATH 841 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 842 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 843 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 844 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 845#else 846 dst->f[0] = powf( src0->f[0], src1->f[0] ); 847 dst->f[1] = powf( src0->f[1], src1->f[1] ); 848 dst->f[2] = powf( src0->f[2], src1->f[2] ); 849 dst->f[3] = powf( src0->f[3], src1->f[3] ); 850#endif 851} 852 853static void 854micro_rnd( 855 union tgsi_exec_channel *dst, 856 const union tgsi_exec_channel *src ) 857{ 858 dst->f[0] = floorf( src->f[0] + 0.5f ); 859 dst->f[1] = floorf( src->f[1] + 0.5f ); 860 dst->f[2] = floorf( src->f[2] + 0.5f ); 861 dst->f[3] = floorf( src->f[3] + 0.5f ); 862} 863 864static void 865micro_sgn( 866 union tgsi_exec_channel *dst, 867 const union tgsi_exec_channel *src ) 868{ 869 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 870 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 871 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 872 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 873} 874 875static void 876micro_shl( 877 union tgsi_exec_channel *dst, 878 const union tgsi_exec_channel *src0, 879 const union tgsi_exec_channel *src1 ) 880{ 881 dst->i[0] = src0->i[0] << src1->i[0]; 882 dst->i[1] = src0->i[1] << src1->i[1]; 883 dst->i[2] = src0->i[2] << src1->i[2]; 884 dst->i[3] = src0->i[3] << src1->i[3]; 885} 886 887static void 888micro_ishr( 889 union tgsi_exec_channel *dst, 890 const union tgsi_exec_channel *src0, 891 const union tgsi_exec_channel *src1 ) 892{ 893 dst->i[0] = src0->i[0] >> src1->i[0]; 894 dst->i[1] = src0->i[1] >> src1->i[1]; 895 dst->i[2] = src0->i[2] >> src1->i[2]; 896 dst->i[3] = src0->i[3] >> src1->i[3]; 897} 898 899static void 900micro_trunc( 901 union tgsi_exec_channel *dst, 902 const union tgsi_exec_channel *src0 ) 903{ 904 dst->f[0] = (float) (int) src0->f[0]; 905 dst->f[1] = (float) (int) src0->f[1]; 906 dst->f[2] = (float) (int) src0->f[2]; 907 dst->f[3] = (float) (int) src0->f[3]; 908} 909 910#if 0 911static void 912micro_ushr( 913 union tgsi_exec_channel *dst, 914 const union tgsi_exec_channel *src0, 915 const union tgsi_exec_channel *src1 ) 916{ 917 dst->u[0] = src0->u[0] >> src1->u[0]; 918 dst->u[1] = src0->u[1] >> src1->u[1]; 919 dst->u[2] = src0->u[2] >> src1->u[2]; 920 dst->u[3] = src0->u[3] >> src1->u[3]; 921} 922#endif 923 924static void 925micro_sin( 926 union tgsi_exec_channel *dst, 927 const union tgsi_exec_channel *src ) 928{ 929 dst->f[0] = sinf( src->f[0] ); 930 dst->f[1] = sinf( src->f[1] ); 931 dst->f[2] = sinf( src->f[2] ); 932 dst->f[3] = sinf( src->f[3] ); 933} 934 935static void 936micro_sqrt( union tgsi_exec_channel *dst, 937 const union tgsi_exec_channel *src ) 938{ 939 dst->f[0] = sqrtf( src->f[0] ); 940 dst->f[1] = sqrtf( src->f[1] ); 941 dst->f[2] = sqrtf( src->f[2] ); 942 dst->f[3] = sqrtf( src->f[3] ); 943} 944 945static void 946micro_sub( 947 union tgsi_exec_channel *dst, 948 const union tgsi_exec_channel *src0, 949 const union tgsi_exec_channel *src1 ) 950{ 951 dst->f[0] = src0->f[0] - src1->f[0]; 952 dst->f[1] = src0->f[1] - src1->f[1]; 953 dst->f[2] = src0->f[2] - src1->f[2]; 954 dst->f[3] = src0->f[3] - src1->f[3]; 955} 956 957#if 0 958static void 959micro_u2f( 960 union tgsi_exec_channel *dst, 961 const union tgsi_exec_channel *src ) 962{ 963 dst->f[0] = (float) src->u[0]; 964 dst->f[1] = (float) src->u[1]; 965 dst->f[2] = (float) src->u[2]; 966 dst->f[3] = (float) src->u[3]; 967} 968#endif 969 970static void 971micro_xor( 972 union tgsi_exec_channel *dst, 973 const union tgsi_exec_channel *src0, 974 const union tgsi_exec_channel *src1 ) 975{ 976 dst->u[0] = src0->u[0] ^ src1->u[0]; 977 dst->u[1] = src0->u[1] ^ src1->u[1]; 978 dst->u[2] = src0->u[2] ^ src1->u[2]; 979 dst->u[3] = src0->u[3] ^ src1->u[3]; 980} 981 982static void 983fetch_src_file_channel( 984 const struct tgsi_exec_machine *mach, 985 const uint file, 986 const uint swizzle, 987 const union tgsi_exec_channel *index, 988 union tgsi_exec_channel *chan ) 989{ 990 switch( swizzle ) { 991 case TGSI_EXTSWIZZLE_X: 992 case TGSI_EXTSWIZZLE_Y: 993 case TGSI_EXTSWIZZLE_Z: 994 case TGSI_EXTSWIZZLE_W: 995 switch( file ) { 996 case TGSI_FILE_CONSTANT: 997 assert(mach->Consts); 998 if (index->i[0] < 0) 999 chan->f[0] = 0.0f; 1000 else 1001 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1002 if (index->i[1] < 0) 1003 chan->f[1] = 0.0f; 1004 else 1005 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1006 if (index->i[2] < 0) 1007 chan->f[2] = 0.0f; 1008 else 1009 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1010 if (index->i[3] < 0) 1011 chan->f[3] = 0.0f; 1012 else 1013 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1014 break; 1015 1016 case TGSI_FILE_INPUT: 1017 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1018 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1019 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1020 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1021 break; 1022 1023 case TGSI_FILE_TEMPORARY: 1024 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1025 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1026 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1027 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1028 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1029 break; 1030 1031 case TGSI_FILE_IMMEDIATE: 1032 assert( index->i[0] < (int) mach->ImmLimit ); 1033 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1034 assert( index->i[1] < (int) mach->ImmLimit ); 1035 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1036 assert( index->i[2] < (int) mach->ImmLimit ); 1037 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1038 assert( index->i[3] < (int) mach->ImmLimit ); 1039 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1040 break; 1041 1042 case TGSI_FILE_ADDRESS: 1043 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1044 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1045 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1046 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1047 break; 1048 1049 case TGSI_FILE_OUTPUT: 1050 /* vertex/fragment output vars can be read too */ 1051 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1052 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1053 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1054 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1055 break; 1056 1057 default: 1058 assert( 0 ); 1059 } 1060 break; 1061 1062 case TGSI_EXTSWIZZLE_ZERO: 1063 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1064 break; 1065 1066 case TGSI_EXTSWIZZLE_ONE: 1067 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1068 break; 1069 1070 default: 1071 assert( 0 ); 1072 } 1073} 1074 1075static void 1076fetch_source( 1077 const struct tgsi_exec_machine *mach, 1078 union tgsi_exec_channel *chan, 1079 const struct tgsi_full_src_register *reg, 1080 const uint chan_index ) 1081{ 1082 union tgsi_exec_channel index; 1083 uint swizzle; 1084 1085 /* We start with a direct index into a register file. 1086 * 1087 * file[1], 1088 * where: 1089 * file = SrcRegister.File 1090 * [1] = SrcRegister.Index 1091 */ 1092 index.i[0] = 1093 index.i[1] = 1094 index.i[2] = 1095 index.i[3] = reg->SrcRegister.Index; 1096 1097 /* There is an extra source register that indirectly subscripts 1098 * a register file. The direct index now becomes an offset 1099 * that is being added to the indirect register. 1100 * 1101 * file[ind[2].x+1], 1102 * where: 1103 * ind = SrcRegisterInd.File 1104 * [2] = SrcRegisterInd.Index 1105 * .x = SrcRegisterInd.SwizzleX 1106 */ 1107 if (reg->SrcRegister.Indirect) { 1108 union tgsi_exec_channel index2; 1109 union tgsi_exec_channel indir_index; 1110 const uint execmask = mach->ExecMask; 1111 uint i; 1112 1113 /* which address register (always zero now) */ 1114 index2.i[0] = 1115 index2.i[1] = 1116 index2.i[2] = 1117 index2.i[3] = reg->SrcRegisterInd.Index; 1118 1119 /* get current value of address register[swizzle] */ 1120 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1121 fetch_src_file_channel( 1122 mach, 1123 reg->SrcRegisterInd.File, 1124 swizzle, 1125 &index2, 1126 &indir_index ); 1127 1128 /* add value of address register to the offset */ 1129 index.i[0] += (int) indir_index.f[0]; 1130 index.i[1] += (int) indir_index.f[1]; 1131 index.i[2] += (int) indir_index.f[2]; 1132 index.i[3] += (int) indir_index.f[3]; 1133 1134 /* for disabled execution channels, zero-out the index to 1135 * avoid using a potential garbage value. 1136 */ 1137 for (i = 0; i < QUAD_SIZE; i++) { 1138 if ((execmask & (1 << i)) == 0) 1139 index.i[i] = 0; 1140 } 1141 } 1142 1143 /* There is an extra source register that is a second 1144 * subscript to a register file. Effectively it means that 1145 * the register file is actually a 2D array of registers. 1146 * 1147 * file[1][3] == file[1*sizeof(file[1])+3], 1148 * where: 1149 * [3] = SrcRegisterDim.Index 1150 */ 1151 if (reg->SrcRegister.Dimension) { 1152 /* The size of the first-order array depends on the register file type. 1153 * We need to multiply the index to the first array to get an effective, 1154 * "flat" index that points to the beginning of the second-order array. 1155 */ 1156 switch (reg->SrcRegister.File) { 1157 case TGSI_FILE_INPUT: 1158 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1159 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1160 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1161 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1162 break; 1163 case TGSI_FILE_CONSTANT: 1164 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1165 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1166 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1167 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1168 break; 1169 default: 1170 assert( 0 ); 1171 } 1172 1173 index.i[0] += reg->SrcRegisterDim.Index; 1174 index.i[1] += reg->SrcRegisterDim.Index; 1175 index.i[2] += reg->SrcRegisterDim.Index; 1176 index.i[3] += reg->SrcRegisterDim.Index; 1177 1178 /* Again, the second subscript index can be addressed indirectly 1179 * identically to the first one. 1180 * Nothing stops us from indirectly addressing the indirect register, 1181 * but there is no need for that, so we won't exercise it. 1182 * 1183 * file[1][ind[4].y+3], 1184 * where: 1185 * ind = SrcRegisterDimInd.File 1186 * [4] = SrcRegisterDimInd.Index 1187 * .y = SrcRegisterDimInd.SwizzleX 1188 */ 1189 if (reg->SrcRegisterDim.Indirect) { 1190 union tgsi_exec_channel index2; 1191 union tgsi_exec_channel indir_index; 1192 const uint execmask = mach->ExecMask; 1193 uint i; 1194 1195 index2.i[0] = 1196 index2.i[1] = 1197 index2.i[2] = 1198 index2.i[3] = reg->SrcRegisterDimInd.Index; 1199 1200 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1201 fetch_src_file_channel( 1202 mach, 1203 reg->SrcRegisterDimInd.File, 1204 swizzle, 1205 &index2, 1206 &indir_index ); 1207 1208 index.i[0] += (int) indir_index.f[0]; 1209 index.i[1] += (int) indir_index.f[1]; 1210 index.i[2] += (int) indir_index.f[2]; 1211 index.i[3] += (int) indir_index.f[3]; 1212 1213 /* for disabled execution channels, zero-out the index to 1214 * avoid using a potential garbage value. 1215 */ 1216 for (i = 0; i < QUAD_SIZE; i++) { 1217 if ((execmask & (1 << i)) == 0) 1218 index.i[i] = 0; 1219 } 1220 } 1221 1222 /* If by any chance there was a need for a 3D array of register 1223 * files, we would have to check whether SrcRegisterDim is followed 1224 * by a dimension register and continue the saga. 1225 */ 1226 } 1227 1228 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1229 fetch_src_file_channel( 1230 mach, 1231 reg->SrcRegister.File, 1232 swizzle, 1233 &index, 1234 chan ); 1235 1236 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1237 case TGSI_UTIL_SIGN_CLEAR: 1238 micro_abs( chan, chan ); 1239 break; 1240 1241 case TGSI_UTIL_SIGN_SET: 1242 micro_abs( chan, chan ); 1243 micro_neg( chan, chan ); 1244 break; 1245 1246 case TGSI_UTIL_SIGN_TOGGLE: 1247 micro_neg( chan, chan ); 1248 break; 1249 1250 case TGSI_UTIL_SIGN_KEEP: 1251 break; 1252 } 1253 1254 if (reg->SrcRegisterExtMod.Complement) { 1255 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1256 } 1257} 1258 1259static void 1260store_dest( 1261 struct tgsi_exec_machine *mach, 1262 const union tgsi_exec_channel *chan, 1263 const struct tgsi_full_dst_register *reg, 1264 const struct tgsi_full_instruction *inst, 1265 uint chan_index ) 1266{ 1267 uint i; 1268 union tgsi_exec_channel null; 1269 union tgsi_exec_channel *dst; 1270 uint execmask = mach->ExecMask; 1271 1272 switch (reg->DstRegister.File) { 1273 case TGSI_FILE_NULL: 1274 dst = &null; 1275 break; 1276 1277 case TGSI_FILE_OUTPUT: 1278 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1279 + reg->DstRegister.Index].xyzw[chan_index]; 1280 break; 1281 1282 case TGSI_FILE_TEMPORARY: 1283 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); 1284 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 1285 break; 1286 1287 case TGSI_FILE_ADDRESS: 1288 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 1289 break; 1290 1291 default: 1292 assert( 0 ); 1293 return; 1294 } 1295 1296 if (inst->InstructionExtNv.CondFlowEnable) { 1297 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1298 uint swizzle; 1299 uint shift; 1300 uint mask; 1301 uint test; 1302 1303 /* Only CC0 supported. 1304 */ 1305 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1306 1307 switch (chan_index) { 1308 case CHAN_X: 1309 swizzle = inst->InstructionExtNv.CondSwizzleX; 1310 break; 1311 case CHAN_Y: 1312 swizzle = inst->InstructionExtNv.CondSwizzleY; 1313 break; 1314 case CHAN_Z: 1315 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1316 break; 1317 case CHAN_W: 1318 swizzle = inst->InstructionExtNv.CondSwizzleW; 1319 break; 1320 default: 1321 assert( 0 ); 1322 return; 1323 } 1324 1325 switch (swizzle) { 1326 case TGSI_SWIZZLE_X: 1327 shift = TGSI_EXEC_CC_X_SHIFT; 1328 mask = TGSI_EXEC_CC_X_MASK; 1329 break; 1330 case TGSI_SWIZZLE_Y: 1331 shift = TGSI_EXEC_CC_Y_SHIFT; 1332 mask = TGSI_EXEC_CC_Y_MASK; 1333 break; 1334 case TGSI_SWIZZLE_Z: 1335 shift = TGSI_EXEC_CC_Z_SHIFT; 1336 mask = TGSI_EXEC_CC_Z_MASK; 1337 break; 1338 case TGSI_SWIZZLE_W: 1339 shift = TGSI_EXEC_CC_W_SHIFT; 1340 mask = TGSI_EXEC_CC_W_MASK; 1341 break; 1342 default: 1343 assert( 0 ); 1344 return; 1345 } 1346 1347 switch (inst->InstructionExtNv.CondMask) { 1348 case TGSI_CC_GT: 1349 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1350 for (i = 0; i < QUAD_SIZE; i++) 1351 if (cc->u[i] & test) 1352 execmask &= ~(1 << i); 1353 break; 1354 1355 case TGSI_CC_EQ: 1356 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1357 for (i = 0; i < QUAD_SIZE; i++) 1358 if (cc->u[i] & test) 1359 execmask &= ~(1 << i); 1360 break; 1361 1362 case TGSI_CC_LT: 1363 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1364 for (i = 0; i < QUAD_SIZE; i++) 1365 if (cc->u[i] & test) 1366 execmask &= ~(1 << i); 1367 break; 1368 1369 case TGSI_CC_GE: 1370 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1371 for (i = 0; i < QUAD_SIZE; i++) 1372 if (cc->u[i] & test) 1373 execmask &= ~(1 << i); 1374 break; 1375 1376 case TGSI_CC_LE: 1377 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1378 for (i = 0; i < QUAD_SIZE; i++) 1379 if (cc->u[i] & test) 1380 execmask &= ~(1 << i); 1381 break; 1382 1383 case TGSI_CC_NE: 1384 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1385 for (i = 0; i < QUAD_SIZE; i++) 1386 if (cc->u[i] & test) 1387 execmask &= ~(1 << i); 1388 break; 1389 1390 case TGSI_CC_TR: 1391 break; 1392 1393 case TGSI_CC_FL: 1394 for (i = 0; i < QUAD_SIZE; i++) 1395 execmask &= ~(1 << i); 1396 break; 1397 1398 default: 1399 assert( 0 ); 1400 return; 1401 } 1402 } 1403 1404 switch (inst->Instruction.Saturate) { 1405 case TGSI_SAT_NONE: 1406 for (i = 0; i < QUAD_SIZE; i++) 1407 if (execmask & (1 << i)) 1408 dst->i[i] = chan->i[i]; 1409 break; 1410 1411 case TGSI_SAT_ZERO_ONE: 1412 for (i = 0; i < QUAD_SIZE; i++) 1413 if (execmask & (1 << i)) { 1414 if (chan->f[i] < 0.0f) 1415 dst->f[i] = 0.0f; 1416 else if (chan->f[i] > 1.0f) 1417 dst->f[i] = 1.0f; 1418 else 1419 dst->i[i] = chan->i[i]; 1420 } 1421 break; 1422 1423 case TGSI_SAT_MINUS_PLUS_ONE: 1424 for (i = 0; i < QUAD_SIZE; i++) 1425 if (execmask & (1 << i)) { 1426 if (chan->f[i] < -1.0f) 1427 dst->f[i] = -1.0f; 1428 else if (chan->f[i] > 1.0f) 1429 dst->f[i] = 1.0f; 1430 else 1431 dst->i[i] = chan->i[i]; 1432 } 1433 break; 1434 1435 default: 1436 assert( 0 ); 1437 } 1438 1439 if (inst->InstructionExtNv.CondDstUpdate) { 1440 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1441 uint shift; 1442 uint mask; 1443 1444 /* Only CC0 supported. 1445 */ 1446 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1447 1448 switch (chan_index) { 1449 case CHAN_X: 1450 shift = TGSI_EXEC_CC_X_SHIFT; 1451 mask = ~TGSI_EXEC_CC_X_MASK; 1452 break; 1453 case CHAN_Y: 1454 shift = TGSI_EXEC_CC_Y_SHIFT; 1455 mask = ~TGSI_EXEC_CC_Y_MASK; 1456 break; 1457 case CHAN_Z: 1458 shift = TGSI_EXEC_CC_Z_SHIFT; 1459 mask = ~TGSI_EXEC_CC_Z_MASK; 1460 break; 1461 case CHAN_W: 1462 shift = TGSI_EXEC_CC_W_SHIFT; 1463 mask = ~TGSI_EXEC_CC_W_MASK; 1464 break; 1465 default: 1466 assert( 0 ); 1467 return; 1468 } 1469 1470 for (i = 0; i < QUAD_SIZE; i++) 1471 if (execmask & (1 << i)) { 1472 cc->u[i] &= mask; 1473 if (dst->f[i] < 0.0f) 1474 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1475 else if (dst->f[i] > 0.0f) 1476 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1477 else if (dst->f[i] == 0.0f) 1478 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1479 else 1480 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1481 } 1482 } 1483} 1484 1485#define FETCH(VAL,INDEX,CHAN)\ 1486 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1487 1488#define STORE(VAL,INDEX,CHAN)\ 1489 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1490 1491 1492/** 1493 * Execute ARB-style KIL which is predicated by a src register. 1494 * Kill fragment if any of the four values is less than zero. 1495 */ 1496static void 1497exec_kil(struct tgsi_exec_machine *mach, 1498 const struct tgsi_full_instruction *inst) 1499{ 1500 uint uniquemask; 1501 uint chan_index; 1502 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1503 union tgsi_exec_channel r[1]; 1504 1505 /* This mask stores component bits that were already tested. Note that 1506 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1507 * tested. */ 1508 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1509 1510 for (chan_index = 0; chan_index < 4; chan_index++) 1511 { 1512 uint swizzle; 1513 uint i; 1514 1515 /* unswizzle channel */ 1516 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1517 &inst->FullSrcRegisters[0], 1518 chan_index); 1519 1520 /* check if the component has not been already tested */ 1521 if (uniquemask & (1 << swizzle)) 1522 continue; 1523 uniquemask |= 1 << swizzle; 1524 1525 FETCH(&r[0], 0, chan_index); 1526 for (i = 0; i < 4; i++) 1527 if (r[0].f[i] < 0.0f) 1528 kilmask |= 1 << i; 1529 } 1530 1531 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1532} 1533 1534/** 1535 * Execute NVIDIA-style KIL which is predicated by a condition code. 1536 * Kill fragment if the condition code is TRUE. 1537 */ 1538static void 1539exec_kilp(struct tgsi_exec_machine *mach, 1540 const struct tgsi_full_instruction *inst) 1541{ 1542 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1543 1544 if (inst->InstructionExtNv.CondFlowEnable) { 1545 uint swizzle[4]; 1546 uint chan_index; 1547 1548 kilmask = 0x0; 1549 1550 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1551 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1552 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1553 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1554 1555 for (chan_index = 0; chan_index < 4; chan_index++) 1556 { 1557 uint i; 1558 1559 for (i = 0; i < 4; i++) { 1560 /* TODO: evaluate the condition code */ 1561 if (0) 1562 kilmask |= 1 << i; 1563 } 1564 } 1565 } 1566 else { 1567 /* "unconditional" kil */ 1568 kilmask = mach->ExecMask; 1569 } 1570 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1571} 1572 1573 1574/* 1575 * Fetch a four texture samples using STR texture coordinates. 1576 */ 1577static void 1578fetch_texel( struct tgsi_sampler *sampler, 1579 const union tgsi_exec_channel *s, 1580 const union tgsi_exec_channel *t, 1581 const union tgsi_exec_channel *p, 1582 float lodbias, /* XXX should be float[4] */ 1583 union tgsi_exec_channel *r, 1584 union tgsi_exec_channel *g, 1585 union tgsi_exec_channel *b, 1586 union tgsi_exec_channel *a ) 1587{ 1588 uint j; 1589 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1590 1591 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1592 1593 for (j = 0; j < 4; j++) { 1594 r->f[j] = rgba[0][j]; 1595 g->f[j] = rgba[1][j]; 1596 b->f[j] = rgba[2][j]; 1597 a->f[j] = rgba[3][j]; 1598 } 1599} 1600 1601 1602static void 1603exec_tex(struct tgsi_exec_machine *mach, 1604 const struct tgsi_full_instruction *inst, 1605 boolean biasLod, 1606 boolean projected) 1607{ 1608 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1609 union tgsi_exec_channel r[4]; 1610 uint chan_index; 1611 float lodBias; 1612 1613 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1614 1615 switch (inst->InstructionExtTexture.Texture) { 1616 case TGSI_TEXTURE_1D: 1617 1618 FETCH(&r[0], 0, CHAN_X); 1619 1620 if (projected) { 1621 FETCH(&r[1], 0, CHAN_W); 1622 micro_div( &r[0], &r[0], &r[1] ); 1623 } 1624 1625 if (biasLod) { 1626 FETCH(&r[1], 0, CHAN_W); 1627 lodBias = r[2].f[0]; 1628 } 1629 else 1630 lodBias = 0.0; 1631 1632 fetch_texel(mach->Samplers[unit], 1633 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ 1634 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1635 break; 1636 1637 case TGSI_TEXTURE_2D: 1638 case TGSI_TEXTURE_RECT: 1639 1640 FETCH(&r[0], 0, CHAN_X); 1641 FETCH(&r[1], 0, CHAN_Y); 1642 FETCH(&r[2], 0, CHAN_Z); 1643 1644 if (projected) { 1645 FETCH(&r[3], 0, CHAN_W); 1646 micro_div( &r[0], &r[0], &r[3] ); 1647 micro_div( &r[1], &r[1], &r[3] ); 1648 micro_div( &r[2], &r[2], &r[3] ); 1649 } 1650 1651 if (biasLod) { 1652 FETCH(&r[3], 0, CHAN_W); 1653 lodBias = r[3].f[0]; 1654 } 1655 else 1656 lodBias = 0.0; 1657 1658 fetch_texel(mach->Samplers[unit], 1659 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1660 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1661 break; 1662 1663 case TGSI_TEXTURE_3D: 1664 case TGSI_TEXTURE_CUBE: 1665 1666 FETCH(&r[0], 0, CHAN_X); 1667 FETCH(&r[1], 0, CHAN_Y); 1668 FETCH(&r[2], 0, CHAN_Z); 1669 1670 if (projected) { 1671 FETCH(&r[3], 0, CHAN_W); 1672 micro_div( &r[0], &r[0], &r[3] ); 1673 micro_div( &r[1], &r[1], &r[3] ); 1674 micro_div( &r[2], &r[2], &r[3] ); 1675 } 1676 1677 if (biasLod) { 1678 FETCH(&r[3], 0, CHAN_W); 1679 lodBias = r[3].f[0]; 1680 } 1681 else 1682 lodBias = 0.0; 1683 1684 fetch_texel(mach->Samplers[unit], 1685 &r[0], &r[1], &r[2], lodBias, 1686 &r[0], &r[1], &r[2], &r[3]); 1687 break; 1688 1689 default: 1690 assert (0); 1691 } 1692 1693 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1694 STORE( &r[chan_index], 0, chan_index ); 1695 } 1696} 1697 1698 1699/** 1700 * Evaluate a constant-valued coefficient at the position of the 1701 * current quad. 1702 */ 1703static void 1704eval_constant_coef( 1705 struct tgsi_exec_machine *mach, 1706 unsigned attrib, 1707 unsigned chan ) 1708{ 1709 unsigned i; 1710 1711 for( i = 0; i < QUAD_SIZE; i++ ) { 1712 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1713 } 1714} 1715 1716/** 1717 * Evaluate a linear-valued coefficient at the position of the 1718 * current quad. 1719 */ 1720static void 1721eval_linear_coef( 1722 struct tgsi_exec_machine *mach, 1723 unsigned attrib, 1724 unsigned chan ) 1725{ 1726 const float x = mach->QuadPos.xyzw[0].f[0]; 1727 const float y = mach->QuadPos.xyzw[1].f[0]; 1728 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1729 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1730 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1731 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1732 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1733 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1734 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1735} 1736 1737/** 1738 * Evaluate a perspective-valued coefficient at the position of the 1739 * current quad. 1740 */ 1741static void 1742eval_perspective_coef( 1743 struct tgsi_exec_machine *mach, 1744 unsigned attrib, 1745 unsigned chan ) 1746{ 1747 const float x = mach->QuadPos.xyzw[0].f[0]; 1748 const float y = mach->QuadPos.xyzw[1].f[0]; 1749 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1750 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1751 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1752 const float *w = mach->QuadPos.xyzw[3].f; 1753 /* divide by W here */ 1754 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1755 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1756 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1757 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1758} 1759 1760 1761typedef void (* eval_coef_func)( 1762 struct tgsi_exec_machine *mach, 1763 unsigned attrib, 1764 unsigned chan ); 1765 1766static void 1767exec_declaration( 1768 struct tgsi_exec_machine *mach, 1769 const struct tgsi_full_declaration *decl ) 1770{ 1771 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1772 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1773 unsigned first, last, mask; 1774 eval_coef_func eval; 1775 1776 first = decl->DeclarationRange.First; 1777 last = decl->DeclarationRange.Last; 1778 mask = decl->Declaration.UsageMask; 1779 1780 switch( decl->Declaration.Interpolate ) { 1781 case TGSI_INTERPOLATE_CONSTANT: 1782 eval = eval_constant_coef; 1783 break; 1784 1785 case TGSI_INTERPOLATE_LINEAR: 1786 eval = eval_linear_coef; 1787 break; 1788 1789 case TGSI_INTERPOLATE_PERSPECTIVE: 1790 eval = eval_perspective_coef; 1791 break; 1792 1793 default: 1794 eval = NULL; 1795 assert( 0 ); 1796 } 1797 1798 if( mask == TGSI_WRITEMASK_XYZW ) { 1799 unsigned i, j; 1800 1801 for( i = first; i <= last; i++ ) { 1802 for( j = 0; j < NUM_CHANNELS; j++ ) { 1803 eval( mach, i, j ); 1804 } 1805 } 1806 } 1807 else { 1808 unsigned i, j; 1809 1810 for( j = 0; j < NUM_CHANNELS; j++ ) { 1811 if( mask & (1 << j) ) { 1812 for( i = first; i <= last; i++ ) { 1813 eval( mach, i, j ); 1814 } 1815 } 1816 } 1817 } 1818 } 1819 } 1820} 1821 1822static void 1823exec_instruction( 1824 struct tgsi_exec_machine *mach, 1825 const struct tgsi_full_instruction *inst, 1826 int *pc ) 1827{ 1828 uint chan_index; 1829 union tgsi_exec_channel r[8]; 1830 1831 (*pc)++; 1832 1833 switch (inst->Instruction.Opcode) { 1834 case TGSI_OPCODE_ARL: 1835 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1836 FETCH( &r[0], 0, chan_index ); 1837 micro_flr( &r[0], &r[0] ); 1838 STORE( &r[0], 0, chan_index ); 1839 } 1840 break; 1841 1842 case TGSI_OPCODE_MOV: 1843 case TGSI_OPCODE_SWZ: 1844 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1845 FETCH( &r[0], 0, chan_index ); 1846 STORE( &r[0], 0, chan_index ); 1847 } 1848 break; 1849 1850 case TGSI_OPCODE_LIT: 1851 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1852 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1853 } 1854 1855 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1856 FETCH( &r[0], 0, CHAN_X ); 1857 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1858 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1859 STORE( &r[0], 0, CHAN_Y ); 1860 } 1861 1862 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1863 FETCH( &r[1], 0, CHAN_Y ); 1864 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1865 1866 FETCH( &r[2], 0, CHAN_W ); 1867 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1868 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1869 micro_pow( &r[1], &r[1], &r[2] ); 1870 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1871 STORE( &r[0], 0, CHAN_Z ); 1872 } 1873 } 1874 1875 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1876 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1877 } 1878 break; 1879 1880 case TGSI_OPCODE_RCP: 1881 /* TGSI_OPCODE_RECIP */ 1882 FETCH( &r[0], 0, CHAN_X ); 1883 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1884 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1885 STORE( &r[0], 0, chan_index ); 1886 } 1887 break; 1888 1889 case TGSI_OPCODE_RSQ: 1890 /* TGSI_OPCODE_RECIPSQRT */ 1891 FETCH( &r[0], 0, CHAN_X ); 1892 micro_sqrt( &r[0], &r[0] ); 1893 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1894 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1895 STORE( &r[0], 0, chan_index ); 1896 } 1897 break; 1898 1899 case TGSI_OPCODE_EXP: 1900 FETCH( &r[0], 0, CHAN_X ); 1901 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1902 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1903 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1904 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1905 } 1906 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1907 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1908 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1909 } 1910 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1911 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1912 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1913 } 1914 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1915 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1916 } 1917 break; 1918 1919 case TGSI_OPCODE_LOG: 1920 FETCH( &r[0], 0, CHAN_X ); 1921 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1922 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1923 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1924 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1925 STORE( &r[0], 0, CHAN_X ); 1926 } 1927 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1928 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1929 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 1930 STORE( &r[0], 0, CHAN_Y ); 1931 } 1932 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1933 STORE( &r[1], 0, CHAN_Z ); 1934 } 1935 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1936 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1937 } 1938 break; 1939 1940 case TGSI_OPCODE_MUL: 1941 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1942 { 1943 FETCH(&r[0], 0, chan_index); 1944 FETCH(&r[1], 1, chan_index); 1945 1946 micro_mul( &r[0], &r[0], &r[1] ); 1947 1948 STORE(&r[0], 0, chan_index); 1949 } 1950 break; 1951 1952 case TGSI_OPCODE_ADD: 1953 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1954 FETCH( &r[0], 0, chan_index ); 1955 FETCH( &r[1], 1, chan_index ); 1956 micro_add( &r[0], &r[0], &r[1] ); 1957 STORE( &r[0], 0, chan_index ); 1958 } 1959 break; 1960 1961 case TGSI_OPCODE_DP3: 1962 /* TGSI_OPCODE_DOT3 */ 1963 FETCH( &r[0], 0, CHAN_X ); 1964 FETCH( &r[1], 1, CHAN_X ); 1965 micro_mul( &r[0], &r[0], &r[1] ); 1966 1967 FETCH( &r[1], 0, CHAN_Y ); 1968 FETCH( &r[2], 1, CHAN_Y ); 1969 micro_mul( &r[1], &r[1], &r[2] ); 1970 micro_add( &r[0], &r[0], &r[1] ); 1971 1972 FETCH( &r[1], 0, CHAN_Z ); 1973 FETCH( &r[2], 1, CHAN_Z ); 1974 micro_mul( &r[1], &r[1], &r[2] ); 1975 micro_add( &r[0], &r[0], &r[1] ); 1976 1977 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1978 STORE( &r[0], 0, chan_index ); 1979 } 1980 break; 1981 1982 case TGSI_OPCODE_DP4: 1983 /* TGSI_OPCODE_DOT4 */ 1984 FETCH(&r[0], 0, CHAN_X); 1985 FETCH(&r[1], 1, CHAN_X); 1986 1987 micro_mul( &r[0], &r[0], &r[1] ); 1988 1989 FETCH(&r[1], 0, CHAN_Y); 1990 FETCH(&r[2], 1, CHAN_Y); 1991 1992 micro_mul( &r[1], &r[1], &r[2] ); 1993 micro_add( &r[0], &r[0], &r[1] ); 1994 1995 FETCH(&r[1], 0, CHAN_Z); 1996 FETCH(&r[2], 1, CHAN_Z); 1997 1998 micro_mul( &r[1], &r[1], &r[2] ); 1999 micro_add( &r[0], &r[0], &r[1] ); 2000 2001 FETCH(&r[1], 0, CHAN_W); 2002 FETCH(&r[2], 1, CHAN_W); 2003 2004 micro_mul( &r[1], &r[1], &r[2] ); 2005 micro_add( &r[0], &r[0], &r[1] ); 2006 2007 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2008 STORE( &r[0], 0, chan_index ); 2009 } 2010 break; 2011 2012 case TGSI_OPCODE_DST: 2013 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2014 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2015 } 2016 2017 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2018 FETCH( &r[0], 0, CHAN_Y ); 2019 FETCH( &r[1], 1, CHAN_Y); 2020 micro_mul( &r[0], &r[0], &r[1] ); 2021 STORE( &r[0], 0, CHAN_Y ); 2022 } 2023 2024 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2025 FETCH( &r[0], 0, CHAN_Z ); 2026 STORE( &r[0], 0, CHAN_Z ); 2027 } 2028 2029 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2030 FETCH( &r[0], 1, CHAN_W ); 2031 STORE( &r[0], 0, CHAN_W ); 2032 } 2033 break; 2034 2035 case TGSI_OPCODE_MIN: 2036 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2037 FETCH(&r[0], 0, chan_index); 2038 FETCH(&r[1], 1, chan_index); 2039 2040 /* XXX use micro_min()?? */ 2041 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2042 2043 STORE(&r[0], 0, chan_index); 2044 } 2045 break; 2046 2047 case TGSI_OPCODE_MAX: 2048 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2049 FETCH(&r[0], 0, chan_index); 2050 FETCH(&r[1], 1, chan_index); 2051 2052 /* XXX use micro_max()?? */ 2053 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2054 2055 STORE(&r[0], 0, chan_index ); 2056 } 2057 break; 2058 2059 case TGSI_OPCODE_SLT: 2060 /* TGSI_OPCODE_SETLT */ 2061 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2062 FETCH( &r[0], 0, chan_index ); 2063 FETCH( &r[1], 1, chan_index ); 2064 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2065 STORE( &r[0], 0, chan_index ); 2066 } 2067 break; 2068 2069 case TGSI_OPCODE_SGE: 2070 /* TGSI_OPCODE_SETGE */ 2071 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2072 FETCH( &r[0], 0, chan_index ); 2073 FETCH( &r[1], 1, chan_index ); 2074 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2075 STORE( &r[0], 0, chan_index ); 2076 } 2077 break; 2078 2079 case TGSI_OPCODE_MAD: 2080 /* TGSI_OPCODE_MADD */ 2081 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2082 FETCH( &r[0], 0, chan_index ); 2083 FETCH( &r[1], 1, chan_index ); 2084 micro_mul( &r[0], &r[0], &r[1] ); 2085 FETCH( &r[1], 2, chan_index ); 2086 micro_add( &r[0], &r[0], &r[1] ); 2087 STORE( &r[0], 0, chan_index ); 2088 } 2089 break; 2090 2091 case TGSI_OPCODE_SUB: 2092 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2093 FETCH(&r[0], 0, chan_index); 2094 FETCH(&r[1], 1, chan_index); 2095 2096 micro_sub( &r[0], &r[0], &r[1] ); 2097 2098 STORE(&r[0], 0, chan_index); 2099 } 2100 break; 2101 2102 case TGSI_OPCODE_LERP: 2103 /* TGSI_OPCODE_LRP */ 2104 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2105 FETCH(&r[0], 0, chan_index); 2106 FETCH(&r[1], 1, chan_index); 2107 FETCH(&r[2], 2, chan_index); 2108 2109 micro_sub( &r[1], &r[1], &r[2] ); 2110 micro_mul( &r[0], &r[0], &r[1] ); 2111 micro_add( &r[0], &r[0], &r[2] ); 2112 2113 STORE(&r[0], 0, chan_index); 2114 } 2115 break; 2116 2117 case TGSI_OPCODE_CND: 2118 assert (0); 2119 break; 2120 2121 case TGSI_OPCODE_CND0: 2122 assert (0); 2123 break; 2124 2125 case TGSI_OPCODE_DOT2ADD: 2126 /* TGSI_OPCODE_DP2A */ 2127 FETCH( &r[0], 0, CHAN_X ); 2128 FETCH( &r[1], 1, CHAN_X ); 2129 micro_mul( &r[0], &r[0], &r[1] ); 2130 2131 FETCH( &r[1], 0, CHAN_Y ); 2132 FETCH( &r[2], 1, CHAN_Y ); 2133 micro_mul( &r[1], &r[1], &r[2] ); 2134 micro_add( &r[0], &r[0], &r[1] ); 2135 2136 FETCH( &r[2], 2, CHAN_X ); 2137 micro_add( &r[0], &r[0], &r[2] ); 2138 2139 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2140 STORE( &r[0], 0, chan_index ); 2141 } 2142 break; 2143 2144 case TGSI_OPCODE_INDEX: 2145 assert (0); 2146 break; 2147 2148 case TGSI_OPCODE_NEGATE: 2149 assert (0); 2150 break; 2151 2152 case TGSI_OPCODE_FRAC: 2153 /* TGSI_OPCODE_FRC */ 2154 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2155 FETCH( &r[0], 0, chan_index ); 2156 micro_frc( &r[0], &r[0] ); 2157 STORE( &r[0], 0, chan_index ); 2158 } 2159 break; 2160 2161 case TGSI_OPCODE_CLAMP: 2162 assert (0); 2163 break; 2164 2165 case TGSI_OPCODE_FLOOR: 2166 /* TGSI_OPCODE_FLR */ 2167 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2168 FETCH( &r[0], 0, chan_index ); 2169 micro_flr( &r[0], &r[0] ); 2170 STORE( &r[0], 0, chan_index ); 2171 } 2172 break; 2173 2174 case TGSI_OPCODE_ROUND: 2175 case TGSI_OPCODE_ARR: 2176 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2177 FETCH( &r[0], 0, chan_index ); 2178 micro_rnd( &r[0], &r[0] ); 2179 STORE( &r[0], 0, chan_index ); 2180 } 2181 break; 2182 2183 case TGSI_OPCODE_EXPBASE2: 2184 /* TGSI_OPCODE_EX2 */ 2185 FETCH(&r[0], 0, CHAN_X); 2186 2187#if FAST_MATH 2188 micro_exp2( &r[0], &r[0] ); 2189#else 2190 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2191#endif 2192 2193 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2194 STORE( &r[0], 0, chan_index ); 2195 } 2196 break; 2197 2198 case TGSI_OPCODE_LOGBASE2: 2199 /* TGSI_OPCODE_LG2 */ 2200 FETCH( &r[0], 0, CHAN_X ); 2201 micro_lg2( &r[0], &r[0] ); 2202 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2203 STORE( &r[0], 0, chan_index ); 2204 } 2205 break; 2206 2207 case TGSI_OPCODE_POWER: 2208 /* TGSI_OPCODE_POW */ 2209 FETCH(&r[0], 0, CHAN_X); 2210 FETCH(&r[1], 1, CHAN_X); 2211 2212 micro_pow( &r[0], &r[0], &r[1] ); 2213 2214 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2215 STORE( &r[0], 0, chan_index ); 2216 } 2217 break; 2218 2219 case TGSI_OPCODE_CROSSPRODUCT: 2220 /* TGSI_OPCODE_XPD */ 2221 FETCH(&r[0], 0, CHAN_Y); 2222 FETCH(&r[1], 1, CHAN_Z); 2223 2224 micro_mul( &r[2], &r[0], &r[1] ); 2225 2226 FETCH(&r[3], 0, CHAN_Z); 2227 FETCH(&r[4], 1, CHAN_Y); 2228 2229 micro_mul( &r[5], &r[3], &r[4] ); 2230 micro_sub( &r[2], &r[2], &r[5] ); 2231 2232 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2233 STORE( &r[2], 0, CHAN_X ); 2234 } 2235 2236 FETCH(&r[2], 1, CHAN_X); 2237 2238 micro_mul( &r[3], &r[3], &r[2] ); 2239 2240 FETCH(&r[5], 0, CHAN_X); 2241 2242 micro_mul( &r[1], &r[1], &r[5] ); 2243 micro_sub( &r[3], &r[3], &r[1] ); 2244 2245 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2246 STORE( &r[3], 0, CHAN_Y ); 2247 } 2248 2249 micro_mul( &r[5], &r[5], &r[4] ); 2250 micro_mul( &r[0], &r[0], &r[2] ); 2251 micro_sub( &r[5], &r[5], &r[0] ); 2252 2253 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2254 STORE( &r[5], 0, CHAN_Z ); 2255 } 2256 2257 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2258 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2259 } 2260 break; 2261 2262 case TGSI_OPCODE_MULTIPLYMATRIX: 2263 assert (0); 2264 break; 2265 2266 case TGSI_OPCODE_ABS: 2267 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2268 FETCH(&r[0], 0, chan_index); 2269 2270 micro_abs( &r[0], &r[0] ); 2271 2272 STORE(&r[0], 0, chan_index); 2273 } 2274 break; 2275 2276 case TGSI_OPCODE_RCC: 2277 assert (0); 2278 break; 2279 2280 case TGSI_OPCODE_DPH: 2281 FETCH(&r[0], 0, CHAN_X); 2282 FETCH(&r[1], 1, CHAN_X); 2283 2284 micro_mul( &r[0], &r[0], &r[1] ); 2285 2286 FETCH(&r[1], 0, CHAN_Y); 2287 FETCH(&r[2], 1, CHAN_Y); 2288 2289 micro_mul( &r[1], &r[1], &r[2] ); 2290 micro_add( &r[0], &r[0], &r[1] ); 2291 2292 FETCH(&r[1], 0, CHAN_Z); 2293 FETCH(&r[2], 1, CHAN_Z); 2294 2295 micro_mul( &r[1], &r[1], &r[2] ); 2296 micro_add( &r[0], &r[0], &r[1] ); 2297 2298 FETCH(&r[1], 1, CHAN_W); 2299 2300 micro_add( &r[0], &r[0], &r[1] ); 2301 2302 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2303 STORE( &r[0], 0, chan_index ); 2304 } 2305 break; 2306 2307 case TGSI_OPCODE_COS: 2308 FETCH(&r[0], 0, CHAN_X); 2309 2310 micro_cos( &r[0], &r[0] ); 2311 2312 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2313 STORE( &r[0], 0, chan_index ); 2314 } 2315 break; 2316 2317 case TGSI_OPCODE_DDX: 2318 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2319 FETCH( &r[0], 0, chan_index ); 2320 micro_ddx( &r[0], &r[0] ); 2321 STORE( &r[0], 0, chan_index ); 2322 } 2323 break; 2324 2325 case TGSI_OPCODE_DDY: 2326 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2327 FETCH( &r[0], 0, chan_index ); 2328 micro_ddy( &r[0], &r[0] ); 2329 STORE( &r[0], 0, chan_index ); 2330 } 2331 break; 2332 2333 case TGSI_OPCODE_KILP: 2334 exec_kilp (mach, inst); 2335 break; 2336 2337 case TGSI_OPCODE_KIL: 2338 exec_kil (mach, inst); 2339 break; 2340 2341 case TGSI_OPCODE_PK2H: 2342 assert (0); 2343 break; 2344 2345 case TGSI_OPCODE_PK2US: 2346 assert (0); 2347 break; 2348 2349 case TGSI_OPCODE_PK4B: 2350 assert (0); 2351 break; 2352 2353 case TGSI_OPCODE_PK4UB: 2354 assert (0); 2355 break; 2356 2357 case TGSI_OPCODE_RFL: 2358 assert (0); 2359 break; 2360 2361 case TGSI_OPCODE_SEQ: 2362 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2363 FETCH( &r[0], 0, chan_index ); 2364 FETCH( &r[1], 1, chan_index ); 2365 micro_eq( &r[0], &r[0], &r[1], 2366 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2367 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2368 STORE( &r[0], 0, chan_index ); 2369 } 2370 break; 2371 2372 case TGSI_OPCODE_SFL: 2373 assert (0); 2374 break; 2375 2376 case TGSI_OPCODE_SGT: 2377 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2378 FETCH( &r[0], 0, chan_index ); 2379 FETCH( &r[1], 1, chan_index ); 2380 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2381 STORE( &r[0], 0, chan_index ); 2382 } 2383 break; 2384 2385 case TGSI_OPCODE_SIN: 2386 FETCH( &r[0], 0, CHAN_X ); 2387 micro_sin( &r[0], &r[0] ); 2388 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2389 STORE( &r[0], 0, chan_index ); 2390 } 2391 break; 2392 2393 case TGSI_OPCODE_SLE: 2394 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2395 FETCH( &r[0], 0, chan_index ); 2396 FETCH( &r[1], 1, chan_index ); 2397 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2398 STORE( &r[0], 0, chan_index ); 2399 } 2400 break; 2401 2402 case TGSI_OPCODE_SNE: 2403 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2404 FETCH( &r[0], 0, chan_index ); 2405 FETCH( &r[1], 1, chan_index ); 2406 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2407 STORE( &r[0], 0, chan_index ); 2408 } 2409 break; 2410 2411 case TGSI_OPCODE_STR: 2412 assert (0); 2413 break; 2414 2415 case TGSI_OPCODE_TEX: 2416 /* simple texture lookup */ 2417 /* src[0] = texcoord */ 2418 /* src[1] = sampler unit */ 2419 exec_tex(mach, inst, FALSE, FALSE); 2420 break; 2421 2422 case TGSI_OPCODE_TXB: 2423 /* Texture lookup with lod bias */ 2424 /* src[0] = texcoord (src[0].w = LOD bias) */ 2425 /* src[1] = sampler unit */ 2426 exec_tex(mach, inst, TRUE, FALSE); 2427 break; 2428 2429 case TGSI_OPCODE_TXD: 2430 /* Texture lookup with explict partial derivatives */ 2431 /* src[0] = texcoord */ 2432 /* src[1] = d[strq]/dx */ 2433 /* src[2] = d[strq]/dy */ 2434 /* src[3] = sampler unit */ 2435 assert (0); 2436 break; 2437 2438 case TGSI_OPCODE_TXL: 2439 /* Texture lookup with explit LOD */ 2440 /* src[0] = texcoord (src[0].w = LOD) */ 2441 /* src[1] = sampler unit */ 2442 exec_tex(mach, inst, TRUE, FALSE); 2443 break; 2444 2445 case TGSI_OPCODE_TXP: 2446 /* Texture lookup with projection */ 2447 /* src[0] = texcoord (src[0].w = projection) */ 2448 /* src[1] = sampler unit */ 2449 exec_tex(mach, inst, FALSE, TRUE); 2450 break; 2451 2452 case TGSI_OPCODE_UP2H: 2453 assert (0); 2454 break; 2455 2456 case TGSI_OPCODE_UP2US: 2457 assert (0); 2458 break; 2459 2460 case TGSI_OPCODE_UP4B: 2461 assert (0); 2462 break; 2463 2464 case TGSI_OPCODE_UP4UB: 2465 assert (0); 2466 break; 2467 2468 case TGSI_OPCODE_X2D: 2469 assert (0); 2470 break; 2471 2472 case TGSI_OPCODE_ARA: 2473 assert (0); 2474 break; 2475 2476 case TGSI_OPCODE_BRA: 2477 assert (0); 2478 break; 2479 2480 case TGSI_OPCODE_CAL: 2481 /* skip the call if no execution channels are enabled */ 2482 if (mach->ExecMask) { 2483 /* do the call */ 2484 2485 /* push the Cond, Loop, Cont stacks */ 2486 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2487 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2488 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2489 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2490 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2491 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2492 2493 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2494 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2495 2496 /* note that PC was already incremented above */ 2497 mach->CallStack[mach->CallStackTop++] = *pc; 2498 *pc = inst->InstructionExtLabel.Label; 2499 } 2500 break; 2501 2502 case TGSI_OPCODE_RET: 2503 mach->FuncMask &= ~mach->ExecMask; 2504 UPDATE_EXEC_MASK(mach); 2505 2506 if (mach->FuncMask == 0x0) { 2507 /* really return now (otherwise, keep executing */ 2508 2509 if (mach->CallStackTop == 0) { 2510 /* returning from main() */ 2511 *pc = -1; 2512 return; 2513 } 2514 *pc = mach->CallStack[--mach->CallStackTop]; 2515 2516 /* pop the Cond, Loop, Cont stacks */ 2517 assert(mach->CondStackTop > 0); 2518 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2519 assert(mach->LoopStackTop > 0); 2520 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2521 assert(mach->ContStackTop > 0); 2522 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2523 assert(mach->FuncStackTop > 0); 2524 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2525 2526 UPDATE_EXEC_MASK(mach); 2527 } 2528 break; 2529 2530 case TGSI_OPCODE_SSG: 2531 /* TGSI_OPCODE_SGN */ 2532 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2533 FETCH( &r[0], 0, chan_index ); 2534 micro_sgn( &r[0], &r[0] ); 2535 STORE( &r[0], 0, chan_index ); 2536 } 2537 break; 2538 2539 case TGSI_OPCODE_CMP: 2540 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2541 FETCH(&r[0], 0, chan_index); 2542 FETCH(&r[1], 1, chan_index); 2543 FETCH(&r[2], 2, chan_index); 2544 2545 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2546 2547 STORE(&r[0], 0, chan_index); 2548 } 2549 break; 2550 2551 case TGSI_OPCODE_SCS: 2552 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2553 FETCH( &r[0], 0, CHAN_X ); 2554 } 2555 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { 2556 micro_cos( &r[1], &r[0] ); 2557 STORE( &r[1], 0, CHAN_X ); 2558 } 2559 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2560 micro_sin( &r[1], &r[0] ); 2561 STORE( &r[1], 0, CHAN_Y ); 2562 } 2563 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2564 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2565 } 2566 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2567 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2568 } 2569 break; 2570 2571 case TGSI_OPCODE_NRM: 2572 /* 3-component vector normalize */ 2573 { 2574 union tgsi_exec_channel tmp, dot; 2575 2576 /* tmp = dp3(src0, src0): */ 2577 FETCH( &r[0], 0, CHAN_X ); 2578 micro_mul( &tmp, &r[0], &r[0] ); 2579 2580 FETCH( &r[1], 0, CHAN_Y ); 2581 micro_mul( &dot, &r[1], &r[1] ); 2582 micro_add( &tmp, &tmp, &dot ); 2583 2584 FETCH( &r[2], 0, CHAN_Z ); 2585 micro_mul( &dot, &r[2], &r[2] ); 2586 micro_add( &tmp, &tmp, &dot ); 2587 2588 /* tmp = 1 / sqrt(tmp) */ 2589 micro_sqrt( &tmp, &tmp ); 2590 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2591 2592 /* note: w channel is undefined */ 2593 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2594 /* chan = chan * tmp */ 2595 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2596 STORE( &r[chan_index], 0, chan_index ); 2597 } 2598 } 2599 break; 2600 2601 case TGSI_OPCODE_NRM4: 2602 /* 4-component vector normalize */ 2603 { 2604 union tgsi_exec_channel tmp, dot; 2605 2606 /* tmp = dp4(src0, src0): */ 2607 FETCH( &r[0], 0, CHAN_X ); 2608 micro_mul( &tmp, &r[0], &r[0] ); 2609 2610 FETCH( &r[1], 0, CHAN_Y ); 2611 micro_mul( &dot, &r[1], &r[1] ); 2612 micro_add( &tmp, &tmp, &dot ); 2613 2614 FETCH( &r[2], 0, CHAN_Z ); 2615 micro_mul( &dot, &r[2], &r[2] ); 2616 micro_add( &tmp, &tmp, &dot ); 2617 2618 FETCH( &r[3], 0, CHAN_W ); 2619 micro_mul( &dot, &r[3], &r[3] ); 2620 micro_add( &tmp, &tmp, &dot ); 2621 2622 /* tmp = 1 / sqrt(tmp) */ 2623 micro_sqrt( &tmp, &tmp ); 2624 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2625 2626 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2627 /* chan = chan * tmp */ 2628 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2629 STORE( &r[chan_index], 0, chan_index ); 2630 } 2631 } 2632 break; 2633 2634 case TGSI_OPCODE_DIV: 2635 assert( 0 ); 2636 break; 2637 2638 case TGSI_OPCODE_DP2: 2639 FETCH( &r[0], 0, CHAN_X ); 2640 FETCH( &r[1], 1, CHAN_X ); 2641 micro_mul( &r[0], &r[0], &r[1] ); 2642 2643 FETCH( &r[1], 0, CHAN_Y ); 2644 FETCH( &r[2], 1, CHAN_Y ); 2645 micro_mul( &r[1], &r[1], &r[2] ); 2646 micro_add( &r[0], &r[0], &r[1] ); 2647 2648 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2649 STORE( &r[0], 0, chan_index ); 2650 } 2651 break; 2652 2653 case TGSI_OPCODE_IF: 2654 /* push CondMask */ 2655 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2656 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2657 FETCH( &r[0], 0, CHAN_X ); 2658 /* update CondMask */ 2659 if( ! r[0].u[0] ) { 2660 mach->CondMask &= ~0x1; 2661 } 2662 if( ! r[0].u[1] ) { 2663 mach->CondMask &= ~0x2; 2664 } 2665 if( ! r[0].u[2] ) { 2666 mach->CondMask &= ~0x4; 2667 } 2668 if( ! r[0].u[3] ) { 2669 mach->CondMask &= ~0x8; 2670 } 2671 UPDATE_EXEC_MASK(mach); 2672 /* Todo: If CondMask==0, jump to ELSE */ 2673 break; 2674 2675 case TGSI_OPCODE_ELSE: 2676 /* invert CondMask wrt previous mask */ 2677 { 2678 uint prevMask; 2679 assert(mach->CondStackTop > 0); 2680 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2681 mach->CondMask = ~mach->CondMask & prevMask; 2682 UPDATE_EXEC_MASK(mach); 2683 /* Todo: If CondMask==0, jump to ENDIF */ 2684 } 2685 break; 2686 2687 case TGSI_OPCODE_ENDIF: 2688 /* pop CondMask */ 2689 assert(mach->CondStackTop > 0); 2690 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2691 UPDATE_EXEC_MASK(mach); 2692 break; 2693 2694 case TGSI_OPCODE_END: 2695 /* halt execution */ 2696 *pc = -1; 2697 break; 2698 2699 case TGSI_OPCODE_REP: 2700 assert (0); 2701 break; 2702 2703 case TGSI_OPCODE_ENDREP: 2704 assert (0); 2705 break; 2706 2707 case TGSI_OPCODE_PUSHA: 2708 assert (0); 2709 break; 2710 2711 case TGSI_OPCODE_POPA: 2712 assert (0); 2713 break; 2714 2715 case TGSI_OPCODE_CEIL: 2716 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2717 FETCH( &r[0], 0, chan_index ); 2718 micro_ceil( &r[0], &r[0] ); 2719 STORE( &r[0], 0, chan_index ); 2720 } 2721 break; 2722 2723 case TGSI_OPCODE_I2F: 2724 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2725 FETCH( &r[0], 0, chan_index ); 2726 micro_i2f( &r[0], &r[0] ); 2727 STORE( &r[0], 0, chan_index ); 2728 } 2729 break; 2730 2731 case TGSI_OPCODE_NOT: 2732 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2733 FETCH( &r[0], 0, chan_index ); 2734 micro_not( &r[0], &r[0] ); 2735 STORE( &r[0], 0, chan_index ); 2736 } 2737 break; 2738 2739 case TGSI_OPCODE_TRUNC: 2740 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2741 FETCH( &r[0], 0, chan_index ); 2742 micro_trunc( &r[0], &r[0] ); 2743 STORE( &r[0], 0, chan_index ); 2744 } 2745 break; 2746 2747 case TGSI_OPCODE_SHL: 2748 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2749 FETCH( &r[0], 0, chan_index ); 2750 FETCH( &r[1], 1, chan_index ); 2751 micro_shl( &r[0], &r[0], &r[1] ); 2752 STORE( &r[0], 0, chan_index ); 2753 } 2754 break; 2755 2756 case TGSI_OPCODE_SHR: 2757 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2758 FETCH( &r[0], 0, chan_index ); 2759 FETCH( &r[1], 1, chan_index ); 2760 micro_ishr( &r[0], &r[0], &r[1] ); 2761 STORE( &r[0], 0, chan_index ); 2762 } 2763 break; 2764 2765 case TGSI_OPCODE_AND: 2766 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2767 FETCH( &r[0], 0, chan_index ); 2768 FETCH( &r[1], 1, chan_index ); 2769 micro_and( &r[0], &r[0], &r[1] ); 2770 STORE( &r[0], 0, chan_index ); 2771 } 2772 break; 2773 2774 case TGSI_OPCODE_OR: 2775 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2776 FETCH( &r[0], 0, chan_index ); 2777 FETCH( &r[1], 1, chan_index ); 2778 micro_or( &r[0], &r[0], &r[1] ); 2779 STORE( &r[0], 0, chan_index ); 2780 } 2781 break; 2782 2783 case TGSI_OPCODE_MOD: 2784 assert (0); 2785 break; 2786 2787 case TGSI_OPCODE_XOR: 2788 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2789 FETCH( &r[0], 0, chan_index ); 2790 FETCH( &r[1], 1, chan_index ); 2791 micro_xor( &r[0], &r[0], &r[1] ); 2792 STORE( &r[0], 0, chan_index ); 2793 } 2794 break; 2795 2796 case TGSI_OPCODE_SAD: 2797 assert (0); 2798 break; 2799 2800 case TGSI_OPCODE_TXF: 2801 assert (0); 2802 break; 2803 2804 case TGSI_OPCODE_TXQ: 2805 assert (0); 2806 break; 2807 2808 case TGSI_OPCODE_EMIT: 2809 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 2810 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2811 break; 2812 2813 case TGSI_OPCODE_ENDPRIM: 2814 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 2815 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 2816 break; 2817 2818 case TGSI_OPCODE_LOOP: 2819 /* fall-through (for now) */ 2820 case TGSI_OPCODE_BGNLOOP2: 2821 /* push LoopMask and ContMasks */ 2822 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2823 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2824 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2825 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2826 break; 2827 2828 case TGSI_OPCODE_ENDLOOP: 2829 /* fall-through (for now at least) */ 2830 case TGSI_OPCODE_ENDLOOP2: 2831 /* Restore ContMask, but don't pop */ 2832 assert(mach->ContStackTop > 0); 2833 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 2834 UPDATE_EXEC_MASK(mach); 2835 if (mach->ExecMask) { 2836 /* repeat loop: jump to instruction just past BGNLOOP */ 2837 *pc = inst->InstructionExtLabel.Label + 1; 2838 } 2839 else { 2840 /* exit loop: pop LoopMask */ 2841 assert(mach->LoopStackTop > 0); 2842 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2843 /* pop ContMask */ 2844 assert(mach->ContStackTop > 0); 2845 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2846 } 2847 UPDATE_EXEC_MASK(mach); 2848 break; 2849 2850 case TGSI_OPCODE_BRK: 2851 /* turn off loop channels for each enabled exec channel */ 2852 mach->LoopMask &= ~mach->ExecMask; 2853 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2854 UPDATE_EXEC_MASK(mach); 2855 break; 2856 2857 case TGSI_OPCODE_CONT: 2858 /* turn off cont channels for each enabled exec channel */ 2859 mach->ContMask &= ~mach->ExecMask; 2860 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2861 UPDATE_EXEC_MASK(mach); 2862 break; 2863 2864 case TGSI_OPCODE_BGNSUB: 2865 /* no-op */ 2866 break; 2867 2868 case TGSI_OPCODE_ENDSUB: 2869 /* no-op */ 2870 break; 2871 2872 case TGSI_OPCODE_NOISE1: 2873 assert( 0 ); 2874 break; 2875 2876 case TGSI_OPCODE_NOISE2: 2877 assert( 0 ); 2878 break; 2879 2880 case TGSI_OPCODE_NOISE3: 2881 assert( 0 ); 2882 break; 2883 2884 case TGSI_OPCODE_NOISE4: 2885 assert( 0 ); 2886 break; 2887 2888 case TGSI_OPCODE_NOP: 2889 break; 2890 2891 default: 2892 assert( 0 ); 2893 } 2894} 2895 2896 2897/** 2898 * Run TGSI interpreter. 2899 * \return bitmask of "alive" quad components 2900 */ 2901uint 2902tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 2903{ 2904 uint i; 2905 int pc = 0; 2906 2907 mach->CondMask = 0xf; 2908 mach->LoopMask = 0xf; 2909 mach->ContMask = 0xf; 2910 mach->FuncMask = 0xf; 2911 mach->ExecMask = 0xf; 2912 2913 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 2914 assert(mach->CondStackTop == 0); 2915 assert(mach->LoopStackTop == 0); 2916 assert(mach->ContStackTop == 0); 2917 assert(mach->CallStackTop == 0); 2918 2919 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 2920 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 2921 2922 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 2923 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 2924 mach->Primitives[0] = 0; 2925 } 2926 2927 for (i = 0; i < QUAD_SIZE; i++) { 2928 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 2929 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 2930 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 2931 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 2932 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 2933 } 2934 2935 /* execute declarations (interpolants) */ 2936 for (i = 0; i < mach->NumDeclarations; i++) { 2937 exec_declaration( mach, mach->Declarations+i ); 2938 } 2939 2940 /* execute instructions, until pc is set to -1 */ 2941 while (pc != -1) { 2942 assert(pc < (int) mach->NumInstructions); 2943 exec_instruction( mach, mach->Instructions + pc, &pc ); 2944 } 2945 2946#if 0 2947 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 2948 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 2949 /* 2950 * Scale back depth component. 2951 */ 2952 for (i = 0; i < 4; i++) 2953 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 2954 } 2955#endif 2956 2957 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 2958} 2959 2960 2961