tgsi_exec.c revision 38afa2934077ce1cf67d1c553f872d1e14fb0794
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_parse.h" 57#include "tgsi/tgsi_util.h" 58#include "tgsi_exec.h" 59#include "util/u_memory.h" 60#include "util/u_math.h" 61 62#define FAST_MATH 1 63 64#define TILE_TOP_LEFT 0 65#define TILE_TOP_RIGHT 1 66#define TILE_BOTTOM_LEFT 2 67#define TILE_BOTTOM_RIGHT 3 68 69#define CHAN_X 0 70#define CHAN_Y 1 71#define CHAN_Z 2 72#define CHAN_W 3 73 74/* 75 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 76 */ 77#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 78#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 79#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 80#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 81#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 82#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 83#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 84#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 85#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 86#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 87#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 88#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 89#define TEMP_128_I TGSI_EXEC_TEMP_128_I 90#define TEMP_128_C TGSI_EXEC_TEMP_128_C 91#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 92#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 93#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 94#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 95#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 96#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 97#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 98#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 99#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 100#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 101#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 102#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 103#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 104#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 105#define TEMP_R0 TGSI_EXEC_TEMP_R0 106 107#define IS_CHANNEL_ENABLED(INST, CHAN)\ 108 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 109 110#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 111 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 112 113#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 114 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 115 if (IS_CHANNEL_ENABLED( INST, CHAN )) 116 117#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 118 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 119 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 120 121 122/** The execution mask depends on the conditional mask and the loop mask */ 123#define UPDATE_EXEC_MASK(MACH) \ 124 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 125 126/** 127 * Initialize machine state by expanding tokens to full instructions, 128 * allocating temporary storage, setting up constants, etc. 129 * After this, we can call tgsi_exec_machine_run() many times. 130 */ 131void 132tgsi_exec_machine_bind_shader( 133 struct tgsi_exec_machine *mach, 134 const struct tgsi_token *tokens, 135 uint numSamplers, 136 struct tgsi_sampler **samplers) 137{ 138 uint k; 139 struct tgsi_parse_context parse; 140 struct tgsi_exec_labels *labels = &mach->Labels; 141 struct tgsi_full_instruction *instructions; 142 struct tgsi_full_declaration *declarations; 143 uint maxInstructions = 10, numInstructions = 0; 144 uint maxDeclarations = 10, numDeclarations = 0; 145 uint instno = 0; 146 147#if 0 148 tgsi_dump(tokens, 0); 149#endif 150 151 util_init_math(); 152 153 mach->Tokens = tokens; 154 mach->Samplers = samplers; 155 156 k = tgsi_parse_init (&parse, mach->Tokens); 157 if (k != TGSI_PARSE_OK) { 158 debug_printf( "Problem parsing!\n" ); 159 return; 160 } 161 162 mach->Processor = parse.FullHeader.Processor.Processor; 163 mach->ImmLimit = 0; 164 labels->count = 0; 165 166 declarations = (struct tgsi_full_declaration *) 167 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 168 169 if (!declarations) { 170 return; 171 } 172 173 instructions = (struct tgsi_full_instruction *) 174 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 175 176 if (!instructions) { 177 FREE( declarations ); 178 return; 179 } 180 181 while( !tgsi_parse_end_of_tokens( &parse ) ) { 182 uint pointer = parse.Position; 183 uint i; 184 185 tgsi_parse_token( &parse ); 186 switch( parse.FullToken.Token.Type ) { 187 case TGSI_TOKEN_TYPE_DECLARATION: 188 /* save expanded declaration */ 189 if (numDeclarations == maxDeclarations) { 190 declarations = REALLOC(declarations, 191 maxDeclarations 192 * sizeof(struct tgsi_full_declaration), 193 (maxDeclarations + 10) 194 * sizeof(struct tgsi_full_declaration)); 195 maxDeclarations += 10; 196 } 197 memcpy(declarations + numDeclarations, 198 &parse.FullToken.FullDeclaration, 199 sizeof(declarations[0])); 200 numDeclarations++; 201 break; 202 203 case TGSI_TOKEN_TYPE_IMMEDIATE: 204 { 205 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 206 assert( size % 4 == 0 ); 207 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); 208 209 for( i = 0; i < size; i++ ) { 210 mach->Imms[mach->ImmLimit + i / 4][i % 4] = 211 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 212 } 213 mach->ImmLimit += size / 4; 214 } 215 break; 216 217 case TGSI_TOKEN_TYPE_INSTRUCTION: 218 assert( labels->count < MAX_LABELS ); 219 220 labels->labels[labels->count][0] = instno; 221 labels->labels[labels->count][1] = pointer; 222 labels->count++; 223 224 /* save expanded instruction */ 225 if (numInstructions == maxInstructions) { 226 instructions = REALLOC(instructions, 227 maxInstructions 228 * sizeof(struct tgsi_full_instruction), 229 (maxInstructions + 10) 230 * sizeof(struct tgsi_full_instruction)); 231 maxInstructions += 10; 232 } 233 memcpy(instructions + numInstructions, 234 &parse.FullToken.FullInstruction, 235 sizeof(instructions[0])); 236 numInstructions++; 237 break; 238 239 default: 240 assert( 0 ); 241 } 242 } 243 tgsi_parse_free (&parse); 244 245 if (mach->Declarations) { 246 FREE( mach->Declarations ); 247 } 248 mach->Declarations = declarations; 249 mach->NumDeclarations = numDeclarations; 250 251 if (mach->Instructions) { 252 FREE( mach->Instructions ); 253 } 254 mach->Instructions = instructions; 255 mach->NumInstructions = numInstructions; 256} 257 258 259void 260tgsi_exec_machine_init( 261 struct tgsi_exec_machine *mach ) 262{ 263 uint i; 264 265 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); 266 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 267 268 /* Setup constants. */ 269 for( i = 0; i < 4; i++ ) { 270 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 271 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 272 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 273 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 274 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 275 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 276 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 277 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 278 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 279 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 280 } 281} 282 283 284void 285tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) 286{ 287 if (mach->Instructions) { 288 FREE(mach->Instructions); 289 mach->Instructions = NULL; 290 mach->NumInstructions = 0; 291 } 292 if (mach->Declarations) { 293 FREE(mach->Declarations); 294 mach->Declarations = NULL; 295 mach->NumDeclarations = 0; 296 } 297} 298 299 300static void 301micro_abs( 302 union tgsi_exec_channel *dst, 303 const union tgsi_exec_channel *src ) 304{ 305 dst->f[0] = fabsf( src->f[0] ); 306 dst->f[1] = fabsf( src->f[1] ); 307 dst->f[2] = fabsf( src->f[2] ); 308 dst->f[3] = fabsf( src->f[3] ); 309} 310 311static void 312micro_add( 313 union tgsi_exec_channel *dst, 314 const union tgsi_exec_channel *src0, 315 const union tgsi_exec_channel *src1 ) 316{ 317 dst->f[0] = src0->f[0] + src1->f[0]; 318 dst->f[1] = src0->f[1] + src1->f[1]; 319 dst->f[2] = src0->f[2] + src1->f[2]; 320 dst->f[3] = src0->f[3] + src1->f[3]; 321} 322 323#if 0 324static void 325micro_iadd( 326 union tgsi_exec_channel *dst, 327 const union tgsi_exec_channel *src0, 328 const union tgsi_exec_channel *src1 ) 329{ 330 dst->i[0] = src0->i[0] + src1->i[0]; 331 dst->i[1] = src0->i[1] + src1->i[1]; 332 dst->i[2] = src0->i[2] + src1->i[2]; 333 dst->i[3] = src0->i[3] + src1->i[3]; 334} 335#endif 336 337static void 338micro_and( 339 union tgsi_exec_channel *dst, 340 const union tgsi_exec_channel *src0, 341 const union tgsi_exec_channel *src1 ) 342{ 343 dst->u[0] = src0->u[0] & src1->u[0]; 344 dst->u[1] = src0->u[1] & src1->u[1]; 345 dst->u[2] = src0->u[2] & src1->u[2]; 346 dst->u[3] = src0->u[3] & src1->u[3]; 347} 348 349static void 350micro_ceil( 351 union tgsi_exec_channel *dst, 352 const union tgsi_exec_channel *src ) 353{ 354 dst->f[0] = ceilf( src->f[0] ); 355 dst->f[1] = ceilf( src->f[1] ); 356 dst->f[2] = ceilf( src->f[2] ); 357 dst->f[3] = ceilf( src->f[3] ); 358} 359 360static void 361micro_cos( 362 union tgsi_exec_channel *dst, 363 const union tgsi_exec_channel *src ) 364{ 365 dst->f[0] = cosf( src->f[0] ); 366 dst->f[1] = cosf( src->f[1] ); 367 dst->f[2] = cosf( src->f[2] ); 368 dst->f[3] = cosf( src->f[3] ); 369} 370 371static void 372micro_ddx( 373 union tgsi_exec_channel *dst, 374 const union tgsi_exec_channel *src ) 375{ 376 dst->f[0] = 377 dst->f[1] = 378 dst->f[2] = 379 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 380} 381 382static void 383micro_ddy( 384 union tgsi_exec_channel *dst, 385 const union tgsi_exec_channel *src ) 386{ 387 dst->f[0] = 388 dst->f[1] = 389 dst->f[2] = 390 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 391} 392 393static void 394micro_div( 395 union tgsi_exec_channel *dst, 396 const union tgsi_exec_channel *src0, 397 const union tgsi_exec_channel *src1 ) 398{ 399 if (src1->f[0] != 0) { 400 dst->f[0] = src0->f[0] / src1->f[0]; 401 } 402 if (src1->f[1] != 0) { 403 dst->f[1] = src0->f[1] / src1->f[1]; 404 } 405 if (src1->f[2] != 0) { 406 dst->f[2] = src0->f[2] / src1->f[2]; 407 } 408 if (src1->f[3] != 0) { 409 dst->f[3] = src0->f[3] / src1->f[3]; 410 } 411} 412 413#if 0 414static void 415micro_udiv( 416 union tgsi_exec_channel *dst, 417 const union tgsi_exec_channel *src0, 418 const union tgsi_exec_channel *src1 ) 419{ 420 dst->u[0] = src0->u[0] / src1->u[0]; 421 dst->u[1] = src0->u[1] / src1->u[1]; 422 dst->u[2] = src0->u[2] / src1->u[2]; 423 dst->u[3] = src0->u[3] / src1->u[3]; 424} 425#endif 426 427static void 428micro_eq( 429 union tgsi_exec_channel *dst, 430 const union tgsi_exec_channel *src0, 431 const union tgsi_exec_channel *src1, 432 const union tgsi_exec_channel *src2, 433 const union tgsi_exec_channel *src3 ) 434{ 435 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 436 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 437 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 438 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 439} 440 441#if 0 442static void 443micro_ieq( 444 union tgsi_exec_channel *dst, 445 const union tgsi_exec_channel *src0, 446 const union tgsi_exec_channel *src1, 447 const union tgsi_exec_channel *src2, 448 const union tgsi_exec_channel *src3 ) 449{ 450 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 451 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 452 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 453 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 454} 455#endif 456 457static void 458micro_exp2( 459 union tgsi_exec_channel *dst, 460 const union tgsi_exec_channel *src) 461{ 462#if FAST_MATH 463 dst->f[0] = util_fast_exp2( src->f[0] ); 464 dst->f[1] = util_fast_exp2( src->f[1] ); 465 dst->f[2] = util_fast_exp2( src->f[2] ); 466 dst->f[3] = util_fast_exp2( src->f[3] ); 467#else 468 dst->f[0] = powf( 2.0f, src->f[0] ); 469 dst->f[1] = powf( 2.0f, src->f[1] ); 470 dst->f[2] = powf( 2.0f, src->f[2] ); 471 dst->f[3] = powf( 2.0f, src->f[3] ); 472#endif 473} 474 475#if 0 476static void 477micro_f2ut( 478 union tgsi_exec_channel *dst, 479 const union tgsi_exec_channel *src ) 480{ 481 dst->u[0] = (uint) src->f[0]; 482 dst->u[1] = (uint) src->f[1]; 483 dst->u[2] = (uint) src->f[2]; 484 dst->u[3] = (uint) src->f[3]; 485} 486#endif 487 488static void 489micro_flr( 490 union tgsi_exec_channel *dst, 491 const union tgsi_exec_channel *src ) 492{ 493 dst->f[0] = floorf( src->f[0] ); 494 dst->f[1] = floorf( src->f[1] ); 495 dst->f[2] = floorf( src->f[2] ); 496 dst->f[3] = floorf( src->f[3] ); 497} 498 499static void 500micro_frc( 501 union tgsi_exec_channel *dst, 502 const union tgsi_exec_channel *src ) 503{ 504 dst->f[0] = src->f[0] - floorf( src->f[0] ); 505 dst->f[1] = src->f[1] - floorf( src->f[1] ); 506 dst->f[2] = src->f[2] - floorf( src->f[2] ); 507 dst->f[3] = src->f[3] - floorf( src->f[3] ); 508} 509 510static void 511micro_i2f( 512 union tgsi_exec_channel *dst, 513 const union tgsi_exec_channel *src ) 514{ 515 dst->f[0] = (float) src->i[0]; 516 dst->f[1] = (float) src->i[1]; 517 dst->f[2] = (float) src->i[2]; 518 dst->f[3] = (float) src->i[3]; 519} 520 521static void 522micro_lg2( 523 union tgsi_exec_channel *dst, 524 const union tgsi_exec_channel *src ) 525{ 526#if FAST_MATH 527 dst->f[0] = util_fast_log2( src->f[0] ); 528 dst->f[1] = util_fast_log2( src->f[1] ); 529 dst->f[2] = util_fast_log2( src->f[2] ); 530 dst->f[3] = util_fast_log2( src->f[3] ); 531#else 532 dst->f[0] = logf( src->f[0] ) * 1.442695f; 533 dst->f[1] = logf( src->f[1] ) * 1.442695f; 534 dst->f[2] = logf( src->f[2] ) * 1.442695f; 535 dst->f[3] = logf( src->f[3] ) * 1.442695f; 536#endif 537} 538 539static void 540micro_le( 541 union tgsi_exec_channel *dst, 542 const union tgsi_exec_channel *src0, 543 const union tgsi_exec_channel *src1, 544 const union tgsi_exec_channel *src2, 545 const union tgsi_exec_channel *src3 ) 546{ 547 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 548 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 549 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 550 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 551} 552 553static void 554micro_lt( 555 union tgsi_exec_channel *dst, 556 const union tgsi_exec_channel *src0, 557 const union tgsi_exec_channel *src1, 558 const union tgsi_exec_channel *src2, 559 const union tgsi_exec_channel *src3 ) 560{ 561 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 562 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 563 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 564 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 565} 566 567#if 0 568static void 569micro_ilt( 570 union tgsi_exec_channel *dst, 571 const union tgsi_exec_channel *src0, 572 const union tgsi_exec_channel *src1, 573 const union tgsi_exec_channel *src2, 574 const union tgsi_exec_channel *src3 ) 575{ 576 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 577 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 578 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 579 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 580} 581#endif 582 583#if 0 584static void 585micro_ult( 586 union tgsi_exec_channel *dst, 587 const union tgsi_exec_channel *src0, 588 const union tgsi_exec_channel *src1, 589 const union tgsi_exec_channel *src2, 590 const union tgsi_exec_channel *src3 ) 591{ 592 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 593 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 594 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 595 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 596} 597#endif 598 599static void 600micro_max( 601 union tgsi_exec_channel *dst, 602 const union tgsi_exec_channel *src0, 603 const union tgsi_exec_channel *src1 ) 604{ 605 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 606 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 607 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 608 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 609} 610 611#if 0 612static void 613micro_imax( 614 union tgsi_exec_channel *dst, 615 const union tgsi_exec_channel *src0, 616 const union tgsi_exec_channel *src1 ) 617{ 618 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 619 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 620 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 621 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 622} 623#endif 624 625#if 0 626static void 627micro_umax( 628 union tgsi_exec_channel *dst, 629 const union tgsi_exec_channel *src0, 630 const union tgsi_exec_channel *src1 ) 631{ 632 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 633 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 634 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 635 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 636} 637#endif 638 639static void 640micro_min( 641 union tgsi_exec_channel *dst, 642 const union tgsi_exec_channel *src0, 643 const union tgsi_exec_channel *src1 ) 644{ 645 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 646 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 647 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 648 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 649} 650 651#if 0 652static void 653micro_imin( 654 union tgsi_exec_channel *dst, 655 const union tgsi_exec_channel *src0, 656 const union tgsi_exec_channel *src1 ) 657{ 658 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 659 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 660 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 661 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 662} 663#endif 664 665#if 0 666static void 667micro_umin( 668 union tgsi_exec_channel *dst, 669 const union tgsi_exec_channel *src0, 670 const union tgsi_exec_channel *src1 ) 671{ 672 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 673 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 674 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 675 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 676} 677#endif 678 679#if 0 680static void 681micro_umod( 682 union tgsi_exec_channel *dst, 683 const union tgsi_exec_channel *src0, 684 const union tgsi_exec_channel *src1 ) 685{ 686 dst->u[0] = src0->u[0] % src1->u[0]; 687 dst->u[1] = src0->u[1] % src1->u[1]; 688 dst->u[2] = src0->u[2] % src1->u[2]; 689 dst->u[3] = src0->u[3] % src1->u[3]; 690} 691#endif 692 693static void 694micro_mul( 695 union tgsi_exec_channel *dst, 696 const union tgsi_exec_channel *src0, 697 const union tgsi_exec_channel *src1 ) 698{ 699 dst->f[0] = src0->f[0] * src1->f[0]; 700 dst->f[1] = src0->f[1] * src1->f[1]; 701 dst->f[2] = src0->f[2] * src1->f[2]; 702 dst->f[3] = src0->f[3] * src1->f[3]; 703} 704 705#if 0 706static void 707micro_imul( 708 union tgsi_exec_channel *dst, 709 const union tgsi_exec_channel *src0, 710 const union tgsi_exec_channel *src1 ) 711{ 712 dst->i[0] = src0->i[0] * src1->i[0]; 713 dst->i[1] = src0->i[1] * src1->i[1]; 714 dst->i[2] = src0->i[2] * src1->i[2]; 715 dst->i[3] = src0->i[3] * src1->i[3]; 716} 717#endif 718 719#if 0 720static void 721micro_imul64( 722 union tgsi_exec_channel *dst0, 723 union tgsi_exec_channel *dst1, 724 const union tgsi_exec_channel *src0, 725 const union tgsi_exec_channel *src1 ) 726{ 727 dst1->i[0] = src0->i[0] * src1->i[0]; 728 dst1->i[1] = src0->i[1] * src1->i[1]; 729 dst1->i[2] = src0->i[2] * src1->i[2]; 730 dst1->i[3] = src0->i[3] * src1->i[3]; 731 dst0->i[0] = 0; 732 dst0->i[1] = 0; 733 dst0->i[2] = 0; 734 dst0->i[3] = 0; 735} 736#endif 737 738#if 0 739static void 740micro_umul64( 741 union tgsi_exec_channel *dst0, 742 union tgsi_exec_channel *dst1, 743 const union tgsi_exec_channel *src0, 744 const union tgsi_exec_channel *src1 ) 745{ 746 dst1->u[0] = src0->u[0] * src1->u[0]; 747 dst1->u[1] = src0->u[1] * src1->u[1]; 748 dst1->u[2] = src0->u[2] * src1->u[2]; 749 dst1->u[3] = src0->u[3] * src1->u[3]; 750 dst0->u[0] = 0; 751 dst0->u[1] = 0; 752 dst0->u[2] = 0; 753 dst0->u[3] = 0; 754} 755#endif 756 757 758#if 0 759static void 760micro_movc( 761 union tgsi_exec_channel *dst, 762 const union tgsi_exec_channel *src0, 763 const union tgsi_exec_channel *src1, 764 const union tgsi_exec_channel *src2 ) 765{ 766 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 767 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 768 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 769 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 770} 771#endif 772 773static void 774micro_neg( 775 union tgsi_exec_channel *dst, 776 const union tgsi_exec_channel *src ) 777{ 778 dst->f[0] = -src->f[0]; 779 dst->f[1] = -src->f[1]; 780 dst->f[2] = -src->f[2]; 781 dst->f[3] = -src->f[3]; 782} 783 784#if 0 785static void 786micro_ineg( 787 union tgsi_exec_channel *dst, 788 const union tgsi_exec_channel *src ) 789{ 790 dst->i[0] = -src->i[0]; 791 dst->i[1] = -src->i[1]; 792 dst->i[2] = -src->i[2]; 793 dst->i[3] = -src->i[3]; 794} 795#endif 796 797static void 798micro_not( 799 union tgsi_exec_channel *dst, 800 const union tgsi_exec_channel *src ) 801{ 802 dst->u[0] = ~src->u[0]; 803 dst->u[1] = ~src->u[1]; 804 dst->u[2] = ~src->u[2]; 805 dst->u[3] = ~src->u[3]; 806} 807 808static void 809micro_or( 810 union tgsi_exec_channel *dst, 811 const union tgsi_exec_channel *src0, 812 const union tgsi_exec_channel *src1 ) 813{ 814 dst->u[0] = src0->u[0] | src1->u[0]; 815 dst->u[1] = src0->u[1] | src1->u[1]; 816 dst->u[2] = src0->u[2] | src1->u[2]; 817 dst->u[3] = src0->u[3] | src1->u[3]; 818} 819 820static void 821micro_pow( 822 union tgsi_exec_channel *dst, 823 const union tgsi_exec_channel *src0, 824 const union tgsi_exec_channel *src1 ) 825{ 826#if FAST_MATH 827 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 828 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 829 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 830 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 831#else 832 dst->f[0] = powf( src0->f[0], src1->f[0] ); 833 dst->f[1] = powf( src0->f[1], src1->f[1] ); 834 dst->f[2] = powf( src0->f[2], src1->f[2] ); 835 dst->f[3] = powf( src0->f[3], src1->f[3] ); 836#endif 837} 838 839static void 840micro_rnd( 841 union tgsi_exec_channel *dst, 842 const union tgsi_exec_channel *src ) 843{ 844 dst->f[0] = floorf( src->f[0] + 0.5f ); 845 dst->f[1] = floorf( src->f[1] + 0.5f ); 846 dst->f[2] = floorf( src->f[2] + 0.5f ); 847 dst->f[3] = floorf( src->f[3] + 0.5f ); 848} 849 850static void 851micro_sgn( 852 union tgsi_exec_channel *dst, 853 const union tgsi_exec_channel *src ) 854{ 855 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 856 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 857 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 858 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 859} 860 861static void 862micro_shl( 863 union tgsi_exec_channel *dst, 864 const union tgsi_exec_channel *src0, 865 const union tgsi_exec_channel *src1 ) 866{ 867 dst->i[0] = src0->i[0] << src1->i[0]; 868 dst->i[1] = src0->i[1] << src1->i[1]; 869 dst->i[2] = src0->i[2] << src1->i[2]; 870 dst->i[3] = src0->i[3] << src1->i[3]; 871} 872 873static void 874micro_ishr( 875 union tgsi_exec_channel *dst, 876 const union tgsi_exec_channel *src0, 877 const union tgsi_exec_channel *src1 ) 878{ 879 dst->i[0] = src0->i[0] >> src1->i[0]; 880 dst->i[1] = src0->i[1] >> src1->i[1]; 881 dst->i[2] = src0->i[2] >> src1->i[2]; 882 dst->i[3] = src0->i[3] >> src1->i[3]; 883} 884 885static void 886micro_trunc( 887 union tgsi_exec_channel *dst, 888 const union tgsi_exec_channel *src0 ) 889{ 890 dst->f[0] = (float) (int) src0->f[0]; 891 dst->f[1] = (float) (int) src0->f[1]; 892 dst->f[2] = (float) (int) src0->f[2]; 893 dst->f[3] = (float) (int) src0->f[3]; 894} 895 896#if 0 897static void 898micro_ushr( 899 union tgsi_exec_channel *dst, 900 const union tgsi_exec_channel *src0, 901 const union tgsi_exec_channel *src1 ) 902{ 903 dst->u[0] = src0->u[0] >> src1->u[0]; 904 dst->u[1] = src0->u[1] >> src1->u[1]; 905 dst->u[2] = src0->u[2] >> src1->u[2]; 906 dst->u[3] = src0->u[3] >> src1->u[3]; 907} 908#endif 909 910static void 911micro_sin( 912 union tgsi_exec_channel *dst, 913 const union tgsi_exec_channel *src ) 914{ 915 dst->f[0] = sinf( src->f[0] ); 916 dst->f[1] = sinf( src->f[1] ); 917 dst->f[2] = sinf( src->f[2] ); 918 dst->f[3] = sinf( src->f[3] ); 919} 920 921static void 922micro_sqrt( union tgsi_exec_channel *dst, 923 const union tgsi_exec_channel *src ) 924{ 925 dst->f[0] = sqrtf( src->f[0] ); 926 dst->f[1] = sqrtf( src->f[1] ); 927 dst->f[2] = sqrtf( src->f[2] ); 928 dst->f[3] = sqrtf( src->f[3] ); 929} 930 931static void 932micro_sub( 933 union tgsi_exec_channel *dst, 934 const union tgsi_exec_channel *src0, 935 const union tgsi_exec_channel *src1 ) 936{ 937 dst->f[0] = src0->f[0] - src1->f[0]; 938 dst->f[1] = src0->f[1] - src1->f[1]; 939 dst->f[2] = src0->f[2] - src1->f[2]; 940 dst->f[3] = src0->f[3] - src1->f[3]; 941} 942 943#if 0 944static void 945micro_u2f( 946 union tgsi_exec_channel *dst, 947 const union tgsi_exec_channel *src ) 948{ 949 dst->f[0] = (float) src->u[0]; 950 dst->f[1] = (float) src->u[1]; 951 dst->f[2] = (float) src->u[2]; 952 dst->f[3] = (float) src->u[3]; 953} 954#endif 955 956static void 957micro_xor( 958 union tgsi_exec_channel *dst, 959 const union tgsi_exec_channel *src0, 960 const union tgsi_exec_channel *src1 ) 961{ 962 dst->u[0] = src0->u[0] ^ src1->u[0]; 963 dst->u[1] = src0->u[1] ^ src1->u[1]; 964 dst->u[2] = src0->u[2] ^ src1->u[2]; 965 dst->u[3] = src0->u[3] ^ src1->u[3]; 966} 967 968static void 969fetch_src_file_channel( 970 const struct tgsi_exec_machine *mach, 971 const uint file, 972 const uint swizzle, 973 const union tgsi_exec_channel *index, 974 union tgsi_exec_channel *chan ) 975{ 976 switch( swizzle ) { 977 case TGSI_EXTSWIZZLE_X: 978 case TGSI_EXTSWIZZLE_Y: 979 case TGSI_EXTSWIZZLE_Z: 980 case TGSI_EXTSWIZZLE_W: 981 switch( file ) { 982 case TGSI_FILE_CONSTANT: 983 assert(mach->Consts); 984 if (index->i[0] < 0) 985 chan->f[0] = 0.0f; 986 else 987 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 988 if (index->i[1] < 0) 989 chan->f[1] = 0.0f; 990 else 991 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 992 if (index->i[2] < 0) 993 chan->f[2] = 0.0f; 994 else 995 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 996 if (index->i[3] < 0) 997 chan->f[3] = 0.0f; 998 else 999 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1000 break; 1001 1002 case TGSI_FILE_INPUT: 1003 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1004 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1005 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1006 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1007 break; 1008 1009 case TGSI_FILE_TEMPORARY: 1010 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1011 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1012 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1013 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1014 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1015 break; 1016 1017 case TGSI_FILE_IMMEDIATE: 1018 assert( index->i[0] < (int) mach->ImmLimit ); 1019 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1020 assert( index->i[1] < (int) mach->ImmLimit ); 1021 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1022 assert( index->i[2] < (int) mach->ImmLimit ); 1023 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1024 assert( index->i[3] < (int) mach->ImmLimit ); 1025 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1026 break; 1027 1028 case TGSI_FILE_ADDRESS: 1029 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1030 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1031 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1032 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1033 break; 1034 1035 case TGSI_FILE_OUTPUT: 1036 /* vertex/fragment output vars can be read too */ 1037 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1038 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1039 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1040 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1041 break; 1042 1043 default: 1044 assert( 0 ); 1045 } 1046 break; 1047 1048 case TGSI_EXTSWIZZLE_ZERO: 1049 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 1050 break; 1051 1052 case TGSI_EXTSWIZZLE_ONE: 1053 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 1054 break; 1055 1056 default: 1057 assert( 0 ); 1058 } 1059} 1060 1061static void 1062fetch_source( 1063 const struct tgsi_exec_machine *mach, 1064 union tgsi_exec_channel *chan, 1065 const struct tgsi_full_src_register *reg, 1066 const uint chan_index ) 1067{ 1068 union tgsi_exec_channel index; 1069 uint swizzle; 1070 1071 /* We start with a direct index into a register file. 1072 * 1073 * file[1], 1074 * where: 1075 * file = SrcRegister.File 1076 * [1] = SrcRegister.Index 1077 */ 1078 index.i[0] = 1079 index.i[1] = 1080 index.i[2] = 1081 index.i[3] = reg->SrcRegister.Index; 1082 1083 /* There is an extra source register that indirectly subscripts 1084 * a register file. The direct index now becomes an offset 1085 * that is being added to the indirect register. 1086 * 1087 * file[ind[2].x+1], 1088 * where: 1089 * ind = SrcRegisterInd.File 1090 * [2] = SrcRegisterInd.Index 1091 * .x = SrcRegisterInd.SwizzleX 1092 */ 1093 if (reg->SrcRegister.Indirect) { 1094 union tgsi_exec_channel index2; 1095 union tgsi_exec_channel indir_index; 1096 const uint execmask = mach->ExecMask; 1097 uint i; 1098 1099 /* which address register (always zero now) */ 1100 index2.i[0] = 1101 index2.i[1] = 1102 index2.i[2] = 1103 index2.i[3] = reg->SrcRegisterInd.Index; 1104 1105 /* get current value of address register[swizzle] */ 1106 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1107 fetch_src_file_channel( 1108 mach, 1109 reg->SrcRegisterInd.File, 1110 swizzle, 1111 &index2, 1112 &indir_index ); 1113 1114 /* add value of address register to the offset */ 1115 index.i[0] += (int) indir_index.f[0]; 1116 index.i[1] += (int) indir_index.f[1]; 1117 index.i[2] += (int) indir_index.f[2]; 1118 index.i[3] += (int) indir_index.f[3]; 1119 1120 /* for disabled execution channels, zero-out the index to 1121 * avoid using a potential garbage value. 1122 */ 1123 for (i = 0; i < QUAD_SIZE; i++) { 1124 if ((execmask & (1 << i)) == 0) 1125 index.i[i] = 0; 1126 } 1127 } 1128 1129 /* There is an extra source register that is a second 1130 * subscript to a register file. Effectively it means that 1131 * the register file is actually a 2D array of registers. 1132 * 1133 * file[1][3] == file[1*sizeof(file[1])+3], 1134 * where: 1135 * [3] = SrcRegisterDim.Index 1136 */ 1137 if (reg->SrcRegister.Dimension) { 1138 /* The size of the first-order array depends on the register file type. 1139 * We need to multiply the index to the first array to get an effective, 1140 * "flat" index that points to the beginning of the second-order array. 1141 */ 1142 switch (reg->SrcRegister.File) { 1143 case TGSI_FILE_INPUT: 1144 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1145 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1146 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1147 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1148 break; 1149 case TGSI_FILE_CONSTANT: 1150 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1151 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1152 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1153 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1154 break; 1155 default: 1156 assert( 0 ); 1157 } 1158 1159 index.i[0] += reg->SrcRegisterDim.Index; 1160 index.i[1] += reg->SrcRegisterDim.Index; 1161 index.i[2] += reg->SrcRegisterDim.Index; 1162 index.i[3] += reg->SrcRegisterDim.Index; 1163 1164 /* Again, the second subscript index can be addressed indirectly 1165 * identically to the first one. 1166 * Nothing stops us from indirectly addressing the indirect register, 1167 * but there is no need for that, so we won't exercise it. 1168 * 1169 * file[1][ind[4].y+3], 1170 * where: 1171 * ind = SrcRegisterDimInd.File 1172 * [4] = SrcRegisterDimInd.Index 1173 * .y = SrcRegisterDimInd.SwizzleX 1174 */ 1175 if (reg->SrcRegisterDim.Indirect) { 1176 union tgsi_exec_channel index2; 1177 union tgsi_exec_channel indir_index; 1178 const uint execmask = mach->ExecMask; 1179 uint i; 1180 1181 index2.i[0] = 1182 index2.i[1] = 1183 index2.i[2] = 1184 index2.i[3] = reg->SrcRegisterDimInd.Index; 1185 1186 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1187 fetch_src_file_channel( 1188 mach, 1189 reg->SrcRegisterDimInd.File, 1190 swizzle, 1191 &index2, 1192 &indir_index ); 1193 1194 index.i[0] += (int) indir_index.f[0]; 1195 index.i[1] += (int) indir_index.f[1]; 1196 index.i[2] += (int) indir_index.f[2]; 1197 index.i[3] += (int) indir_index.f[3]; 1198 1199 /* for disabled execution channels, zero-out the index to 1200 * avoid using a potential garbage value. 1201 */ 1202 for (i = 0; i < QUAD_SIZE; i++) { 1203 if ((execmask & (1 << i)) == 0) 1204 index.i[i] = 0; 1205 } 1206 } 1207 1208 /* If by any chance there was a need for a 3D array of register 1209 * files, we would have to check whether SrcRegisterDim is followed 1210 * by a dimension register and continue the saga. 1211 */ 1212 } 1213 1214 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1215 fetch_src_file_channel( 1216 mach, 1217 reg->SrcRegister.File, 1218 swizzle, 1219 &index, 1220 chan ); 1221 1222 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1223 case TGSI_UTIL_SIGN_CLEAR: 1224 micro_abs( chan, chan ); 1225 break; 1226 1227 case TGSI_UTIL_SIGN_SET: 1228 micro_abs( chan, chan ); 1229 micro_neg( chan, chan ); 1230 break; 1231 1232 case TGSI_UTIL_SIGN_TOGGLE: 1233 micro_neg( chan, chan ); 1234 break; 1235 1236 case TGSI_UTIL_SIGN_KEEP: 1237 break; 1238 } 1239 1240 if (reg->SrcRegisterExtMod.Complement) { 1241 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1242 } 1243} 1244 1245static void 1246store_dest( 1247 struct tgsi_exec_machine *mach, 1248 const union tgsi_exec_channel *chan, 1249 const struct tgsi_full_dst_register *reg, 1250 const struct tgsi_full_instruction *inst, 1251 uint chan_index ) 1252{ 1253 uint i; 1254 union tgsi_exec_channel null; 1255 union tgsi_exec_channel *dst; 1256 uint execmask = mach->ExecMask; 1257 1258 switch (reg->DstRegister.File) { 1259 case TGSI_FILE_NULL: 1260 dst = &null; 1261 break; 1262 1263 case TGSI_FILE_OUTPUT: 1264 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1265 + reg->DstRegister.Index].xyzw[chan_index]; 1266 break; 1267 1268 case TGSI_FILE_TEMPORARY: 1269 assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); 1270 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 1271 break; 1272 1273 case TGSI_FILE_ADDRESS: 1274 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 1275 break; 1276 1277 default: 1278 assert( 0 ); 1279 return; 1280 } 1281 1282 if (inst->InstructionExtNv.CondFlowEnable) { 1283 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1284 uint swizzle; 1285 uint shift; 1286 uint mask; 1287 uint test; 1288 1289 /* Only CC0 supported. 1290 */ 1291 assert( inst->InstructionExtNv.CondFlowIndex < 1 ); 1292 1293 switch (chan_index) { 1294 case CHAN_X: 1295 swizzle = inst->InstructionExtNv.CondSwizzleX; 1296 break; 1297 case CHAN_Y: 1298 swizzle = inst->InstructionExtNv.CondSwizzleY; 1299 break; 1300 case CHAN_Z: 1301 swizzle = inst->InstructionExtNv.CondSwizzleZ; 1302 break; 1303 case CHAN_W: 1304 swizzle = inst->InstructionExtNv.CondSwizzleW; 1305 break; 1306 default: 1307 assert( 0 ); 1308 return; 1309 } 1310 1311 switch (swizzle) { 1312 case TGSI_SWIZZLE_X: 1313 shift = TGSI_EXEC_CC_X_SHIFT; 1314 mask = TGSI_EXEC_CC_X_MASK; 1315 break; 1316 case TGSI_SWIZZLE_Y: 1317 shift = TGSI_EXEC_CC_Y_SHIFT; 1318 mask = TGSI_EXEC_CC_Y_MASK; 1319 break; 1320 case TGSI_SWIZZLE_Z: 1321 shift = TGSI_EXEC_CC_Z_SHIFT; 1322 mask = TGSI_EXEC_CC_Z_MASK; 1323 break; 1324 case TGSI_SWIZZLE_W: 1325 shift = TGSI_EXEC_CC_W_SHIFT; 1326 mask = TGSI_EXEC_CC_W_MASK; 1327 break; 1328 default: 1329 assert( 0 ); 1330 return; 1331 } 1332 1333 switch (inst->InstructionExtNv.CondMask) { 1334 case TGSI_CC_GT: 1335 test = ~(TGSI_EXEC_CC_GT << shift) & mask; 1336 for (i = 0; i < QUAD_SIZE; i++) 1337 if (cc->u[i] & test) 1338 execmask &= ~(1 << i); 1339 break; 1340 1341 case TGSI_CC_EQ: 1342 test = ~(TGSI_EXEC_CC_EQ << shift) & mask; 1343 for (i = 0; i < QUAD_SIZE; i++) 1344 if (cc->u[i] & test) 1345 execmask &= ~(1 << i); 1346 break; 1347 1348 case TGSI_CC_LT: 1349 test = ~(TGSI_EXEC_CC_LT << shift) & mask; 1350 for (i = 0; i < QUAD_SIZE; i++) 1351 if (cc->u[i] & test) 1352 execmask &= ~(1 << i); 1353 break; 1354 1355 case TGSI_CC_GE: 1356 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; 1357 for (i = 0; i < QUAD_SIZE; i++) 1358 if (cc->u[i] & test) 1359 execmask &= ~(1 << i); 1360 break; 1361 1362 case TGSI_CC_LE: 1363 test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; 1364 for (i = 0; i < QUAD_SIZE; i++) 1365 if (cc->u[i] & test) 1366 execmask &= ~(1 << i); 1367 break; 1368 1369 case TGSI_CC_NE: 1370 test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; 1371 for (i = 0; i < QUAD_SIZE; i++) 1372 if (cc->u[i] & test) 1373 execmask &= ~(1 << i); 1374 break; 1375 1376 case TGSI_CC_TR: 1377 break; 1378 1379 case TGSI_CC_FL: 1380 for (i = 0; i < QUAD_SIZE; i++) 1381 execmask &= ~(1 << i); 1382 break; 1383 1384 default: 1385 assert( 0 ); 1386 return; 1387 } 1388 } 1389 1390 switch (inst->Instruction.Saturate) { 1391 case TGSI_SAT_NONE: 1392 for (i = 0; i < QUAD_SIZE; i++) 1393 if (execmask & (1 << i)) 1394 dst->i[i] = chan->i[i]; 1395 break; 1396 1397 case TGSI_SAT_ZERO_ONE: 1398 for (i = 0; i < QUAD_SIZE; i++) 1399 if (execmask & (1 << i)) { 1400 if (chan->f[i] < 0.0f) 1401 dst->f[i] = 0.0f; 1402 else if (chan->f[i] > 1.0f) 1403 dst->f[i] = 1.0f; 1404 else 1405 dst->i[i] = chan->i[i]; 1406 } 1407 break; 1408 1409 case TGSI_SAT_MINUS_PLUS_ONE: 1410 for (i = 0; i < QUAD_SIZE; i++) 1411 if (execmask & (1 << i)) { 1412 if (chan->f[i] < -1.0f) 1413 dst->f[i] = -1.0f; 1414 else if (chan->f[i] > 1.0f) 1415 dst->f[i] = 1.0f; 1416 else 1417 dst->i[i] = chan->i[i]; 1418 } 1419 break; 1420 1421 default: 1422 assert( 0 ); 1423 } 1424 1425 if (inst->InstructionExtNv.CondDstUpdate) { 1426 union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; 1427 uint shift; 1428 uint mask; 1429 1430 /* Only CC0 supported. 1431 */ 1432 assert( inst->InstructionExtNv.CondDstIndex < 1 ); 1433 1434 switch (chan_index) { 1435 case CHAN_X: 1436 shift = TGSI_EXEC_CC_X_SHIFT; 1437 mask = ~TGSI_EXEC_CC_X_MASK; 1438 break; 1439 case CHAN_Y: 1440 shift = TGSI_EXEC_CC_Y_SHIFT; 1441 mask = ~TGSI_EXEC_CC_Y_MASK; 1442 break; 1443 case CHAN_Z: 1444 shift = TGSI_EXEC_CC_Z_SHIFT; 1445 mask = ~TGSI_EXEC_CC_Z_MASK; 1446 break; 1447 case CHAN_W: 1448 shift = TGSI_EXEC_CC_W_SHIFT; 1449 mask = ~TGSI_EXEC_CC_W_MASK; 1450 break; 1451 default: 1452 assert( 0 ); 1453 return; 1454 } 1455 1456 for (i = 0; i < QUAD_SIZE; i++) 1457 if (execmask & (1 << i)) { 1458 cc->u[i] &= mask; 1459 if (dst->f[i] < 0.0f) 1460 cc->u[i] |= TGSI_EXEC_CC_LT << shift; 1461 else if (dst->f[i] > 0.0f) 1462 cc->u[i] |= TGSI_EXEC_CC_GT << shift; 1463 else if (dst->f[i] == 0.0f) 1464 cc->u[i] |= TGSI_EXEC_CC_EQ << shift; 1465 else 1466 cc->u[i] |= TGSI_EXEC_CC_UN << shift; 1467 } 1468 } 1469} 1470 1471#define FETCH(VAL,INDEX,CHAN)\ 1472 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1473 1474#define STORE(VAL,INDEX,CHAN)\ 1475 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1476 1477 1478/** 1479 * Execute ARB-style KIL which is predicated by a src register. 1480 * Kill fragment if any of the four values is less than zero. 1481 */ 1482static void 1483exec_kil(struct tgsi_exec_machine *mach, 1484 const struct tgsi_full_instruction *inst) 1485{ 1486 uint uniquemask; 1487 uint chan_index; 1488 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1489 union tgsi_exec_channel r[1]; 1490 1491 /* This mask stores component bits that were already tested. Note that 1492 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1493 * tested. */ 1494 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1495 1496 for (chan_index = 0; chan_index < 4; chan_index++) 1497 { 1498 uint swizzle; 1499 uint i; 1500 1501 /* unswizzle channel */ 1502 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1503 &inst->FullSrcRegisters[0], 1504 chan_index); 1505 1506 /* check if the component has not been already tested */ 1507 if (uniquemask & (1 << swizzle)) 1508 continue; 1509 uniquemask |= 1 << swizzle; 1510 1511 FETCH(&r[0], 0, chan_index); 1512 for (i = 0; i < 4; i++) 1513 if (r[0].f[i] < 0.0f) 1514 kilmask |= 1 << i; 1515 } 1516 1517 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1518} 1519 1520/** 1521 * Execute NVIDIA-style KIL which is predicated by a condition code. 1522 * Kill fragment if the condition code is TRUE. 1523 */ 1524static void 1525exec_kilp(struct tgsi_exec_machine *mach, 1526 const struct tgsi_full_instruction *inst) 1527{ 1528 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1529 1530 if (inst->InstructionExtNv.CondFlowEnable) { 1531 uint swizzle[4]; 1532 uint chan_index; 1533 1534 kilmask = 0x0; 1535 1536 swizzle[0] = inst->InstructionExtNv.CondSwizzleX; 1537 swizzle[1] = inst->InstructionExtNv.CondSwizzleY; 1538 swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; 1539 swizzle[3] = inst->InstructionExtNv.CondSwizzleW; 1540 1541 for (chan_index = 0; chan_index < 4; chan_index++) 1542 { 1543 uint i; 1544 1545 for (i = 0; i < 4; i++) { 1546 /* TODO: evaluate the condition code */ 1547 if (0) 1548 kilmask |= 1 << i; 1549 } 1550 } 1551 } 1552 else { 1553 /* "unconditional" kil */ 1554 kilmask = mach->ExecMask; 1555 } 1556 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1557} 1558 1559 1560/* 1561 * Fetch a four texture samples using STR texture coordinates. 1562 */ 1563static void 1564fetch_texel( struct tgsi_sampler *sampler, 1565 const union tgsi_exec_channel *s, 1566 const union tgsi_exec_channel *t, 1567 const union tgsi_exec_channel *p, 1568 float lodbias, /* XXX should be float[4] */ 1569 union tgsi_exec_channel *r, 1570 union tgsi_exec_channel *g, 1571 union tgsi_exec_channel *b, 1572 union tgsi_exec_channel *a ) 1573{ 1574 uint j; 1575 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1576 1577 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1578 1579 for (j = 0; j < 4; j++) { 1580 r->f[j] = rgba[0][j]; 1581 g->f[j] = rgba[1][j]; 1582 b->f[j] = rgba[2][j]; 1583 a->f[j] = rgba[3][j]; 1584 } 1585} 1586 1587 1588static void 1589exec_tex(struct tgsi_exec_machine *mach, 1590 const struct tgsi_full_instruction *inst, 1591 boolean biasLod, 1592 boolean projected) 1593{ 1594 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1595 union tgsi_exec_channel r[4]; 1596 uint chan_index; 1597 float lodBias; 1598 1599 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1600 1601 switch (inst->InstructionExtTexture.Texture) { 1602 case TGSI_TEXTURE_1D: 1603 case TGSI_TEXTURE_SHADOW1D: 1604 1605 FETCH(&r[0], 0, CHAN_X); 1606 1607 if (projected) { 1608 FETCH(&r[1], 0, CHAN_W); 1609 micro_div( &r[0], &r[0], &r[1] ); 1610 } 1611 1612 if (biasLod) { 1613 FETCH(&r[1], 0, CHAN_W); 1614 lodBias = r[2].f[0]; 1615 } 1616 else 1617 lodBias = 0.0; 1618 1619 fetch_texel(mach->Samplers[unit], 1620 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ 1621 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1622 break; 1623 1624 case TGSI_TEXTURE_2D: 1625 case TGSI_TEXTURE_RECT: 1626 case TGSI_TEXTURE_SHADOW2D: 1627 case TGSI_TEXTURE_SHADOWRECT: 1628 1629 FETCH(&r[0], 0, CHAN_X); 1630 FETCH(&r[1], 0, CHAN_Y); 1631 FETCH(&r[2], 0, CHAN_Z); 1632 1633 if (projected) { 1634 FETCH(&r[3], 0, CHAN_W); 1635 micro_div( &r[0], &r[0], &r[3] ); 1636 micro_div( &r[1], &r[1], &r[3] ); 1637 micro_div( &r[2], &r[2], &r[3] ); 1638 } 1639 1640 if (biasLod) { 1641 FETCH(&r[3], 0, CHAN_W); 1642 lodBias = r[3].f[0]; 1643 } 1644 else 1645 lodBias = 0.0; 1646 1647 fetch_texel(mach->Samplers[unit], 1648 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1649 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1650 break; 1651 1652 case TGSI_TEXTURE_3D: 1653 case TGSI_TEXTURE_CUBE: 1654 1655 FETCH(&r[0], 0, CHAN_X); 1656 FETCH(&r[1], 0, CHAN_Y); 1657 FETCH(&r[2], 0, CHAN_Z); 1658 1659 if (projected) { 1660 FETCH(&r[3], 0, CHAN_W); 1661 micro_div( &r[0], &r[0], &r[3] ); 1662 micro_div( &r[1], &r[1], &r[3] ); 1663 micro_div( &r[2], &r[2], &r[3] ); 1664 } 1665 1666 if (biasLod) { 1667 FETCH(&r[3], 0, CHAN_W); 1668 lodBias = r[3].f[0]; 1669 } 1670 else 1671 lodBias = 0.0; 1672 1673 fetch_texel(mach->Samplers[unit], 1674 &r[0], &r[1], &r[2], lodBias, 1675 &r[0], &r[1], &r[2], &r[3]); 1676 break; 1677 1678 default: 1679 assert (0); 1680 } 1681 1682 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1683 STORE( &r[chan_index], 0, chan_index ); 1684 } 1685} 1686 1687 1688/** 1689 * Evaluate a constant-valued coefficient at the position of the 1690 * current quad. 1691 */ 1692static void 1693eval_constant_coef( 1694 struct tgsi_exec_machine *mach, 1695 unsigned attrib, 1696 unsigned chan ) 1697{ 1698 unsigned i; 1699 1700 for( i = 0; i < QUAD_SIZE; i++ ) { 1701 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1702 } 1703} 1704 1705/** 1706 * Evaluate a linear-valued coefficient at the position of the 1707 * current quad. 1708 */ 1709static void 1710eval_linear_coef( 1711 struct tgsi_exec_machine *mach, 1712 unsigned attrib, 1713 unsigned chan ) 1714{ 1715 const float x = mach->QuadPos.xyzw[0].f[0]; 1716 const float y = mach->QuadPos.xyzw[1].f[0]; 1717 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1718 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1719 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1720 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1721 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1722 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1723 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1724} 1725 1726/** 1727 * Evaluate a perspective-valued coefficient at the position of the 1728 * current quad. 1729 */ 1730static void 1731eval_perspective_coef( 1732 struct tgsi_exec_machine *mach, 1733 unsigned attrib, 1734 unsigned chan ) 1735{ 1736 const float x = mach->QuadPos.xyzw[0].f[0]; 1737 const float y = mach->QuadPos.xyzw[1].f[0]; 1738 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1739 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1740 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1741 const float *w = mach->QuadPos.xyzw[3].f; 1742 /* divide by W here */ 1743 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1744 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1745 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1746 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1747} 1748 1749 1750typedef void (* eval_coef_func)( 1751 struct tgsi_exec_machine *mach, 1752 unsigned attrib, 1753 unsigned chan ); 1754 1755static void 1756exec_declaration( 1757 struct tgsi_exec_machine *mach, 1758 const struct tgsi_full_declaration *decl ) 1759{ 1760 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1761 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1762 unsigned first, last, mask; 1763 eval_coef_func eval; 1764 1765 first = decl->DeclarationRange.First; 1766 last = decl->DeclarationRange.Last; 1767 mask = decl->Declaration.UsageMask; 1768 1769 switch( decl->Declaration.Interpolate ) { 1770 case TGSI_INTERPOLATE_CONSTANT: 1771 eval = eval_constant_coef; 1772 break; 1773 1774 case TGSI_INTERPOLATE_LINEAR: 1775 eval = eval_linear_coef; 1776 break; 1777 1778 case TGSI_INTERPOLATE_PERSPECTIVE: 1779 eval = eval_perspective_coef; 1780 break; 1781 1782 default: 1783 eval = NULL; 1784 assert( 0 ); 1785 } 1786 1787 if( mask == TGSI_WRITEMASK_XYZW ) { 1788 unsigned i, j; 1789 1790 for( i = first; i <= last; i++ ) { 1791 for( j = 0; j < NUM_CHANNELS; j++ ) { 1792 eval( mach, i, j ); 1793 } 1794 } 1795 } 1796 else { 1797 unsigned i, j; 1798 1799 for( j = 0; j < NUM_CHANNELS; j++ ) { 1800 if( mask & (1 << j) ) { 1801 for( i = first; i <= last; i++ ) { 1802 eval( mach, i, j ); 1803 } 1804 } 1805 } 1806 } 1807 } 1808 } 1809} 1810 1811static void 1812exec_instruction( 1813 struct tgsi_exec_machine *mach, 1814 const struct tgsi_full_instruction *inst, 1815 int *pc ) 1816{ 1817 uint chan_index; 1818 union tgsi_exec_channel r[8]; 1819 1820 (*pc)++; 1821 1822 switch (inst->Instruction.Opcode) { 1823 case TGSI_OPCODE_ARL: 1824 /* TGSI_OPCODE_FLOOR */ 1825 /* TGSI_OPCODE_FLR */ 1826 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1827 FETCH( &r[0], 0, chan_index ); 1828 micro_flr( &r[0], &r[0] ); 1829 STORE( &r[0], 0, chan_index ); 1830 } 1831 break; 1832 1833 case TGSI_OPCODE_MOV: 1834 case TGSI_OPCODE_SWZ: 1835 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1836 FETCH( &r[0], 0, chan_index ); 1837 STORE( &r[0], 0, chan_index ); 1838 } 1839 break; 1840 1841 case TGSI_OPCODE_LIT: 1842 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1843 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1844 } 1845 1846 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1847 FETCH( &r[0], 0, CHAN_X ); 1848 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1849 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1850 STORE( &r[0], 0, CHAN_Y ); 1851 } 1852 1853 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1854 FETCH( &r[1], 0, CHAN_Y ); 1855 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1856 1857 FETCH( &r[2], 0, CHAN_W ); 1858 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1859 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1860 micro_pow( &r[1], &r[1], &r[2] ); 1861 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1862 STORE( &r[0], 0, CHAN_Z ); 1863 } 1864 } 1865 1866 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1867 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1868 } 1869 break; 1870 1871 case TGSI_OPCODE_RCP: 1872 /* TGSI_OPCODE_RECIP */ 1873 FETCH( &r[0], 0, CHAN_X ); 1874 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1875 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1876 STORE( &r[0], 0, chan_index ); 1877 } 1878 break; 1879 1880 case TGSI_OPCODE_RSQ: 1881 /* TGSI_OPCODE_RECIPSQRT */ 1882 FETCH( &r[0], 0, CHAN_X ); 1883 micro_abs( &r[0], &r[0] ); 1884 micro_sqrt( &r[0], &r[0] ); 1885 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1886 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1887 STORE( &r[0], 0, chan_index ); 1888 } 1889 break; 1890 1891 case TGSI_OPCODE_EXP: 1892 FETCH( &r[0], 0, CHAN_X ); 1893 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1894 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1895 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1896 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1897 } 1898 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1899 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1900 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1901 } 1902 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1903 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1904 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1905 } 1906 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1907 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1908 } 1909 break; 1910 1911 case TGSI_OPCODE_LOG: 1912 FETCH( &r[0], 0, CHAN_X ); 1913 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1914 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1915 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1916 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1917 STORE( &r[0], 0, CHAN_X ); 1918 } 1919 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1920 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1921 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 1922 STORE( &r[0], 0, CHAN_Y ); 1923 } 1924 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1925 STORE( &r[1], 0, CHAN_Z ); 1926 } 1927 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1928 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1929 } 1930 break; 1931 1932 case TGSI_OPCODE_MUL: 1933 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1934 { 1935 FETCH(&r[0], 0, chan_index); 1936 FETCH(&r[1], 1, chan_index); 1937 1938 micro_mul( &r[0], &r[0], &r[1] ); 1939 1940 STORE(&r[0], 0, chan_index); 1941 } 1942 break; 1943 1944 case TGSI_OPCODE_ADD: 1945 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1946 FETCH( &r[0], 0, chan_index ); 1947 FETCH( &r[1], 1, chan_index ); 1948 micro_add( &r[0], &r[0], &r[1] ); 1949 STORE( &r[0], 0, chan_index ); 1950 } 1951 break; 1952 1953 case TGSI_OPCODE_DP3: 1954 /* TGSI_OPCODE_DOT3 */ 1955 FETCH( &r[0], 0, CHAN_X ); 1956 FETCH( &r[1], 1, CHAN_X ); 1957 micro_mul( &r[0], &r[0], &r[1] ); 1958 1959 FETCH( &r[1], 0, CHAN_Y ); 1960 FETCH( &r[2], 1, CHAN_Y ); 1961 micro_mul( &r[1], &r[1], &r[2] ); 1962 micro_add( &r[0], &r[0], &r[1] ); 1963 1964 FETCH( &r[1], 0, CHAN_Z ); 1965 FETCH( &r[2], 1, CHAN_Z ); 1966 micro_mul( &r[1], &r[1], &r[2] ); 1967 micro_add( &r[0], &r[0], &r[1] ); 1968 1969 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1970 STORE( &r[0], 0, chan_index ); 1971 } 1972 break; 1973 1974 case TGSI_OPCODE_DP4: 1975 /* TGSI_OPCODE_DOT4 */ 1976 FETCH(&r[0], 0, CHAN_X); 1977 FETCH(&r[1], 1, CHAN_X); 1978 1979 micro_mul( &r[0], &r[0], &r[1] ); 1980 1981 FETCH(&r[1], 0, CHAN_Y); 1982 FETCH(&r[2], 1, CHAN_Y); 1983 1984 micro_mul( &r[1], &r[1], &r[2] ); 1985 micro_add( &r[0], &r[0], &r[1] ); 1986 1987 FETCH(&r[1], 0, CHAN_Z); 1988 FETCH(&r[2], 1, CHAN_Z); 1989 1990 micro_mul( &r[1], &r[1], &r[2] ); 1991 micro_add( &r[0], &r[0], &r[1] ); 1992 1993 FETCH(&r[1], 0, CHAN_W); 1994 FETCH(&r[2], 1, CHAN_W); 1995 1996 micro_mul( &r[1], &r[1], &r[2] ); 1997 micro_add( &r[0], &r[0], &r[1] ); 1998 1999 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2000 STORE( &r[0], 0, chan_index ); 2001 } 2002 break; 2003 2004 case TGSI_OPCODE_DST: 2005 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2006 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2007 } 2008 2009 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2010 FETCH( &r[0], 0, CHAN_Y ); 2011 FETCH( &r[1], 1, CHAN_Y); 2012 micro_mul( &r[0], &r[0], &r[1] ); 2013 STORE( &r[0], 0, CHAN_Y ); 2014 } 2015 2016 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2017 FETCH( &r[0], 0, CHAN_Z ); 2018 STORE( &r[0], 0, CHAN_Z ); 2019 } 2020 2021 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2022 FETCH( &r[0], 1, CHAN_W ); 2023 STORE( &r[0], 0, CHAN_W ); 2024 } 2025 break; 2026 2027 case TGSI_OPCODE_MIN: 2028 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2029 FETCH(&r[0], 0, chan_index); 2030 FETCH(&r[1], 1, chan_index); 2031 2032 /* XXX use micro_min()?? */ 2033 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2034 2035 STORE(&r[0], 0, chan_index); 2036 } 2037 break; 2038 2039 case TGSI_OPCODE_MAX: 2040 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2041 FETCH(&r[0], 0, chan_index); 2042 FETCH(&r[1], 1, chan_index); 2043 2044 /* XXX use micro_max()?? */ 2045 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2046 2047 STORE(&r[0], 0, chan_index ); 2048 } 2049 break; 2050 2051 case TGSI_OPCODE_SLT: 2052 /* TGSI_OPCODE_SETLT */ 2053 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2054 FETCH( &r[0], 0, chan_index ); 2055 FETCH( &r[1], 1, chan_index ); 2056 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2057 STORE( &r[0], 0, chan_index ); 2058 } 2059 break; 2060 2061 case TGSI_OPCODE_SGE: 2062 /* TGSI_OPCODE_SETGE */ 2063 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2064 FETCH( &r[0], 0, chan_index ); 2065 FETCH( &r[1], 1, chan_index ); 2066 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2067 STORE( &r[0], 0, chan_index ); 2068 } 2069 break; 2070 2071 case TGSI_OPCODE_MAD: 2072 /* TGSI_OPCODE_MADD */ 2073 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2074 FETCH( &r[0], 0, chan_index ); 2075 FETCH( &r[1], 1, chan_index ); 2076 micro_mul( &r[0], &r[0], &r[1] ); 2077 FETCH( &r[1], 2, chan_index ); 2078 micro_add( &r[0], &r[0], &r[1] ); 2079 STORE( &r[0], 0, chan_index ); 2080 } 2081 break; 2082 2083 case TGSI_OPCODE_SUB: 2084 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2085 FETCH(&r[0], 0, chan_index); 2086 FETCH(&r[1], 1, chan_index); 2087 2088 micro_sub( &r[0], &r[0], &r[1] ); 2089 2090 STORE(&r[0], 0, chan_index); 2091 } 2092 break; 2093 2094 case TGSI_OPCODE_LERP: 2095 /* TGSI_OPCODE_LRP */ 2096 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2097 FETCH(&r[0], 0, chan_index); 2098 FETCH(&r[1], 1, chan_index); 2099 FETCH(&r[2], 2, chan_index); 2100 2101 micro_sub( &r[1], &r[1], &r[2] ); 2102 micro_mul( &r[0], &r[0], &r[1] ); 2103 micro_add( &r[0], &r[0], &r[2] ); 2104 2105 STORE(&r[0], 0, chan_index); 2106 } 2107 break; 2108 2109 case TGSI_OPCODE_CND: 2110 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2111 FETCH(&r[0], 0, chan_index); 2112 FETCH(&r[1], 1, chan_index); 2113 FETCH(&r[2], 2, chan_index); 2114 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2115 STORE(&r[0], 0, chan_index); 2116 } 2117 break; 2118 2119 case TGSI_OPCODE_CND0: 2120 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2121 FETCH(&r[0], 0, chan_index); 2122 FETCH(&r[1], 1, chan_index); 2123 FETCH(&r[2], 2, chan_index); 2124 micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); 2125 STORE(&r[0], 0, chan_index); 2126 } 2127 break; 2128 2129 case TGSI_OPCODE_DOT2ADD: 2130 /* TGSI_OPCODE_DP2A */ 2131 FETCH( &r[0], 0, CHAN_X ); 2132 FETCH( &r[1], 1, CHAN_X ); 2133 micro_mul( &r[0], &r[0], &r[1] ); 2134 2135 FETCH( &r[1], 0, CHAN_Y ); 2136 FETCH( &r[2], 1, CHAN_Y ); 2137 micro_mul( &r[1], &r[1], &r[2] ); 2138 micro_add( &r[0], &r[0], &r[1] ); 2139 2140 FETCH( &r[2], 2, CHAN_X ); 2141 micro_add( &r[0], &r[0], &r[2] ); 2142 2143 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2144 STORE( &r[0], 0, chan_index ); 2145 } 2146 break; 2147 2148 case TGSI_OPCODE_INDEX: 2149 /* XXX: considered for removal */ 2150 assert (0); 2151 break; 2152 2153 case TGSI_OPCODE_NEGATE: 2154 /* XXX: considered for removal */ 2155 assert (0); 2156 break; 2157 2158 case TGSI_OPCODE_FRAC: 2159 /* TGSI_OPCODE_FRC */ 2160 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2161 FETCH( &r[0], 0, chan_index ); 2162 micro_frc( &r[0], &r[0] ); 2163 STORE( &r[0], 0, chan_index ); 2164 } 2165 break; 2166 2167 case TGSI_OPCODE_CLAMP: 2168 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2169 FETCH(&r[0], 0, chan_index); 2170 FETCH(&r[1], 1, chan_index); 2171 micro_max(&r[0], &r[0], &r[1]); 2172 FETCH(&r[1], 2, chan_index); 2173 micro_min(&r[0], &r[0], &r[1]); 2174 STORE(&r[0], 0, chan_index); 2175 } 2176 break; 2177 2178 case TGSI_OPCODE_ROUND: 2179 case TGSI_OPCODE_ARR: 2180 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2181 FETCH( &r[0], 0, chan_index ); 2182 micro_rnd( &r[0], &r[0] ); 2183 STORE( &r[0], 0, chan_index ); 2184 } 2185 break; 2186 2187 case TGSI_OPCODE_EXPBASE2: 2188 /* TGSI_OPCODE_EX2 */ 2189 FETCH(&r[0], 0, CHAN_X); 2190 2191#if FAST_MATH 2192 micro_exp2( &r[0], &r[0] ); 2193#else 2194 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2195#endif 2196 2197 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2198 STORE( &r[0], 0, chan_index ); 2199 } 2200 break; 2201 2202 case TGSI_OPCODE_LOGBASE2: 2203 /* TGSI_OPCODE_LG2 */ 2204 FETCH( &r[0], 0, CHAN_X ); 2205 micro_lg2( &r[0], &r[0] ); 2206 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2207 STORE( &r[0], 0, chan_index ); 2208 } 2209 break; 2210 2211 case TGSI_OPCODE_POWER: 2212 /* TGSI_OPCODE_POW */ 2213 FETCH(&r[0], 0, CHAN_X); 2214 FETCH(&r[1], 1, CHAN_X); 2215 2216 micro_pow( &r[0], &r[0], &r[1] ); 2217 2218 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2219 STORE( &r[0], 0, chan_index ); 2220 } 2221 break; 2222 2223 case TGSI_OPCODE_CROSSPRODUCT: 2224 /* TGSI_OPCODE_XPD */ 2225 FETCH(&r[0], 0, CHAN_Y); 2226 FETCH(&r[1], 1, CHAN_Z); 2227 2228 micro_mul( &r[2], &r[0], &r[1] ); 2229 2230 FETCH(&r[3], 0, CHAN_Z); 2231 FETCH(&r[4], 1, CHAN_Y); 2232 2233 micro_mul( &r[5], &r[3], &r[4] ); 2234 micro_sub( &r[2], &r[2], &r[5] ); 2235 2236 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2237 STORE( &r[2], 0, CHAN_X ); 2238 } 2239 2240 FETCH(&r[2], 1, CHAN_X); 2241 2242 micro_mul( &r[3], &r[3], &r[2] ); 2243 2244 FETCH(&r[5], 0, CHAN_X); 2245 2246 micro_mul( &r[1], &r[1], &r[5] ); 2247 micro_sub( &r[3], &r[3], &r[1] ); 2248 2249 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2250 STORE( &r[3], 0, CHAN_Y ); 2251 } 2252 2253 micro_mul( &r[5], &r[5], &r[4] ); 2254 micro_mul( &r[0], &r[0], &r[2] ); 2255 micro_sub( &r[5], &r[5], &r[0] ); 2256 2257 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2258 STORE( &r[5], 0, CHAN_Z ); 2259 } 2260 2261 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2262 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2263 } 2264 break; 2265 2266 case TGSI_OPCODE_MULTIPLYMATRIX: 2267 /* XXX: considered for removal */ 2268 assert (0); 2269 break; 2270 2271 case TGSI_OPCODE_ABS: 2272 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2273 FETCH(&r[0], 0, chan_index); 2274 2275 micro_abs( &r[0], &r[0] ); 2276 2277 STORE(&r[0], 0, chan_index); 2278 } 2279 break; 2280 2281 case TGSI_OPCODE_RCC: 2282 assert (0); 2283 break; 2284 2285 case TGSI_OPCODE_DPH: 2286 FETCH(&r[0], 0, CHAN_X); 2287 FETCH(&r[1], 1, CHAN_X); 2288 2289 micro_mul( &r[0], &r[0], &r[1] ); 2290 2291 FETCH(&r[1], 0, CHAN_Y); 2292 FETCH(&r[2], 1, CHAN_Y); 2293 2294 micro_mul( &r[1], &r[1], &r[2] ); 2295 micro_add( &r[0], &r[0], &r[1] ); 2296 2297 FETCH(&r[1], 0, CHAN_Z); 2298 FETCH(&r[2], 1, CHAN_Z); 2299 2300 micro_mul( &r[1], &r[1], &r[2] ); 2301 micro_add( &r[0], &r[0], &r[1] ); 2302 2303 FETCH(&r[1], 1, CHAN_W); 2304 2305 micro_add( &r[0], &r[0], &r[1] ); 2306 2307 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2308 STORE( &r[0], 0, chan_index ); 2309 } 2310 break; 2311 2312 case TGSI_OPCODE_COS: 2313 FETCH(&r[0], 0, CHAN_X); 2314 2315 micro_cos( &r[0], &r[0] ); 2316 2317 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2318 STORE( &r[0], 0, chan_index ); 2319 } 2320 break; 2321 2322 case TGSI_OPCODE_DDX: 2323 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2324 FETCH( &r[0], 0, chan_index ); 2325 micro_ddx( &r[0], &r[0] ); 2326 STORE( &r[0], 0, chan_index ); 2327 } 2328 break; 2329 2330 case TGSI_OPCODE_DDY: 2331 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2332 FETCH( &r[0], 0, chan_index ); 2333 micro_ddy( &r[0], &r[0] ); 2334 STORE( &r[0], 0, chan_index ); 2335 } 2336 break; 2337 2338 case TGSI_OPCODE_KILP: 2339 exec_kilp (mach, inst); 2340 break; 2341 2342 case TGSI_OPCODE_KIL: 2343 exec_kil (mach, inst); 2344 break; 2345 2346 case TGSI_OPCODE_PK2H: 2347 assert (0); 2348 break; 2349 2350 case TGSI_OPCODE_PK2US: 2351 assert (0); 2352 break; 2353 2354 case TGSI_OPCODE_PK4B: 2355 assert (0); 2356 break; 2357 2358 case TGSI_OPCODE_PK4UB: 2359 assert (0); 2360 break; 2361 2362 case TGSI_OPCODE_RFL: 2363 assert (0); 2364 break; 2365 2366 case TGSI_OPCODE_SEQ: 2367 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2368 FETCH( &r[0], 0, chan_index ); 2369 FETCH( &r[1], 1, chan_index ); 2370 micro_eq( &r[0], &r[0], &r[1], 2371 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2372 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2373 STORE( &r[0], 0, chan_index ); 2374 } 2375 break; 2376 2377 case TGSI_OPCODE_SFL: 2378 assert (0); 2379 break; 2380 2381 case TGSI_OPCODE_SGT: 2382 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2383 FETCH( &r[0], 0, chan_index ); 2384 FETCH( &r[1], 1, chan_index ); 2385 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2386 STORE( &r[0], 0, chan_index ); 2387 } 2388 break; 2389 2390 case TGSI_OPCODE_SIN: 2391 FETCH( &r[0], 0, CHAN_X ); 2392 micro_sin( &r[0], &r[0] ); 2393 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2394 STORE( &r[0], 0, chan_index ); 2395 } 2396 break; 2397 2398 case TGSI_OPCODE_SLE: 2399 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2400 FETCH( &r[0], 0, chan_index ); 2401 FETCH( &r[1], 1, chan_index ); 2402 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2403 STORE( &r[0], 0, chan_index ); 2404 } 2405 break; 2406 2407 case TGSI_OPCODE_SNE: 2408 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2409 FETCH( &r[0], 0, chan_index ); 2410 FETCH( &r[1], 1, chan_index ); 2411 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2412 STORE( &r[0], 0, chan_index ); 2413 } 2414 break; 2415 2416 case TGSI_OPCODE_STR: 2417 assert (0); 2418 break; 2419 2420 case TGSI_OPCODE_TEX: 2421 /* simple texture lookup */ 2422 /* src[0] = texcoord */ 2423 /* src[1] = sampler unit */ 2424 exec_tex(mach, inst, FALSE, FALSE); 2425 break; 2426 2427 case TGSI_OPCODE_TXB: 2428 /* Texture lookup with lod bias */ 2429 /* src[0] = texcoord (src[0].w = LOD bias) */ 2430 /* src[1] = sampler unit */ 2431 exec_tex(mach, inst, TRUE, FALSE); 2432 break; 2433 2434 case TGSI_OPCODE_TXD: 2435 /* Texture lookup with explict partial derivatives */ 2436 /* src[0] = texcoord */ 2437 /* src[1] = d[strq]/dx */ 2438 /* src[2] = d[strq]/dy */ 2439 /* src[3] = sampler unit */ 2440 assert (0); 2441 break; 2442 2443 case TGSI_OPCODE_TXL: 2444 /* Texture lookup with explit LOD */ 2445 /* src[0] = texcoord (src[0].w = LOD) */ 2446 /* src[1] = sampler unit */ 2447 exec_tex(mach, inst, TRUE, FALSE); 2448 break; 2449 2450 case TGSI_OPCODE_TXP: 2451 /* Texture lookup with projection */ 2452 /* src[0] = texcoord (src[0].w = projection) */ 2453 /* src[1] = sampler unit */ 2454 exec_tex(mach, inst, FALSE, TRUE); 2455 break; 2456 2457 case TGSI_OPCODE_UP2H: 2458 assert (0); 2459 break; 2460 2461 case TGSI_OPCODE_UP2US: 2462 assert (0); 2463 break; 2464 2465 case TGSI_OPCODE_UP4B: 2466 assert (0); 2467 break; 2468 2469 case TGSI_OPCODE_UP4UB: 2470 assert (0); 2471 break; 2472 2473 case TGSI_OPCODE_X2D: 2474 assert (0); 2475 break; 2476 2477 case TGSI_OPCODE_ARA: 2478 assert (0); 2479 break; 2480 2481 case TGSI_OPCODE_BRA: 2482 assert (0); 2483 break; 2484 2485 case TGSI_OPCODE_CAL: 2486 /* skip the call if no execution channels are enabled */ 2487 if (mach->ExecMask) { 2488 /* do the call */ 2489 2490 /* push the Cond, Loop, Cont stacks */ 2491 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2492 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2493 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2494 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2495 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2496 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2497 2498 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2499 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2500 2501 /* note that PC was already incremented above */ 2502 mach->CallStack[mach->CallStackTop++] = *pc; 2503 *pc = inst->InstructionExtLabel.Label; 2504 } 2505 break; 2506 2507 case TGSI_OPCODE_RET: 2508 mach->FuncMask &= ~mach->ExecMask; 2509 UPDATE_EXEC_MASK(mach); 2510 2511 if (mach->FuncMask == 0x0) { 2512 /* really return now (otherwise, keep executing */ 2513 2514 if (mach->CallStackTop == 0) { 2515 /* returning from main() */ 2516 *pc = -1; 2517 return; 2518 } 2519 *pc = mach->CallStack[--mach->CallStackTop]; 2520 2521 /* pop the Cond, Loop, Cont stacks */ 2522 assert(mach->CondStackTop > 0); 2523 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2524 assert(mach->LoopStackTop > 0); 2525 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2526 assert(mach->ContStackTop > 0); 2527 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2528 assert(mach->FuncStackTop > 0); 2529 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2530 2531 UPDATE_EXEC_MASK(mach); 2532 } 2533 break; 2534 2535 case TGSI_OPCODE_SSG: 2536 /* TGSI_OPCODE_SGN */ 2537 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2538 FETCH( &r[0], 0, chan_index ); 2539 micro_sgn( &r[0], &r[0] ); 2540 STORE( &r[0], 0, chan_index ); 2541 } 2542 break; 2543 2544 case TGSI_OPCODE_CMP: 2545 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2546 FETCH(&r[0], 0, chan_index); 2547 FETCH(&r[1], 1, chan_index); 2548 FETCH(&r[2], 2, chan_index); 2549 2550 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2551 2552 STORE(&r[0], 0, chan_index); 2553 } 2554 break; 2555 2556 case TGSI_OPCODE_SCS: 2557 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2558 FETCH( &r[0], 0, CHAN_X ); 2559 } 2560 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { 2561 micro_cos( &r[1], &r[0] ); 2562 STORE( &r[1], 0, CHAN_X ); 2563 } 2564 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2565 micro_sin( &r[1], &r[0] ); 2566 STORE( &r[1], 0, CHAN_Y ); 2567 } 2568 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2569 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2570 } 2571 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2572 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2573 } 2574 break; 2575 2576 case TGSI_OPCODE_NRM: 2577 /* 3-component vector normalize */ 2578 { 2579 union tgsi_exec_channel tmp, dot; 2580 2581 /* tmp = dp3(src0, src0): */ 2582 FETCH( &r[0], 0, CHAN_X ); 2583 micro_mul( &tmp, &r[0], &r[0] ); 2584 2585 FETCH( &r[1], 0, CHAN_Y ); 2586 micro_mul( &dot, &r[1], &r[1] ); 2587 micro_add( &tmp, &tmp, &dot ); 2588 2589 FETCH( &r[2], 0, CHAN_Z ); 2590 micro_mul( &dot, &r[2], &r[2] ); 2591 micro_add( &tmp, &tmp, &dot ); 2592 2593 /* tmp = 1 / sqrt(tmp) */ 2594 micro_sqrt( &tmp, &tmp ); 2595 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2596 2597 /* note: w channel is undefined */ 2598 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2599 /* chan = chan * tmp */ 2600 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2601 STORE( &r[chan_index], 0, chan_index ); 2602 } 2603 } 2604 break; 2605 2606 case TGSI_OPCODE_NRM4: 2607 /* 4-component vector normalize */ 2608 { 2609 union tgsi_exec_channel tmp, dot; 2610 2611 /* tmp = dp4(src0, src0): */ 2612 FETCH( &r[0], 0, CHAN_X ); 2613 micro_mul( &tmp, &r[0], &r[0] ); 2614 2615 FETCH( &r[1], 0, CHAN_Y ); 2616 micro_mul( &dot, &r[1], &r[1] ); 2617 micro_add( &tmp, &tmp, &dot ); 2618 2619 FETCH( &r[2], 0, CHAN_Z ); 2620 micro_mul( &dot, &r[2], &r[2] ); 2621 micro_add( &tmp, &tmp, &dot ); 2622 2623 FETCH( &r[3], 0, CHAN_W ); 2624 micro_mul( &dot, &r[3], &r[3] ); 2625 micro_add( &tmp, &tmp, &dot ); 2626 2627 /* tmp = 1 / sqrt(tmp) */ 2628 micro_sqrt( &tmp, &tmp ); 2629 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2630 2631 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2632 /* chan = chan * tmp */ 2633 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2634 STORE( &r[chan_index], 0, chan_index ); 2635 } 2636 } 2637 break; 2638 2639 case TGSI_OPCODE_DIV: 2640 assert( 0 ); 2641 break; 2642 2643 case TGSI_OPCODE_DP2: 2644 FETCH( &r[0], 0, CHAN_X ); 2645 FETCH( &r[1], 1, CHAN_X ); 2646 micro_mul( &r[0], &r[0], &r[1] ); 2647 2648 FETCH( &r[1], 0, CHAN_Y ); 2649 FETCH( &r[2], 1, CHAN_Y ); 2650 micro_mul( &r[1], &r[1], &r[2] ); 2651 micro_add( &r[0], &r[0], &r[1] ); 2652 2653 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2654 STORE( &r[0], 0, chan_index ); 2655 } 2656 break; 2657 2658 case TGSI_OPCODE_IF: 2659 /* push CondMask */ 2660 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2661 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2662 FETCH( &r[0], 0, CHAN_X ); 2663 /* update CondMask */ 2664 if( ! r[0].u[0] ) { 2665 mach->CondMask &= ~0x1; 2666 } 2667 if( ! r[0].u[1] ) { 2668 mach->CondMask &= ~0x2; 2669 } 2670 if( ! r[0].u[2] ) { 2671 mach->CondMask &= ~0x4; 2672 } 2673 if( ! r[0].u[3] ) { 2674 mach->CondMask &= ~0x8; 2675 } 2676 UPDATE_EXEC_MASK(mach); 2677 /* Todo: If CondMask==0, jump to ELSE */ 2678 break; 2679 2680 case TGSI_OPCODE_ELSE: 2681 /* invert CondMask wrt previous mask */ 2682 { 2683 uint prevMask; 2684 assert(mach->CondStackTop > 0); 2685 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2686 mach->CondMask = ~mach->CondMask & prevMask; 2687 UPDATE_EXEC_MASK(mach); 2688 /* Todo: If CondMask==0, jump to ENDIF */ 2689 } 2690 break; 2691 2692 case TGSI_OPCODE_ENDIF: 2693 /* pop CondMask */ 2694 assert(mach->CondStackTop > 0); 2695 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2696 UPDATE_EXEC_MASK(mach); 2697 break; 2698 2699 case TGSI_OPCODE_END: 2700 /* halt execution */ 2701 *pc = -1; 2702 break; 2703 2704 case TGSI_OPCODE_REP: 2705 assert (0); 2706 break; 2707 2708 case TGSI_OPCODE_ENDREP: 2709 assert (0); 2710 break; 2711 2712 case TGSI_OPCODE_PUSHA: 2713 assert (0); 2714 break; 2715 2716 case TGSI_OPCODE_POPA: 2717 assert (0); 2718 break; 2719 2720 case TGSI_OPCODE_CEIL: 2721 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2722 FETCH( &r[0], 0, chan_index ); 2723 micro_ceil( &r[0], &r[0] ); 2724 STORE( &r[0], 0, chan_index ); 2725 } 2726 break; 2727 2728 case TGSI_OPCODE_I2F: 2729 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2730 FETCH( &r[0], 0, chan_index ); 2731 micro_i2f( &r[0], &r[0] ); 2732 STORE( &r[0], 0, chan_index ); 2733 } 2734 break; 2735 2736 case TGSI_OPCODE_NOT: 2737 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2738 FETCH( &r[0], 0, chan_index ); 2739 micro_not( &r[0], &r[0] ); 2740 STORE( &r[0], 0, chan_index ); 2741 } 2742 break; 2743 2744 case TGSI_OPCODE_TRUNC: 2745 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2746 FETCH( &r[0], 0, chan_index ); 2747 micro_trunc( &r[0], &r[0] ); 2748 STORE( &r[0], 0, chan_index ); 2749 } 2750 break; 2751 2752 case TGSI_OPCODE_SHL: 2753 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2754 FETCH( &r[0], 0, chan_index ); 2755 FETCH( &r[1], 1, chan_index ); 2756 micro_shl( &r[0], &r[0], &r[1] ); 2757 STORE( &r[0], 0, chan_index ); 2758 } 2759 break; 2760 2761 case TGSI_OPCODE_SHR: 2762 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2763 FETCH( &r[0], 0, chan_index ); 2764 FETCH( &r[1], 1, chan_index ); 2765 micro_ishr( &r[0], &r[0], &r[1] ); 2766 STORE( &r[0], 0, chan_index ); 2767 } 2768 break; 2769 2770 case TGSI_OPCODE_AND: 2771 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2772 FETCH( &r[0], 0, chan_index ); 2773 FETCH( &r[1], 1, chan_index ); 2774 micro_and( &r[0], &r[0], &r[1] ); 2775 STORE( &r[0], 0, chan_index ); 2776 } 2777 break; 2778 2779 case TGSI_OPCODE_OR: 2780 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2781 FETCH( &r[0], 0, chan_index ); 2782 FETCH( &r[1], 1, chan_index ); 2783 micro_or( &r[0], &r[0], &r[1] ); 2784 STORE( &r[0], 0, chan_index ); 2785 } 2786 break; 2787 2788 case TGSI_OPCODE_MOD: 2789 assert (0); 2790 break; 2791 2792 case TGSI_OPCODE_XOR: 2793 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2794 FETCH( &r[0], 0, chan_index ); 2795 FETCH( &r[1], 1, chan_index ); 2796 micro_xor( &r[0], &r[0], &r[1] ); 2797 STORE( &r[0], 0, chan_index ); 2798 } 2799 break; 2800 2801 case TGSI_OPCODE_SAD: 2802 assert (0); 2803 break; 2804 2805 case TGSI_OPCODE_TXF: 2806 assert (0); 2807 break; 2808 2809 case TGSI_OPCODE_TXQ: 2810 assert (0); 2811 break; 2812 2813 case TGSI_OPCODE_EMIT: 2814 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 2815 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2816 break; 2817 2818 case TGSI_OPCODE_ENDPRIM: 2819 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 2820 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 2821 break; 2822 2823 case TGSI_OPCODE_LOOP: 2824 /* fall-through (for now) */ 2825 case TGSI_OPCODE_BGNLOOP2: 2826 /* push LoopMask and ContMasks */ 2827 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2828 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2829 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2830 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2831 break; 2832 2833 case TGSI_OPCODE_ENDLOOP: 2834 /* fall-through (for now at least) */ 2835 case TGSI_OPCODE_ENDLOOP2: 2836 /* Restore ContMask, but don't pop */ 2837 assert(mach->ContStackTop > 0); 2838 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 2839 UPDATE_EXEC_MASK(mach); 2840 if (mach->ExecMask) { 2841 /* repeat loop: jump to instruction just past BGNLOOP */ 2842 *pc = inst->InstructionExtLabel.Label + 1; 2843 } 2844 else { 2845 /* exit loop: pop LoopMask */ 2846 assert(mach->LoopStackTop > 0); 2847 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2848 /* pop ContMask */ 2849 assert(mach->ContStackTop > 0); 2850 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2851 } 2852 UPDATE_EXEC_MASK(mach); 2853 break; 2854 2855 case TGSI_OPCODE_BRK: 2856 /* turn off loop channels for each enabled exec channel */ 2857 mach->LoopMask &= ~mach->ExecMask; 2858 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2859 UPDATE_EXEC_MASK(mach); 2860 break; 2861 2862 case TGSI_OPCODE_CONT: 2863 /* turn off cont channels for each enabled exec channel */ 2864 mach->ContMask &= ~mach->ExecMask; 2865 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2866 UPDATE_EXEC_MASK(mach); 2867 break; 2868 2869 case TGSI_OPCODE_BGNSUB: 2870 /* no-op */ 2871 break; 2872 2873 case TGSI_OPCODE_ENDSUB: 2874 /* no-op */ 2875 break; 2876 2877 case TGSI_OPCODE_NOISE1: 2878 assert( 0 ); 2879 break; 2880 2881 case TGSI_OPCODE_NOISE2: 2882 assert( 0 ); 2883 break; 2884 2885 case TGSI_OPCODE_NOISE3: 2886 assert( 0 ); 2887 break; 2888 2889 case TGSI_OPCODE_NOISE4: 2890 assert( 0 ); 2891 break; 2892 2893 case TGSI_OPCODE_NOP: 2894 break; 2895 2896 default: 2897 assert( 0 ); 2898 } 2899} 2900 2901 2902/** 2903 * Run TGSI interpreter. 2904 * \return bitmask of "alive" quad components 2905 */ 2906uint 2907tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 2908{ 2909 uint i; 2910 int pc = 0; 2911 2912 mach->CondMask = 0xf; 2913 mach->LoopMask = 0xf; 2914 mach->ContMask = 0xf; 2915 mach->FuncMask = 0xf; 2916 mach->ExecMask = 0xf; 2917 2918 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 2919 assert(mach->CondStackTop == 0); 2920 assert(mach->LoopStackTop == 0); 2921 assert(mach->ContStackTop == 0); 2922 assert(mach->CallStackTop == 0); 2923 2924 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 2925 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 2926 2927 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 2928 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 2929 mach->Primitives[0] = 0; 2930 } 2931 2932 for (i = 0; i < QUAD_SIZE; i++) { 2933 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 2934 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 2935 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 2936 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 2937 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 2938 } 2939 2940 /* execute declarations (interpolants) */ 2941 for (i = 0; i < mach->NumDeclarations; i++) { 2942 exec_declaration( mach, mach->Declarations+i ); 2943 } 2944 2945 /* execute instructions, until pc is set to -1 */ 2946 while (pc != -1) { 2947 assert(pc < (int) mach->NumInstructions); 2948 exec_instruction( mach, mach->Instructions + pc, &pc ); 2949 } 2950 2951#if 0 2952 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 2953 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 2954 /* 2955 * Scale back depth component. 2956 */ 2957 for (i = 0; i < 4; i++) 2958 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 2959 } 2960#endif 2961 2962 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 2963} 2964