tgsi_exec.c revision 80d3a653f0172f01be694a29456c70f1f4da1812
/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * TGSI interpreter/executor.
 *
 * Flow control information:
 *
 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
 * care since a condition may be true for some quad components but false
 * for other components.
 *
 * We basically execute all statements (even if they're in the part of
 * an IF/ELSE clause that's "not taken") and use a special mask to
 * control writing to destination registers.  This is the ExecMask.
 * See store_dest().
42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_util.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60 61#define TILE_TOP_LEFT 0 62#define TILE_TOP_RIGHT 1 63#define TILE_BOTTOM_LEFT 2 64#define TILE_BOTTOM_RIGHT 3 65 66/* 67 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 68 */ 69#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 70#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 71#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 72#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 73#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 74#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 75#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 76#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 77#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 78#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 79#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 80#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 81#define TEMP_128_I TGSI_EXEC_TEMP_128_I 82#define TEMP_128_C TGSI_EXEC_TEMP_128_C 83#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 84#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 85#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 86#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 87#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 88#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 89#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 90#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 91#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 92#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 93#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 94#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 95#define TEMP_R0 TGSI_EXEC_TEMP_R0 96 97#define FOR_EACH_CHANNEL(CHAN)\ 98 for (CHAN = 0; CHAN < 4; CHAN++) 99 100#define 
IS_CHANNEL_ENABLED(INST, CHAN)\ 101 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 102 103#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 104 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 105 106#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 107 FOR_EACH_CHANNEL( CHAN )\ 108 if (IS_CHANNEL_ENABLED( INST, CHAN )) 109 110#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 111 FOR_EACH_CHANNEL( CHAN )\ 112 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 113 114 115/** The execution mask depends on the conditional mask and the loop mask */ 116#define UPDATE_EXEC_MASK(MACH) \ 117 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 118 119 120#define CHAN_X 0 121#define CHAN_Y 1 122#define CHAN_Z 2 123#define CHAN_W 3 124 125 126 127/** 128 * Initialize machine state by expanding tokens to full instructions, 129 * allocating temporary storage, setting up constants, etc. 130 * After this, we can call tgsi_exec_machine_run() many times. 131 */ 132void 133tgsi_exec_machine_bind_shader( 134 struct tgsi_exec_machine *mach, 135 const struct tgsi_token *tokens, 136 uint numSamplers, 137 struct tgsi_sampler *samplers) 138{ 139 uint k; 140 struct tgsi_parse_context parse; 141 struct tgsi_exec_labels *labels = &mach->Labels; 142 struct tgsi_full_instruction *instructions; 143 struct tgsi_full_declaration *declarations; 144 uint maxInstructions = 10, numInstructions = 0; 145 uint maxDeclarations = 10, numDeclarations = 0; 146 uint instno = 0; 147 148#if 0 149 tgsi_dump(tokens, 0); 150#endif 151 152 mach->Tokens = tokens; 153 mach->Samplers = samplers; 154 155 k = tgsi_parse_init (&parse, mach->Tokens); 156 if (k != TGSI_PARSE_OK) { 157 debug_printf( "Problem parsing!\n" ); 158 return; 159 } 160 161 mach->Processor = parse.FullHeader.Processor.Processor; 162 mach->ImmLimit = 0; 163 labels->count = 0; 164 165 declarations = (struct tgsi_full_declaration *) 166 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 167 168 
if (!declarations) { 169 return; 170 } 171 172 instructions = (struct tgsi_full_instruction *) 173 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 174 175 if (!instructions) { 176 FREE( declarations ); 177 return; 178 } 179 180 while( !tgsi_parse_end_of_tokens( &parse ) ) { 181 uint pointer = parse.Position; 182 uint i; 183 184 tgsi_parse_token( &parse ); 185 switch( parse.FullToken.Token.Type ) { 186 case TGSI_TOKEN_TYPE_DECLARATION: 187 /* save expanded declaration */ 188 if (numDeclarations == maxDeclarations) { 189 declarations = REALLOC(declarations, 190 maxDeclarations 191 * sizeof(struct tgsi_full_declaration), 192 (maxDeclarations + 10) 193 * sizeof(struct tgsi_full_declaration)); 194 maxDeclarations += 10; 195 } 196 memcpy(declarations + numDeclarations, 197 &parse.FullToken.FullDeclaration, 198 sizeof(declarations[0])); 199 numDeclarations++; 200 break; 201 202 case TGSI_TOKEN_TYPE_IMMEDIATE: 203 { 204 uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; 205 assert( size % 4 == 0 ); 206 assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); 207 208 for( i = 0; i < size; i++ ) { 209 mach->Imms[mach->ImmLimit + i / 4][i % 4] = 210 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 211 } 212 mach->ImmLimit += size / 4; 213 } 214 break; 215 216 case TGSI_TOKEN_TYPE_INSTRUCTION: 217 assert( labels->count < MAX_LABELS ); 218 219 labels->labels[labels->count][0] = instno; 220 labels->labels[labels->count][1] = pointer; 221 labels->count++; 222 223 /* save expanded instruction */ 224 if (numInstructions == maxInstructions) { 225 instructions = REALLOC(instructions, 226 maxInstructions 227 * sizeof(struct tgsi_full_instruction), 228 (maxInstructions + 10) 229 * sizeof(struct tgsi_full_instruction)); 230 maxInstructions += 10; 231 } 232 memcpy(instructions + numInstructions, 233 &parse.FullToken.FullInstruction, 234 sizeof(instructions[0])); 235 numInstructions++; 236 break; 237 238 default: 239 assert( 0 ); 240 } 241 } 
242 tgsi_parse_free (&parse); 243 244 if (mach->Declarations) { 245 FREE( mach->Declarations ); 246 } 247 mach->Declarations = declarations; 248 mach->NumDeclarations = numDeclarations; 249 250 if (mach->Instructions) { 251 FREE( mach->Instructions ); 252 } 253 mach->Instructions = instructions; 254 mach->NumInstructions = numInstructions; 255} 256 257 258void 259tgsi_exec_machine_init( 260 struct tgsi_exec_machine *mach ) 261{ 262 uint i; 263 264 mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); 265 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 266 267 /* Setup constants. */ 268 for( i = 0; i < 4; i++ ) { 269 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 270 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 271 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 272 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 273 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 274 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 275 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 276 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 277 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 278 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 279 } 280} 281 282 283void 284tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) 285{ 286 if (mach->Instructions) { 287 FREE(mach->Instructions); 288 mach->Instructions = NULL; 289 mach->NumInstructions = 0; 290 } 291 if (mach->Declarations) { 292 FREE(mach->Declarations); 293 mach->Declarations = NULL; 294 mach->NumDeclarations = 0; 295 } 296} 297 298 299static void 300micro_abs( 301 union tgsi_exec_channel *dst, 302 const union tgsi_exec_channel *src ) 303{ 304 dst->f[0] = fabsf( src->f[0] ); 305 dst->f[1] = fabsf( src->f[1] ); 306 dst->f[2] = fabsf( src->f[2] ); 307 dst->f[3] = fabsf( src->f[3] ); 308} 309 310static void 311micro_add( 312 union tgsi_exec_channel *dst, 313 const union tgsi_exec_channel *src0, 314 const union tgsi_exec_channel 
*src1 ) 315{ 316 dst->f[0] = src0->f[0] + src1->f[0]; 317 dst->f[1] = src0->f[1] + src1->f[1]; 318 dst->f[2] = src0->f[2] + src1->f[2]; 319 dst->f[3] = src0->f[3] + src1->f[3]; 320} 321 322static void 323micro_iadd( 324 union tgsi_exec_channel *dst, 325 const union tgsi_exec_channel *src0, 326 const union tgsi_exec_channel *src1 ) 327{ 328 dst->i[0] = src0->i[0] + src1->i[0]; 329 dst->i[1] = src0->i[1] + src1->i[1]; 330 dst->i[2] = src0->i[2] + src1->i[2]; 331 dst->i[3] = src0->i[3] + src1->i[3]; 332} 333 334static void 335micro_and( 336 union tgsi_exec_channel *dst, 337 const union tgsi_exec_channel *src0, 338 const union tgsi_exec_channel *src1 ) 339{ 340 dst->u[0] = src0->u[0] & src1->u[0]; 341 dst->u[1] = src0->u[1] & src1->u[1]; 342 dst->u[2] = src0->u[2] & src1->u[2]; 343 dst->u[3] = src0->u[3] & src1->u[3]; 344} 345 346static void 347micro_ceil( 348 union tgsi_exec_channel *dst, 349 const union tgsi_exec_channel *src ) 350{ 351 dst->f[0] = ceilf( src->f[0] ); 352 dst->f[1] = ceilf( src->f[1] ); 353 dst->f[2] = ceilf( src->f[2] ); 354 dst->f[3] = ceilf( src->f[3] ); 355} 356 357static void 358micro_cos( 359 union tgsi_exec_channel *dst, 360 const union tgsi_exec_channel *src ) 361{ 362 dst->f[0] = cosf( src->f[0] ); 363 dst->f[1] = cosf( src->f[1] ); 364 dst->f[2] = cosf( src->f[2] ); 365 dst->f[3] = cosf( src->f[3] ); 366} 367 368static void 369micro_ddx( 370 union tgsi_exec_channel *dst, 371 const union tgsi_exec_channel *src ) 372{ 373 dst->f[0] = 374 dst->f[1] = 375 dst->f[2] = 376 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 377} 378 379static void 380micro_ddy( 381 union tgsi_exec_channel *dst, 382 const union tgsi_exec_channel *src ) 383{ 384 dst->f[0] = 385 dst->f[1] = 386 dst->f[2] = 387 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 388} 389 390static void 391micro_div( 392 union tgsi_exec_channel *dst, 393 const union tgsi_exec_channel *src0, 394 const union tgsi_exec_channel *src1 ) 395{ 396 if (src1->f[0] != 0) 
{ 397 dst->f[0] = src0->f[0] / src1->f[0]; 398 } 399 if (src1->f[1] != 0) { 400 dst->f[1] = src0->f[1] / src1->f[1]; 401 } 402 if (src1->f[2] != 0) { 403 dst->f[2] = src0->f[2] / src1->f[2]; 404 } 405 if (src1->f[3] != 0) { 406 dst->f[3] = src0->f[3] / src1->f[3]; 407 } 408} 409 410static void 411micro_udiv( 412 union tgsi_exec_channel *dst, 413 const union tgsi_exec_channel *src0, 414 const union tgsi_exec_channel *src1 ) 415{ 416 dst->u[0] = src0->u[0] / src1->u[0]; 417 dst->u[1] = src0->u[1] / src1->u[1]; 418 dst->u[2] = src0->u[2] / src1->u[2]; 419 dst->u[3] = src0->u[3] / src1->u[3]; 420} 421 422static void 423micro_eq( 424 union tgsi_exec_channel *dst, 425 const union tgsi_exec_channel *src0, 426 const union tgsi_exec_channel *src1, 427 const union tgsi_exec_channel *src2, 428 const union tgsi_exec_channel *src3 ) 429{ 430 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 431 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 432 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 433 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 434} 435 436static void 437micro_ieq( 438 union tgsi_exec_channel *dst, 439 const union tgsi_exec_channel *src0, 440 const union tgsi_exec_channel *src1, 441 const union tgsi_exec_channel *src2, 442 const union tgsi_exec_channel *src3 ) 443{ 444 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 445 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 446 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 447 dst->i[3] = src0->i[3] == src1->i[3] ? 
src2->i[3] : src3->i[3]; 448} 449 450static void 451micro_exp2( 452 union tgsi_exec_channel *dst, 453 const union tgsi_exec_channel *src) 454{ 455 dst->f[0] = powf( 2.0f, src->f[0] ); 456 dst->f[1] = powf( 2.0f, src->f[1] ); 457 dst->f[2] = powf( 2.0f, src->f[2] ); 458 dst->f[3] = powf( 2.0f, src->f[3] ); 459} 460 461static void 462micro_f2it( 463 union tgsi_exec_channel *dst, 464 const union tgsi_exec_channel *src ) 465{ 466 dst->i[0] = (int) src->f[0]; 467 dst->i[1] = (int) src->f[1]; 468 dst->i[2] = (int) src->f[2]; 469 dst->i[3] = (int) src->f[3]; 470} 471 472static void 473micro_f2ut( 474 union tgsi_exec_channel *dst, 475 const union tgsi_exec_channel *src ) 476{ 477 dst->u[0] = (uint) src->f[0]; 478 dst->u[1] = (uint) src->f[1]; 479 dst->u[2] = (uint) src->f[2]; 480 dst->u[3] = (uint) src->f[3]; 481} 482 483static void 484micro_flr( 485 union tgsi_exec_channel *dst, 486 const union tgsi_exec_channel *src ) 487{ 488 dst->f[0] = floorf( src->f[0] ); 489 dst->f[1] = floorf( src->f[1] ); 490 dst->f[2] = floorf( src->f[2] ); 491 dst->f[3] = floorf( src->f[3] ); 492} 493 494static void 495micro_frc( 496 union tgsi_exec_channel *dst, 497 const union tgsi_exec_channel *src ) 498{ 499 dst->f[0] = src->f[0] - floorf( src->f[0] ); 500 dst->f[1] = src->f[1] - floorf( src->f[1] ); 501 dst->f[2] = src->f[2] - floorf( src->f[2] ); 502 dst->f[3] = src->f[3] - floorf( src->f[3] ); 503} 504 505static void 506micro_ge( 507 union tgsi_exec_channel *dst, 508 const union tgsi_exec_channel *src0, 509 const union tgsi_exec_channel *src1, 510 const union tgsi_exec_channel *src2, 511 const union tgsi_exec_channel *src3 ) 512{ 513 dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; 514 dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; 515 dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; 516 dst->f[3] = src0->f[3] >= src1->f[3] ? 
src2->f[3] : src3->f[3]; 517} 518 519static void 520micro_i2f( 521 union tgsi_exec_channel *dst, 522 const union tgsi_exec_channel *src ) 523{ 524 dst->f[0] = (float) src->i[0]; 525 dst->f[1] = (float) src->i[1]; 526 dst->f[2] = (float) src->i[2]; 527 dst->f[3] = (float) src->i[3]; 528} 529 530static void 531micro_lg2( 532 union tgsi_exec_channel *dst, 533 const union tgsi_exec_channel *src ) 534{ 535 dst->f[0] = logf( src->f[0] ) * 1.442695f; 536 dst->f[1] = logf( src->f[1] ) * 1.442695f; 537 dst->f[2] = logf( src->f[2] ) * 1.442695f; 538 dst->f[3] = logf( src->f[3] ) * 1.442695f; 539} 540 541static void 542micro_le( 543 union tgsi_exec_channel *dst, 544 const union tgsi_exec_channel *src0, 545 const union tgsi_exec_channel *src1, 546 const union tgsi_exec_channel *src2, 547 const union tgsi_exec_channel *src3 ) 548{ 549 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 550 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 551 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 552 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 553} 554 555static void 556micro_lt( 557 union tgsi_exec_channel *dst, 558 const union tgsi_exec_channel *src0, 559 const union tgsi_exec_channel *src1, 560 const union tgsi_exec_channel *src2, 561 const union tgsi_exec_channel *src3 ) 562{ 563 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 564 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 565 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 566 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 567} 568 569static void 570micro_ilt( 571 union tgsi_exec_channel *dst, 572 const union tgsi_exec_channel *src0, 573 const union tgsi_exec_channel *src1, 574 const union tgsi_exec_channel *src2, 575 const union tgsi_exec_channel *src3 ) 576{ 577 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 578 dst->i[1] = src0->i[1] < src1->i[1] ? 
src2->i[1] : src3->i[1]; 579 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 580 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 581} 582 583static void 584micro_ult( 585 union tgsi_exec_channel *dst, 586 const union tgsi_exec_channel *src0, 587 const union tgsi_exec_channel *src1, 588 const union tgsi_exec_channel *src2, 589 const union tgsi_exec_channel *src3 ) 590{ 591 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 592 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 593 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 594 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 595} 596 597static void 598micro_max( 599 union tgsi_exec_channel *dst, 600 const union tgsi_exec_channel *src0, 601 const union tgsi_exec_channel *src1 ) 602{ 603 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 604 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 605 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 606 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 607} 608 609static void 610micro_imax( 611 union tgsi_exec_channel *dst, 612 const union tgsi_exec_channel *src0, 613 const union tgsi_exec_channel *src1 ) 614{ 615 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 616 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 617 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 618 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 619} 620 621static void 622micro_umax( 623 union tgsi_exec_channel *dst, 624 const union tgsi_exec_channel *src0, 625 const union tgsi_exec_channel *src1 ) 626{ 627 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 628 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 629 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 630 dst->u[3] = src0->u[3] > src1->u[3] ? 
src0->u[3] : src1->u[3]; 631} 632 633static void 634micro_min( 635 union tgsi_exec_channel *dst, 636 const union tgsi_exec_channel *src0, 637 const union tgsi_exec_channel *src1 ) 638{ 639 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 640 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 641 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 642 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 643} 644 645static void 646micro_imin( 647 union tgsi_exec_channel *dst, 648 const union tgsi_exec_channel *src0, 649 const union tgsi_exec_channel *src1 ) 650{ 651 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 652 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 653 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 654 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 655} 656 657static void 658micro_umin( 659 union tgsi_exec_channel *dst, 660 const union tgsi_exec_channel *src0, 661 const union tgsi_exec_channel *src1 ) 662{ 663 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 664 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 665 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 666 dst->u[3] = src0->u[3] < src1->u[3] ? 
src0->u[3] : src1->u[3]; 667} 668 669static void 670micro_umod( 671 union tgsi_exec_channel *dst, 672 const union tgsi_exec_channel *src0, 673 const union tgsi_exec_channel *src1 ) 674{ 675 dst->u[0] = src0->u[0] % src1->u[0]; 676 dst->u[1] = src0->u[1] % src1->u[1]; 677 dst->u[2] = src0->u[2] % src1->u[2]; 678 dst->u[3] = src0->u[3] % src1->u[3]; 679} 680 681static void 682micro_mul( 683 union tgsi_exec_channel *dst, 684 const union tgsi_exec_channel *src0, 685 const union tgsi_exec_channel *src1 ) 686{ 687 dst->f[0] = src0->f[0] * src1->f[0]; 688 dst->f[1] = src0->f[1] * src1->f[1]; 689 dst->f[2] = src0->f[2] * src1->f[2]; 690 dst->f[3] = src0->f[3] * src1->f[3]; 691} 692 693static void 694micro_imul( 695 union tgsi_exec_channel *dst, 696 const union tgsi_exec_channel *src0, 697 const union tgsi_exec_channel *src1 ) 698{ 699 dst->i[0] = src0->i[0] * src1->i[0]; 700 dst->i[1] = src0->i[1] * src1->i[1]; 701 dst->i[2] = src0->i[2] * src1->i[2]; 702 dst->i[3] = src0->i[3] * src1->i[3]; 703} 704 705static void 706micro_imul64( 707 union tgsi_exec_channel *dst0, 708 union tgsi_exec_channel *dst1, 709 const union tgsi_exec_channel *src0, 710 const union tgsi_exec_channel *src1 ) 711{ 712 dst1->i[0] = src0->i[0] * src1->i[0]; 713 dst1->i[1] = src0->i[1] * src1->i[1]; 714 dst1->i[2] = src0->i[2] * src1->i[2]; 715 dst1->i[3] = src0->i[3] * src1->i[3]; 716 dst0->i[0] = 0; 717 dst0->i[1] = 0; 718 dst0->i[2] = 0; 719 dst0->i[3] = 0; 720} 721 722static void 723micro_umul64( 724 union tgsi_exec_channel *dst0, 725 union tgsi_exec_channel *dst1, 726 const union tgsi_exec_channel *src0, 727 const union tgsi_exec_channel *src1 ) 728{ 729 dst1->u[0] = src0->u[0] * src1->u[0]; 730 dst1->u[1] = src0->u[1] * src1->u[1]; 731 dst1->u[2] = src0->u[2] * src1->u[2]; 732 dst1->u[3] = src0->u[3] * src1->u[3]; 733 dst0->u[0] = 0; 734 dst0->u[1] = 0; 735 dst0->u[2] = 0; 736 dst0->u[3] = 0; 737} 738 739static void 740micro_movc( 741 union tgsi_exec_channel *dst, 742 const union tgsi_exec_channel 
*src0, 743 const union tgsi_exec_channel *src1, 744 const union tgsi_exec_channel *src2 ) 745{ 746 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 747 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 748 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 749 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 750} 751 752static void 753micro_neg( 754 union tgsi_exec_channel *dst, 755 const union tgsi_exec_channel *src ) 756{ 757 dst->f[0] = -src->f[0]; 758 dst->f[1] = -src->f[1]; 759 dst->f[2] = -src->f[2]; 760 dst->f[3] = -src->f[3]; 761} 762 763static void 764micro_ineg( 765 union tgsi_exec_channel *dst, 766 const union tgsi_exec_channel *src ) 767{ 768 dst->i[0] = -src->i[0]; 769 dst->i[1] = -src->i[1]; 770 dst->i[2] = -src->i[2]; 771 dst->i[3] = -src->i[3]; 772} 773 774static void 775micro_not( 776 union tgsi_exec_channel *dst, 777 const union tgsi_exec_channel *src ) 778{ 779 dst->u[0] = ~src->u[0]; 780 dst->u[1] = ~src->u[1]; 781 dst->u[2] = ~src->u[2]; 782 dst->u[3] = ~src->u[3]; 783} 784 785static void 786micro_or( 787 union tgsi_exec_channel *dst, 788 const union tgsi_exec_channel *src0, 789 const union tgsi_exec_channel *src1 ) 790{ 791 dst->u[0] = src0->u[0] | src1->u[0]; 792 dst->u[1] = src0->u[1] | src1->u[1]; 793 dst->u[2] = src0->u[2] | src1->u[2]; 794 dst->u[3] = src0->u[3] | src1->u[3]; 795} 796 797static void 798micro_pow( 799 union tgsi_exec_channel *dst, 800 const union tgsi_exec_channel *src0, 801 const union tgsi_exec_channel *src1 ) 802{ 803 dst->f[0] = powf( src0->f[0], src1->f[0] ); 804 dst->f[1] = powf( src0->f[1], src1->f[1] ); 805 dst->f[2] = powf( src0->f[2], src1->f[2] ); 806 dst->f[3] = powf( src0->f[3], src1->f[3] ); 807} 808 809static void 810micro_rnd( 811 union tgsi_exec_channel *dst, 812 const union tgsi_exec_channel *src ) 813{ 814 dst->f[0] = floorf( src->f[0] + 0.5f ); 815 dst->f[1] = floorf( src->f[1] + 0.5f ); 816 dst->f[2] = floorf( src->f[2] + 0.5f ); 817 dst->f[3] = floorf( src->f[3] + 0.5f ); 818} 819 820static void 
821micro_shl( 822 union tgsi_exec_channel *dst, 823 const union tgsi_exec_channel *src0, 824 const union tgsi_exec_channel *src1 ) 825{ 826 dst->i[0] = src0->i[0] << src1->i[0]; 827 dst->i[1] = src0->i[1] << src1->i[1]; 828 dst->i[2] = src0->i[2] << src1->i[2]; 829 dst->i[3] = src0->i[3] << src1->i[3]; 830} 831 832static void 833micro_ishr( 834 union tgsi_exec_channel *dst, 835 const union tgsi_exec_channel *src0, 836 const union tgsi_exec_channel *src1 ) 837{ 838 dst->i[0] = src0->i[0] >> src1->i[0]; 839 dst->i[1] = src0->i[1] >> src1->i[1]; 840 dst->i[2] = src0->i[2] >> src1->i[2]; 841 dst->i[3] = src0->i[3] >> src1->i[3]; 842} 843 844static void 845micro_trunc( 846 union tgsi_exec_channel *dst, 847 const union tgsi_exec_channel *src0 ) 848{ 849 dst->f[0] = (float) (int) src0->f[0]; 850 dst->f[1] = (float) (int) src0->f[1]; 851 dst->f[2] = (float) (int) src0->f[2]; 852 dst->f[3] = (float) (int) src0->f[3]; 853} 854 855static void 856micro_ushr( 857 union tgsi_exec_channel *dst, 858 const union tgsi_exec_channel *src0, 859 const union tgsi_exec_channel *src1 ) 860{ 861 dst->u[0] = src0->u[0] >> src1->u[0]; 862 dst->u[1] = src0->u[1] >> src1->u[1]; 863 dst->u[2] = src0->u[2] >> src1->u[2]; 864 dst->u[3] = src0->u[3] >> src1->u[3]; 865} 866 867static void 868micro_sin( 869 union tgsi_exec_channel *dst, 870 const union tgsi_exec_channel *src ) 871{ 872 dst->f[0] = sinf( src->f[0] ); 873 dst->f[1] = sinf( src->f[1] ); 874 dst->f[2] = sinf( src->f[2] ); 875 dst->f[3] = sinf( src->f[3] ); 876} 877 878static void 879micro_sqrt( union tgsi_exec_channel *dst, 880 const union tgsi_exec_channel *src ) 881{ 882 dst->f[0] = sqrtf( src->f[0] ); 883 dst->f[1] = sqrtf( src->f[1] ); 884 dst->f[2] = sqrtf( src->f[2] ); 885 dst->f[3] = sqrtf( src->f[3] ); 886} 887 888static void 889micro_sub( 890 union tgsi_exec_channel *dst, 891 const union tgsi_exec_channel *src0, 892 const union tgsi_exec_channel *src1 ) 893{ 894 dst->f[0] = src0->f[0] - src1->f[0]; 895 dst->f[1] = src0->f[1] - 
src1->f[1]; 896 dst->f[2] = src0->f[2] - src1->f[2]; 897 dst->f[3] = src0->f[3] - src1->f[3]; 898} 899 900static void 901micro_u2f( 902 union tgsi_exec_channel *dst, 903 const union tgsi_exec_channel *src ) 904{ 905 dst->f[0] = (float) src->u[0]; 906 dst->f[1] = (float) src->u[1]; 907 dst->f[2] = (float) src->u[2]; 908 dst->f[3] = (float) src->u[3]; 909} 910 911static void 912micro_xor( 913 union tgsi_exec_channel *dst, 914 const union tgsi_exec_channel *src0, 915 const union tgsi_exec_channel *src1 ) 916{ 917 dst->u[0] = src0->u[0] ^ src1->u[0]; 918 dst->u[1] = src0->u[1] ^ src1->u[1]; 919 dst->u[2] = src0->u[2] ^ src1->u[2]; 920 dst->u[3] = src0->u[3] ^ src1->u[3]; 921} 922 923static void 924fetch_src_file_channel( 925 const struct tgsi_exec_machine *mach, 926 const uint file, 927 const uint swizzle, 928 const union tgsi_exec_channel *index, 929 union tgsi_exec_channel *chan ) 930{ 931 switch( swizzle ) { 932 case TGSI_EXTSWIZZLE_X: 933 case TGSI_EXTSWIZZLE_Y: 934 case TGSI_EXTSWIZZLE_Z: 935 case TGSI_EXTSWIZZLE_W: 936 switch( file ) { 937 case TGSI_FILE_CONSTANT: 938 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 939 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 940 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 941 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 942 break; 943 944 case TGSI_FILE_INPUT: 945 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 946 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 947 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 948 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 949 break; 950 951 case TGSI_FILE_TEMPORARY: 952 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 953 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 954 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 955 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 956 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 957 break; 958 959 case TGSI_FILE_IMMEDIATE: 960 assert( index->i[0] < 
(int) mach->ImmLimit ); 961 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 962 assert( index->i[1] < (int) mach->ImmLimit ); 963 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 964 assert( index->i[2] < (int) mach->ImmLimit ); 965 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 966 assert( index->i[3] < (int) mach->ImmLimit ); 967 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 968 break; 969 970 case TGSI_FILE_ADDRESS: 971 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 972 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 973 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 974 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 975 break; 976 977 case TGSI_FILE_OUTPUT: 978 /* vertex/fragment output vars can be read too */ 979 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 980 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 981 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 982 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 983 break; 984 985 default: 986 assert( 0 ); 987 } 988 break; 989 990 case TGSI_EXTSWIZZLE_ZERO: 991 *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; 992 break; 993 994 case TGSI_EXTSWIZZLE_ONE: 995 *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; 996 break; 997 998 default: 999 assert( 0 ); 1000 } 1001} 1002 1003static void 1004fetch_source( 1005 const struct tgsi_exec_machine *mach, 1006 union tgsi_exec_channel *chan, 1007 const struct tgsi_full_src_register *reg, 1008 const uint chan_index ) 1009{ 1010 union tgsi_exec_channel index; 1011 uint swizzle; 1012 1013 index.i[0] = 1014 index.i[1] = 1015 index.i[2] = 1016 index.i[3] = reg->SrcRegister.Index; 1017 1018 if (reg->SrcRegister.Indirect) { 1019 union tgsi_exec_channel index2; 1020 union tgsi_exec_channel indir_index; 1021 1022 index2.i[0] = 1023 index2.i[1] = 1024 index2.i[2] = 1025 index2.i[3] = reg->SrcRegisterInd.Index; 1026 1027 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1028 
fetch_src_file_channel( 1029 mach, 1030 reg->SrcRegisterInd.File, 1031 swizzle, 1032 &index2, 1033 &indir_index ); 1034 1035 index.i[0] += indir_index.i[0]; 1036 index.i[1] += indir_index.i[1]; 1037 index.i[2] += indir_index.i[2]; 1038 index.i[3] += indir_index.i[3]; 1039 } 1040 1041 if( reg->SrcRegister.Dimension ) { 1042 switch( reg->SrcRegister.File ) { 1043 case TGSI_FILE_INPUT: 1044 index.i[0] *= 17; 1045 index.i[1] *= 17; 1046 index.i[2] *= 17; 1047 index.i[3] *= 17; 1048 break; 1049 case TGSI_FILE_CONSTANT: 1050 index.i[0] *= 4096; 1051 index.i[1] *= 4096; 1052 index.i[2] *= 4096; 1053 index.i[3] *= 4096; 1054 break; 1055 default: 1056 assert( 0 ); 1057 } 1058 1059 index.i[0] += reg->SrcRegisterDim.Index; 1060 index.i[1] += reg->SrcRegisterDim.Index; 1061 index.i[2] += reg->SrcRegisterDim.Index; 1062 index.i[3] += reg->SrcRegisterDim.Index; 1063 1064 if (reg->SrcRegisterDim.Indirect) { 1065 union tgsi_exec_channel index2; 1066 union tgsi_exec_channel indir_index; 1067 1068 index2.i[0] = 1069 index2.i[1] = 1070 index2.i[2] = 1071 index2.i[3] = reg->SrcRegisterDimInd.Index; 1072 1073 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1074 fetch_src_file_channel( 1075 mach, 1076 reg->SrcRegisterDimInd.File, 1077 swizzle, 1078 &index2, 1079 &indir_index ); 1080 1081 index.i[0] += indir_index.i[0]; 1082 index.i[1] += indir_index.i[1]; 1083 index.i[2] += indir_index.i[2]; 1084 index.i[3] += indir_index.i[3]; 1085 } 1086 } 1087 1088 swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); 1089 fetch_src_file_channel( 1090 mach, 1091 reg->SrcRegister.File, 1092 swizzle, 1093 &index, 1094 chan ); 1095 1096 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1097 case TGSI_UTIL_SIGN_CLEAR: 1098 micro_abs( chan, chan ); 1099 break; 1100 1101 case TGSI_UTIL_SIGN_SET: 1102 micro_abs( chan, chan ); 1103 micro_neg( chan, chan ); 1104 break; 1105 1106 case TGSI_UTIL_SIGN_TOGGLE: 1107 micro_neg( chan, chan ); 
1108 break; 1109 1110 case TGSI_UTIL_SIGN_KEEP: 1111 break; 1112 } 1113 1114 if (reg->SrcRegisterExtMod.Complement) { 1115 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1116 } 1117} 1118 1119static void 1120store_dest( 1121 struct tgsi_exec_machine *mach, 1122 const union tgsi_exec_channel *chan, 1123 const struct tgsi_full_dst_register *reg, 1124 const struct tgsi_full_instruction *inst, 1125 uint chan_index ) 1126{ 1127 union tgsi_exec_channel *dst; 1128 1129 switch( reg->DstRegister.File ) { 1130 case TGSI_FILE_NULL: 1131 return; 1132 1133 case TGSI_FILE_OUTPUT: 1134 dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1135 + reg->DstRegister.Index].xyzw[chan_index]; 1136 break; 1137 1138 case TGSI_FILE_TEMPORARY: 1139 assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS); 1140 dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; 1141 break; 1142 1143 case TGSI_FILE_ADDRESS: 1144 dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; 1145 break; 1146 1147 default: 1148 assert( 0 ); 1149 return; 1150 } 1151 1152 switch (inst->Instruction.Saturate) 1153 { 1154 case TGSI_SAT_NONE: 1155 if (mach->ExecMask & 0x1) 1156 dst->i[0] = chan->i[0]; 1157 if (mach->ExecMask & 0x2) 1158 dst->i[1] = chan->i[1]; 1159 if (mach->ExecMask & 0x4) 1160 dst->i[2] = chan->i[2]; 1161 if (mach->ExecMask & 0x8) 1162 dst->i[3] = chan->i[3]; 1163 break; 1164 1165 case TGSI_SAT_ZERO_ONE: 1166 /* XXX need to obey ExecMask here */ 1167 micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 1168 micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 1169 break; 1170 1171 case TGSI_SAT_MINUS_PLUS_ONE: 1172 assert( 0 ); 1173 break; 1174 1175 default: 1176 assert( 0 ); 1177 } 1178} 1179 1180#define FETCH(VAL,INDEX,CHAN)\ 1181 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1182 1183#define STORE(VAL,INDEX,CHAN)\ 1184 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1185 1186 1187/** 1188 * Execute 
ARB-style KIL which is predicated by a src register. 1189 * Kill fragment if any of the four values is less than zero. 1190 */ 1191static void 1192exec_kilp(struct tgsi_exec_machine *mach, 1193 const struct tgsi_full_instruction *inst) 1194{ 1195 uint uniquemask; 1196 uint chan_index; 1197 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1198 union tgsi_exec_channel r[1]; 1199 1200 /* This mask stores component bits that were already tested. Note that 1201 * we test if the value is less than zero, so 1.0 and 0.0 need not to be 1202 * tested. */ 1203 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); 1204 1205 for (chan_index = 0; chan_index < 4; chan_index++) 1206 { 1207 uint swizzle; 1208 uint i; 1209 1210 /* unswizzle channel */ 1211 swizzle = tgsi_util_get_full_src_register_extswizzle ( 1212 &inst->FullSrcRegisters[0], 1213 chan_index); 1214 1215 /* check if the component has not been already tested */ 1216 if (uniquemask & (1 << swizzle)) 1217 continue; 1218 uniquemask |= 1 << swizzle; 1219 1220 FETCH(&r[0], 0, chan_index); 1221 for (i = 0; i < 4; i++) 1222 if (r[0].f[i] < 0.0f) 1223 kilmask |= 1 << i; 1224 } 1225 1226 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1227} 1228 1229 1230/* 1231 * Fetch a texel using STR texture coordinates. 
1232 */ 1233static void 1234fetch_texel( struct tgsi_sampler *sampler, 1235 const union tgsi_exec_channel *s, 1236 const union tgsi_exec_channel *t, 1237 const union tgsi_exec_channel *p, 1238 float lodbias, /* XXX should be float[4] */ 1239 union tgsi_exec_channel *r, 1240 union tgsi_exec_channel *g, 1241 union tgsi_exec_channel *b, 1242 union tgsi_exec_channel *a ) 1243{ 1244 uint j; 1245 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1246 1247 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1248 1249 for (j = 0; j < 4; j++) { 1250 r->f[j] = rgba[0][j]; 1251 g->f[j] = rgba[1][j]; 1252 b->f[j] = rgba[2][j]; 1253 a->f[j] = rgba[3][j]; 1254 } 1255} 1256 1257 1258static void 1259exec_tex(struct tgsi_exec_machine *mach, 1260 const struct tgsi_full_instruction *inst, 1261 boolean biasLod, 1262 boolean projected) 1263{ 1264 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1265 union tgsi_exec_channel r[8]; 1266 uint chan_index; 1267 float lodBias; 1268 1269 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1270 1271 switch (inst->InstructionExtTexture.Texture) { 1272 case TGSI_TEXTURE_1D: 1273 1274 FETCH(&r[0], 0, CHAN_X); 1275 1276 if (projected) { 1277 FETCH(&r[1], 0, CHAN_W); 1278 micro_div( &r[0], &r[0], &r[1] ); 1279 } 1280 1281 if (biasLod) { 1282 FETCH(&r[1], 0, CHAN_W); 1283 lodBias = r[2].f[0]; 1284 } 1285 else 1286 lodBias = 0.0; 1287 1288 fetch_texel(&mach->Samplers[unit], 1289 &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ 1290 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1291 break; 1292 1293 case TGSI_TEXTURE_2D: 1294 case TGSI_TEXTURE_RECT: 1295 1296 FETCH(&r[0], 0, CHAN_X); 1297 FETCH(&r[1], 0, CHAN_Y); 1298 FETCH(&r[2], 0, CHAN_Z); 1299 1300 if (projected) { 1301 FETCH(&r[3], 0, CHAN_W); 1302 micro_div( &r[0], &r[0], &r[3] ); 1303 micro_div( &r[1], &r[1], &r[3] ); 1304 micro_div( &r[2], &r[2], &r[3] ); 1305 } 1306 1307 if (biasLod) { 1308 FETCH(&r[3], 0, CHAN_W); 1309 lodBias = r[3].f[0]; 1310 } 1311 else 1312 lodBias = 
0.0; 1313 1314 fetch_texel(&mach->Samplers[unit], 1315 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1316 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1317 break; 1318 1319 case TGSI_TEXTURE_3D: 1320 case TGSI_TEXTURE_CUBE: 1321 1322 FETCH(&r[0], 0, CHAN_X); 1323 FETCH(&r[1], 0, CHAN_Y); 1324 FETCH(&r[2], 0, CHAN_Z); 1325 1326 if (projected) { 1327 FETCH(&r[3], 0, CHAN_W); 1328 micro_div( &r[0], &r[0], &r[3] ); 1329 micro_div( &r[1], &r[1], &r[3] ); 1330 micro_div( &r[2], &r[2], &r[3] ); 1331 } 1332 1333 if (biasLod) { 1334 FETCH(&r[3], 0, CHAN_W); 1335 lodBias = r[3].f[0]; 1336 } 1337 else 1338 lodBias = 0.0; 1339 1340 fetch_texel(&mach->Samplers[unit], 1341 &r[0], &r[1], &r[2], lodBias, 1342 &r[0], &r[1], &r[2], &r[3]); 1343 break; 1344 1345 default: 1346 assert (0); 1347 } 1348 1349 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1350 STORE( &r[chan_index], 0, chan_index ); 1351 } 1352} 1353 1354 1355/** 1356 * Evaluate a constant-valued coefficient at the position of the 1357 * current quad. 1358 */ 1359static void 1360eval_constant_coef( 1361 struct tgsi_exec_machine *mach, 1362 unsigned attrib, 1363 unsigned chan ) 1364{ 1365 unsigned i; 1366 1367 for( i = 0; i < QUAD_SIZE; i++ ) { 1368 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1369 } 1370} 1371 1372/** 1373 * Evaluate a linear-valued coefficient at the position of the 1374 * current quad. 
1375 */ 1376static void 1377eval_linear_coef( 1378 struct tgsi_exec_machine *mach, 1379 unsigned attrib, 1380 unsigned chan ) 1381{ 1382 const float x = mach->QuadPos.xyzw[0].f[0]; 1383 const float y = mach->QuadPos.xyzw[1].f[0]; 1384 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1385 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1386 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1387 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1388 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1389 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1390 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1391} 1392 1393/** 1394 * Evaluate a perspective-valued coefficient at the position of the 1395 * current quad. 1396 */ 1397static void 1398eval_perspective_coef( 1399 struct tgsi_exec_machine *mach, 1400 unsigned attrib, 1401 unsigned chan ) 1402{ 1403 const float x = mach->QuadPos.xyzw[0].f[0]; 1404 const float y = mach->QuadPos.xyzw[1].f[0]; 1405 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1406 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1407 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1408 const float *w = mach->QuadPos.xyzw[3].f; 1409 /* divide by W here */ 1410 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1411 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1412 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1413 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1414} 1415 1416 1417typedef void (* eval_coef_func)( 1418 struct tgsi_exec_machine *mach, 1419 unsigned attrib, 1420 unsigned chan ); 1421 1422static void 1423exec_declaration( 1424 struct tgsi_exec_machine *mach, 1425 const struct tgsi_full_declaration *decl ) 1426{ 1427 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1428 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1429 unsigned first, last, mask; 1430 eval_coef_func eval; 1431 1432 first = 
decl->DeclarationRange.First; 1433 last = decl->DeclarationRange.Last; 1434 mask = decl->Declaration.UsageMask; 1435 1436 switch( decl->Declaration.Interpolate ) { 1437 case TGSI_INTERPOLATE_CONSTANT: 1438 eval = eval_constant_coef; 1439 break; 1440 1441 case TGSI_INTERPOLATE_LINEAR: 1442 eval = eval_linear_coef; 1443 break; 1444 1445 case TGSI_INTERPOLATE_PERSPECTIVE: 1446 eval = eval_perspective_coef; 1447 break; 1448 1449 default: 1450 assert( 0 ); 1451 } 1452 1453 if( mask == TGSI_WRITEMASK_XYZW ) { 1454 unsigned i, j; 1455 1456 for( i = first; i <= last; i++ ) { 1457 for( j = 0; j < NUM_CHANNELS; j++ ) { 1458 eval( mach, i, j ); 1459 } 1460 } 1461 } 1462 else { 1463 unsigned i, j; 1464 1465 for( j = 0; j < NUM_CHANNELS; j++ ) { 1466 if( mask & (1 << j) ) { 1467 for( i = first; i <= last; i++ ) { 1468 eval( mach, i, j ); 1469 } 1470 } 1471 } 1472 } 1473 } 1474 } 1475} 1476 1477static void 1478exec_instruction( 1479 struct tgsi_exec_machine *mach, 1480 const struct tgsi_full_instruction *inst, 1481 int *pc ) 1482{ 1483 uint chan_index; 1484 union tgsi_exec_channel r[8]; 1485 1486 (*pc)++; 1487 1488 switch (inst->Instruction.Opcode) { 1489 case TGSI_OPCODE_ARL: 1490 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1491 FETCH( &r[0], 0, chan_index ); 1492 micro_f2it( &r[0], &r[0] ); 1493 STORE( &r[0], 0, chan_index ); 1494 } 1495 break; 1496 1497 case TGSI_OPCODE_MOV: 1498 case TGSI_OPCODE_SWZ: 1499 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1500 FETCH( &r[0], 0, chan_index ); 1501 STORE( &r[0], 0, chan_index ); 1502 } 1503 break; 1504 1505 case TGSI_OPCODE_LIT: 1506 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1507 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1508 } 1509 1510 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1511 FETCH( &r[0], 0, CHAN_X ); 1512 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1513 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1514 STORE( &r[0], 0, CHAN_Y ); 1515 } 1516 
1517 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1518 FETCH( &r[1], 0, CHAN_Y ); 1519 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1520 1521 FETCH( &r[2], 0, CHAN_W ); 1522 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1523 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1524 micro_pow( &r[1], &r[1], &r[2] ); 1525 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1526 STORE( &r[0], 0, CHAN_Z ); 1527 } 1528 } 1529 1530 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1531 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1532 } 1533 break; 1534 1535 case TGSI_OPCODE_RCP: 1536 /* TGSI_OPCODE_RECIP */ 1537 FETCH( &r[0], 0, CHAN_X ); 1538 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1539 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1540 STORE( &r[0], 0, chan_index ); 1541 } 1542 break; 1543 1544 case TGSI_OPCODE_RSQ: 1545 /* TGSI_OPCODE_RECIPSQRT */ 1546 FETCH( &r[0], 0, CHAN_X ); 1547 micro_sqrt( &r[0], &r[0] ); 1548 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1549 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1550 STORE( &r[0], 0, chan_index ); 1551 } 1552 break; 1553 1554 case TGSI_OPCODE_EXP: 1555 FETCH( &r[0], 0, CHAN_X ); 1556 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1557 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1558 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1559 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1560 } 1561 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1562 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1563 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1564 } 1565 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1566 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1567 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1568 } 1569 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1570 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1571 } 1572 break; 1573 1574 case 
TGSI_OPCODE_LOG: 1575 FETCH( &r[0], 0, CHAN_X ); 1576 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1577 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1578 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1579 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1580 STORE( &r[0], 0, CHAN_X ); 1581 } 1582 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1583 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1584 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 1585 STORE( &r[0], 0, CHAN_Y ); 1586 } 1587 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1588 STORE( &r[1], 0, CHAN_Z ); 1589 } 1590 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1591 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1592 } 1593 break; 1594 1595 case TGSI_OPCODE_MUL: 1596 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1597 { 1598 FETCH(&r[0], 0, chan_index); 1599 FETCH(&r[1], 1, chan_index); 1600 1601 micro_mul( &r[0], &r[0], &r[1] ); 1602 1603 STORE(&r[0], 0, chan_index); 1604 } 1605 break; 1606 1607 case TGSI_OPCODE_ADD: 1608 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1609 FETCH( &r[0], 0, chan_index ); 1610 FETCH( &r[1], 1, chan_index ); 1611 micro_add( &r[0], &r[0], &r[1] ); 1612 STORE( &r[0], 0, chan_index ); 1613 } 1614 break; 1615 1616 case TGSI_OPCODE_DP3: 1617 /* TGSI_OPCODE_DOT3 */ 1618 FETCH( &r[0], 0, CHAN_X ); 1619 FETCH( &r[1], 1, CHAN_X ); 1620 micro_mul( &r[0], &r[0], &r[1] ); 1621 1622 FETCH( &r[1], 0, CHAN_Y ); 1623 FETCH( &r[2], 1, CHAN_Y ); 1624 micro_mul( &r[1], &r[1], &r[2] ); 1625 micro_add( &r[0], &r[0], &r[1] ); 1626 1627 FETCH( &r[1], 0, CHAN_Z ); 1628 FETCH( &r[2], 1, CHAN_Z ); 1629 micro_mul( &r[1], &r[1], &r[2] ); 1630 micro_add( &r[0], &r[0], &r[1] ); 1631 1632 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1633 STORE( &r[0], 0, chan_index ); 1634 } 1635 break; 1636 1637 case TGSI_OPCODE_DP4: 1638 /* TGSI_OPCODE_DOT4 */ 1639 FETCH(&r[0], 0, CHAN_X); 1640 FETCH(&r[1], 1, CHAN_X); 1641 1642 micro_mul( &r[0], &r[0], &r[1] ); 1643 1644 FETCH(&r[1], 0, CHAN_Y); 1645 
FETCH(&r[2], 1, CHAN_Y); 1646 1647 micro_mul( &r[1], &r[1], &r[2] ); 1648 micro_add( &r[0], &r[0], &r[1] ); 1649 1650 FETCH(&r[1], 0, CHAN_Z); 1651 FETCH(&r[2], 1, CHAN_Z); 1652 1653 micro_mul( &r[1], &r[1], &r[2] ); 1654 micro_add( &r[0], &r[0], &r[1] ); 1655 1656 FETCH(&r[1], 0, CHAN_W); 1657 FETCH(&r[2], 1, CHAN_W); 1658 1659 micro_mul( &r[1], &r[1], &r[2] ); 1660 micro_add( &r[0], &r[0], &r[1] ); 1661 1662 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1663 STORE( &r[0], 0, chan_index ); 1664 } 1665 break; 1666 1667 case TGSI_OPCODE_DST: 1668 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1669 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1670 } 1671 1672 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1673 FETCH( &r[0], 0, CHAN_Y ); 1674 FETCH( &r[1], 1, CHAN_Y); 1675 micro_mul( &r[0], &r[0], &r[1] ); 1676 STORE( &r[0], 0, CHAN_Y ); 1677 } 1678 1679 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1680 FETCH( &r[0], 0, CHAN_Z ); 1681 STORE( &r[0], 0, CHAN_Z ); 1682 } 1683 1684 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1685 FETCH( &r[0], 1, CHAN_W ); 1686 STORE( &r[0], 0, CHAN_W ); 1687 } 1688 break; 1689 1690 case TGSI_OPCODE_MIN: 1691 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1692 FETCH(&r[0], 0, chan_index); 1693 FETCH(&r[1], 1, chan_index); 1694 1695 /* XXX use micro_min()?? */ 1696 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 1697 1698 STORE(&r[0], 0, chan_index); 1699 } 1700 break; 1701 1702 case TGSI_OPCODE_MAX: 1703 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1704 FETCH(&r[0], 0, chan_index); 1705 FETCH(&r[1], 1, chan_index); 1706 1707 /* XXX use micro_max()?? 
*/ 1708 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 1709 1710 STORE(&r[0], 0, chan_index ); 1711 } 1712 break; 1713 1714 case TGSI_OPCODE_SLT: 1715 /* TGSI_OPCODE_SETLT */ 1716 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1717 FETCH( &r[0], 0, chan_index ); 1718 FETCH( &r[1], 1, chan_index ); 1719 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1720 STORE( &r[0], 0, chan_index ); 1721 } 1722 break; 1723 1724 case TGSI_OPCODE_SGE: 1725 /* TGSI_OPCODE_SETGE */ 1726 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1727 FETCH( &r[0], 0, chan_index ); 1728 FETCH( &r[1], 1, chan_index ); 1729 micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1730 STORE( &r[0], 0, chan_index ); 1731 } 1732 break; 1733 1734 case TGSI_OPCODE_MAD: 1735 /* TGSI_OPCODE_MADD */ 1736 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1737 FETCH( &r[0], 0, chan_index ); 1738 FETCH( &r[1], 1, chan_index ); 1739 micro_mul( &r[0], &r[0], &r[1] ); 1740 FETCH( &r[1], 2, chan_index ); 1741 micro_add( &r[0], &r[0], &r[1] ); 1742 STORE( &r[0], 0, chan_index ); 1743 } 1744 break; 1745 1746 case TGSI_OPCODE_SUB: 1747 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1748 FETCH(&r[0], 0, chan_index); 1749 FETCH(&r[1], 1, chan_index); 1750 1751 micro_sub( &r[0], &r[0], &r[1] ); 1752 1753 STORE(&r[0], 0, chan_index); 1754 } 1755 break; 1756 1757 case TGSI_OPCODE_LERP: 1758 /* TGSI_OPCODE_LRP */ 1759 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1760 FETCH(&r[0], 0, chan_index); 1761 FETCH(&r[1], 1, chan_index); 1762 FETCH(&r[2], 2, chan_index); 1763 1764 micro_sub( &r[1], &r[1], &r[2] ); 1765 micro_mul( &r[0], &r[0], &r[1] ); 1766 micro_add( &r[0], &r[0], &r[2] ); 1767 1768 STORE(&r[0], 0, chan_index); 1769 } 1770 break; 1771 1772 case TGSI_OPCODE_CND: 1773 assert (0); 1774 break; 1775 1776 case TGSI_OPCODE_CND0: 1777 assert (0); 1778 break; 1779 1780 case TGSI_OPCODE_DOT2ADD: 1781 /* 
TGSI_OPCODE_DP2A */ 1782 assert (0); 1783 break; 1784 1785 case TGSI_OPCODE_INDEX: 1786 assert (0); 1787 break; 1788 1789 case TGSI_OPCODE_NEGATE: 1790 assert (0); 1791 break; 1792 1793 case TGSI_OPCODE_FRAC: 1794 /* TGSI_OPCODE_FRC */ 1795 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1796 FETCH( &r[0], 0, chan_index ); 1797 micro_frc( &r[0], &r[0] ); 1798 STORE( &r[0], 0, chan_index ); 1799 } 1800 break; 1801 1802 case TGSI_OPCODE_CLAMP: 1803 assert (0); 1804 break; 1805 1806 case TGSI_OPCODE_FLOOR: 1807 /* TGSI_OPCODE_FLR */ 1808 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1809 FETCH( &r[0], 0, chan_index ); 1810 micro_flr( &r[0], &r[0] ); 1811 STORE( &r[0], 0, chan_index ); 1812 } 1813 break; 1814 1815 case TGSI_OPCODE_ROUND: 1816 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1817 FETCH( &r[0], 0, chan_index ); 1818 micro_rnd( &r[0], &r[0] ); 1819 STORE( &r[0], 0, chan_index ); 1820 } 1821 break; 1822 1823 case TGSI_OPCODE_EXPBASE2: 1824 /* TGSI_OPCODE_EX2 */ 1825 FETCH(&r[0], 0, CHAN_X); 1826 1827 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 1828 1829 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1830 STORE( &r[0], 0, chan_index ); 1831 } 1832 break; 1833 1834 case TGSI_OPCODE_LOGBASE2: 1835 /* TGSI_OPCODE_LG2 */ 1836 FETCH( &r[0], 0, CHAN_X ); 1837 micro_lg2( &r[0], &r[0] ); 1838 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1839 STORE( &r[0], 0, chan_index ); 1840 } 1841 break; 1842 1843 case TGSI_OPCODE_POWER: 1844 /* TGSI_OPCODE_POW */ 1845 FETCH(&r[0], 0, CHAN_X); 1846 FETCH(&r[1], 1, CHAN_X); 1847 1848 micro_pow( &r[0], &r[0], &r[1] ); 1849 1850 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1851 STORE( &r[0], 0, chan_index ); 1852 } 1853 break; 1854 1855 case TGSI_OPCODE_CROSSPRODUCT: 1856 /* TGSI_OPCODE_XPD */ 1857 FETCH(&r[0], 0, CHAN_Y); 1858 FETCH(&r[1], 1, CHAN_Z); 1859 1860 micro_mul( &r[2], &r[0], &r[1] ); 1861 1862 FETCH(&r[3], 0, CHAN_Z); 1863 FETCH(&r[4], 1, CHAN_Y); 1864 1865 micro_mul( &r[5], &r[3], &r[4] ); 
1866 micro_sub( &r[2], &r[2], &r[5] ); 1867 1868 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1869 STORE( &r[2], 0, CHAN_X ); 1870 } 1871 1872 FETCH(&r[2], 1, CHAN_X); 1873 1874 micro_mul( &r[3], &r[3], &r[2] ); 1875 1876 FETCH(&r[5], 0, CHAN_X); 1877 1878 micro_mul( &r[1], &r[1], &r[5] ); 1879 micro_sub( &r[3], &r[3], &r[1] ); 1880 1881 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1882 STORE( &r[3], 0, CHAN_Y ); 1883 } 1884 1885 micro_mul( &r[5], &r[5], &r[4] ); 1886 micro_mul( &r[0], &r[0], &r[2] ); 1887 micro_sub( &r[5], &r[5], &r[0] ); 1888 1889 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1890 STORE( &r[5], 0, CHAN_Z ); 1891 } 1892 1893 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1894 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1895 } 1896 break; 1897 1898 case TGSI_OPCODE_MULTIPLYMATRIX: 1899 assert (0); 1900 break; 1901 1902 case TGSI_OPCODE_ABS: 1903 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1904 FETCH(&r[0], 0, chan_index); 1905 1906 micro_abs( &r[0], &r[0] ); 1907 1908 STORE(&r[0], 0, chan_index); 1909 } 1910 break; 1911 1912 case TGSI_OPCODE_RCC: 1913 assert (0); 1914 break; 1915 1916 case TGSI_OPCODE_DPH: 1917 FETCH(&r[0], 0, CHAN_X); 1918 FETCH(&r[1], 1, CHAN_X); 1919 1920 micro_mul( &r[0], &r[0], &r[1] ); 1921 1922 FETCH(&r[1], 0, CHAN_Y); 1923 FETCH(&r[2], 1, CHAN_Y); 1924 1925 micro_mul( &r[1], &r[1], &r[2] ); 1926 micro_add( &r[0], &r[0], &r[1] ); 1927 1928 FETCH(&r[1], 0, CHAN_Z); 1929 FETCH(&r[2], 1, CHAN_Z); 1930 1931 micro_mul( &r[1], &r[1], &r[2] ); 1932 micro_add( &r[0], &r[0], &r[1] ); 1933 1934 FETCH(&r[1], 1, CHAN_W); 1935 1936 micro_add( &r[0], &r[0], &r[1] ); 1937 1938 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1939 STORE( &r[0], 0, chan_index ); 1940 } 1941 break; 1942 1943 case TGSI_OPCODE_COS: 1944 FETCH(&r[0], 0, CHAN_X); 1945 1946 micro_cos( &r[0], &r[0] ); 1947 1948 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1949 STORE( &r[0], 0, chan_index ); 1950 } 1951 break; 1952 1953 case TGSI_OPCODE_DDX: 1954 
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1955 FETCH( &r[0], 0, chan_index ); 1956 micro_ddx( &r[0], &r[0] ); 1957 STORE( &r[0], 0, chan_index ); 1958 } 1959 break; 1960 1961 case TGSI_OPCODE_DDY: 1962 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1963 FETCH( &r[0], 0, chan_index ); 1964 micro_ddy( &r[0], &r[0] ); 1965 STORE( &r[0], 0, chan_index ); 1966 } 1967 break; 1968 1969 case TGSI_OPCODE_KILP: 1970 exec_kilp (mach, inst); 1971 break; 1972 1973 case TGSI_OPCODE_KIL: 1974 /* for enabled ExecMask bits, set the killed bit */ 1975 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; 1976 break; 1977 1978 case TGSI_OPCODE_PK2H: 1979 assert (0); 1980 break; 1981 1982 case TGSI_OPCODE_PK2US: 1983 assert (0); 1984 break; 1985 1986 case TGSI_OPCODE_PK4B: 1987 assert (0); 1988 break; 1989 1990 case TGSI_OPCODE_PK4UB: 1991 assert (0); 1992 break; 1993 1994 case TGSI_OPCODE_RFL: 1995 assert (0); 1996 break; 1997 1998 case TGSI_OPCODE_SEQ: 1999 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2000 FETCH( &r[0], 0, chan_index ); 2001 FETCH( &r[1], 1, chan_index ); 2002 micro_eq( &r[0], &r[0], &r[1], 2003 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2004 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2005 STORE( &r[0], 0, chan_index ); 2006 } 2007 break; 2008 2009 case TGSI_OPCODE_SFL: 2010 assert (0); 2011 break; 2012 2013 case TGSI_OPCODE_SGT: 2014 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2015 FETCH( &r[0], 0, chan_index ); 2016 FETCH( &r[1], 1, chan_index ); 2017 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2018 STORE( &r[0], 0, chan_index ); 2019 } 2020 break; 2021 2022 case TGSI_OPCODE_SIN: 2023 FETCH( &r[0], 0, CHAN_X ); 2024 micro_sin( &r[0], &r[0] ); 2025 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2026 STORE( &r[0], 0, chan_index ); 2027 } 2028 break; 2029 2030 case TGSI_OPCODE_SLE: 2031 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2032 FETCH( &r[0], 0, chan_index ); 2033 
FETCH( &r[1], 1, chan_index ); 2034 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2035 STORE( &r[0], 0, chan_index ); 2036 } 2037 break; 2038 2039 case TGSI_OPCODE_SNE: 2040 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2041 FETCH( &r[0], 0, chan_index ); 2042 FETCH( &r[1], 1, chan_index ); 2043 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2044 STORE( &r[0], 0, chan_index ); 2045 } 2046 break; 2047 2048 case TGSI_OPCODE_STR: 2049 assert (0); 2050 break; 2051 2052 case TGSI_OPCODE_TEX: 2053 /* simple texture lookup */ 2054 /* src[0] = texcoord */ 2055 /* src[1] = sampler unit */ 2056 exec_tex(mach, inst, FALSE, FALSE); 2057 break; 2058 2059 case TGSI_OPCODE_TXB: 2060 /* Texture lookup with lod bias */ 2061 /* src[0] = texcoord (src[0].w = LOD bias) */ 2062 /* src[1] = sampler unit */ 2063 exec_tex(mach, inst, TRUE, FALSE); 2064 break; 2065 2066 case TGSI_OPCODE_TXD: 2067 /* Texture lookup with explict partial derivatives */ 2068 /* src[0] = texcoord */ 2069 /* src[1] = d[strq]/dx */ 2070 /* src[2] = d[strq]/dy */ 2071 /* src[3] = sampler unit */ 2072 assert (0); 2073 break; 2074 2075 case TGSI_OPCODE_TXL: 2076 /* Texture lookup with explit LOD */ 2077 /* src[0] = texcoord (src[0].w = LOD) */ 2078 /* src[1] = sampler unit */ 2079 exec_tex(mach, inst, TRUE, FALSE); 2080 break; 2081 2082 case TGSI_OPCODE_TXP: 2083 /* Texture lookup with projection */ 2084 /* src[0] = texcoord (src[0].w = projection) */ 2085 /* src[1] = sampler unit */ 2086 exec_tex(mach, inst, FALSE, TRUE); 2087 break; 2088 2089 case TGSI_OPCODE_UP2H: 2090 assert (0); 2091 break; 2092 2093 case TGSI_OPCODE_UP2US: 2094 assert (0); 2095 break; 2096 2097 case TGSI_OPCODE_UP4B: 2098 assert (0); 2099 break; 2100 2101 case TGSI_OPCODE_UP4UB: 2102 assert (0); 2103 break; 2104 2105 case TGSI_OPCODE_X2D: 2106 assert (0); 2107 break; 2108 2109 case TGSI_OPCODE_ARA: 2110 assert (0); 
2111 break; 2112 2113 case TGSI_OPCODE_ARR: 2114 assert (0); 2115 break; 2116 2117 case TGSI_OPCODE_BRA: 2118 assert (0); 2119 break; 2120 2121 case TGSI_OPCODE_CAL: 2122 /* skip the call if no execution channels are enabled */ 2123 if (mach->ExecMask) { 2124 /* do the call */ 2125 2126 /* push the Cond, Loop, Cont stacks */ 2127 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2128 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2129 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2130 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2131 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2132 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2133 2134 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2135 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2136 2137 /* note that PC was already incremented above */ 2138 mach->CallStack[mach->CallStackTop++] = *pc; 2139 *pc = inst->InstructionExtLabel.Label; 2140 } 2141 break; 2142 2143 case TGSI_OPCODE_RET: 2144 mach->FuncMask &= ~mach->ExecMask; 2145 UPDATE_EXEC_MASK(mach); 2146 2147 if (mach->ExecMask == 0x0) { 2148 /* really return now (otherwise, keep executing */ 2149 2150 if (mach->CallStackTop == 0) { 2151 /* returning from main() */ 2152 *pc = -1; 2153 return; 2154 } 2155 *pc = mach->CallStack[--mach->CallStackTop]; 2156 2157 /* pop the Cond, Loop, Cont stacks */ 2158 assert(mach->CondStackTop > 0); 2159 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2160 assert(mach->LoopStackTop > 0); 2161 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2162 assert(mach->ContStackTop > 0); 2163 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2164 assert(mach->FuncStackTop > 0); 2165 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2166 2167 UPDATE_EXEC_MASK(mach); 2168 } 2169 break; 2170 2171 case TGSI_OPCODE_SSG: 2172 assert (0); 2173 break; 2174 2175 case TGSI_OPCODE_CMP: 2176 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2177 
FETCH(&r[0], 0, chan_index); 2178 FETCH(&r[1], 1, chan_index); 2179 FETCH(&r[2], 2, chan_index); 2180 2181 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2182 2183 STORE(&r[0], 0, chan_index); 2184 } 2185 break; 2186 2187 case TGSI_OPCODE_SCS: 2188 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2189 FETCH( &r[0], 0, CHAN_X ); 2190 } 2191 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { 2192 micro_cos( &r[1], &r[0] ); 2193 STORE( &r[1], 0, CHAN_X ); 2194 } 2195 if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2196 micro_sin( &r[1], &r[0] ); 2197 STORE( &r[1], 0, CHAN_Y ); 2198 } 2199 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2200 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2201 } 2202 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2203 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2204 } 2205 break; 2206 2207 case TGSI_OPCODE_NRM: 2208 assert (0); 2209 break; 2210 2211 case TGSI_OPCODE_DIV: 2212 assert( 0 ); 2213 break; 2214 2215 case TGSI_OPCODE_DP2: 2216 FETCH( &r[0], 0, CHAN_X ); 2217 FETCH( &r[1], 1, CHAN_X ); 2218 micro_mul( &r[0], &r[0], &r[1] ); 2219 2220 FETCH( &r[1], 0, CHAN_Y ); 2221 FETCH( &r[2], 1, CHAN_Y ); 2222 micro_mul( &r[1], &r[1], &r[2] ); 2223 micro_add( &r[0], &r[0], &r[1] ); 2224 2225 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2226 STORE( &r[0], 0, chan_index ); 2227 } 2228 break; 2229 2230 case TGSI_OPCODE_IF: 2231 /* push CondMask */ 2232 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2233 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2234 FETCH( &r[0], 0, CHAN_X ); 2235 /* update CondMask */ 2236 if( ! r[0].u[0] ) { 2237 mach->CondMask &= ~0x1; 2238 } 2239 if( ! r[0].u[1] ) { 2240 mach->CondMask &= ~0x2; 2241 } 2242 if( ! r[0].u[2] ) { 2243 mach->CondMask &= ~0x4; 2244 } 2245 if( ! 
r[0].u[3] ) { 2246 mach->CondMask &= ~0x8; 2247 } 2248 UPDATE_EXEC_MASK(mach); 2249 /* Todo: If CondMask==0, jump to ELSE */ 2250 break; 2251 2252 case TGSI_OPCODE_ELSE: 2253 /* invert CondMask wrt previous mask */ 2254 { 2255 uint prevMask; 2256 assert(mach->CondStackTop > 0); 2257 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2258 mach->CondMask = ~mach->CondMask & prevMask; 2259 UPDATE_EXEC_MASK(mach); 2260 /* Todo: If CondMask==0, jump to ENDIF */ 2261 } 2262 break; 2263 2264 case TGSI_OPCODE_ENDIF: 2265 /* pop CondMask */ 2266 assert(mach->CondStackTop > 0); 2267 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2268 UPDATE_EXEC_MASK(mach); 2269 break; 2270 2271 case TGSI_OPCODE_END: 2272 /* halt execution */ 2273 *pc = -1; 2274 break; 2275 2276 case TGSI_OPCODE_REP: 2277 assert (0); 2278 break; 2279 2280 case TGSI_OPCODE_ENDREP: 2281 assert (0); 2282 break; 2283 2284 case TGSI_OPCODE_PUSHA: 2285 assert (0); 2286 break; 2287 2288 case TGSI_OPCODE_POPA: 2289 assert (0); 2290 break; 2291 2292 case TGSI_OPCODE_CEIL: 2293 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2294 FETCH( &r[0], 0, chan_index ); 2295 micro_ceil( &r[0], &r[0] ); 2296 STORE( &r[0], 0, chan_index ); 2297 } 2298 break; 2299 2300 case TGSI_OPCODE_I2F: 2301 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2302 FETCH( &r[0], 0, chan_index ); 2303 micro_i2f( &r[0], &r[0] ); 2304 STORE( &r[0], 0, chan_index ); 2305 } 2306 break; 2307 2308 case TGSI_OPCODE_NOT: 2309 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2310 FETCH( &r[0], 0, chan_index ); 2311 micro_not( &r[0], &r[0] ); 2312 STORE( &r[0], 0, chan_index ); 2313 } 2314 break; 2315 2316 case TGSI_OPCODE_TRUNC: 2317 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2318 FETCH( &r[0], 0, chan_index ); 2319 micro_trunc( &r[0], &r[0] ); 2320 STORE( &r[0], 0, chan_index ); 2321 } 2322 break; 2323 2324 case TGSI_OPCODE_SHL: 2325 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2326 FETCH( &r[0], 0, chan_index ); 2327 FETCH( &r[1], 1, 
chan_index ); 2328 micro_shl( &r[0], &r[0], &r[1] ); 2329 STORE( &r[0], 0, chan_index ); 2330 } 2331 break; 2332 2333 case TGSI_OPCODE_SHR: 2334 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2335 FETCH( &r[0], 0, chan_index ); 2336 FETCH( &r[1], 1, chan_index ); 2337 micro_ishr( &r[0], &r[0], &r[1] ); 2338 STORE( &r[0], 0, chan_index ); 2339 } 2340 break; 2341 2342 case TGSI_OPCODE_AND: 2343 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2344 FETCH( &r[0], 0, chan_index ); 2345 FETCH( &r[1], 1, chan_index ); 2346 micro_and( &r[0], &r[0], &r[1] ); 2347 STORE( &r[0], 0, chan_index ); 2348 } 2349 break; 2350 2351 case TGSI_OPCODE_OR: 2352 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2353 FETCH( &r[0], 0, chan_index ); 2354 FETCH( &r[1], 1, chan_index ); 2355 micro_or( &r[0], &r[0], &r[1] ); 2356 STORE( &r[0], 0, chan_index ); 2357 } 2358 break; 2359 2360 case TGSI_OPCODE_MOD: 2361 assert (0); 2362 break; 2363 2364 case TGSI_OPCODE_XOR: 2365 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2366 FETCH( &r[0], 0, chan_index ); 2367 FETCH( &r[1], 1, chan_index ); 2368 micro_xor( &r[0], &r[0], &r[1] ); 2369 STORE( &r[0], 0, chan_index ); 2370 } 2371 break; 2372 2373 case TGSI_OPCODE_SAD: 2374 assert (0); 2375 break; 2376 2377 case TGSI_OPCODE_TXF: 2378 assert (0); 2379 break; 2380 2381 case TGSI_OPCODE_TXQ: 2382 assert (0); 2383 break; 2384 2385 case TGSI_OPCODE_EMIT: 2386 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 2387 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2388 break; 2389 2390 case TGSI_OPCODE_ENDPRIM: 2391 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 2392 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 2393 break; 2394 2395 case TGSI_OPCODE_LOOP: 2396 /* fall-through (for now) */ 2397 case TGSI_OPCODE_BGNLOOP2: 2398 /* push LoopMask and ContMasks */ 2399 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2400 mach->LoopStack[mach->LoopStackTop++] = 
mach->LoopMask; 2401 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2402 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2403 break; 2404 2405 case TGSI_OPCODE_ENDLOOP: 2406 /* fall-through (for now at least) */ 2407 case TGSI_OPCODE_ENDLOOP2: 2408 /* Restore ContMask, but don't pop */ 2409 assert(mach->ContStackTop > 0); 2410 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 2411 UPDATE_EXEC_MASK(mach); 2412 if (mach->ExecMask) { 2413 /* repeat loop: jump to instruction just past BGNLOOP */ 2414 *pc = inst->InstructionExtLabel.Label + 1; 2415 } 2416 else { 2417 /* exit loop: pop LoopMask */ 2418 assert(mach->LoopStackTop > 0); 2419 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 2420 /* pop ContMask */ 2421 assert(mach->ContStackTop > 0); 2422 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 2423 } 2424 UPDATE_EXEC_MASK(mach); 2425 break; 2426 2427 case TGSI_OPCODE_BRK: 2428 /* turn off loop channels for each enabled exec channel */ 2429 mach->LoopMask &= ~mach->ExecMask; 2430 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2431 UPDATE_EXEC_MASK(mach); 2432 break; 2433 2434 case TGSI_OPCODE_CONT: 2435 /* turn off cont channels for each enabled exec channel */ 2436 mach->ContMask &= ~mach->ExecMask; 2437 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2438 UPDATE_EXEC_MASK(mach); 2439 break; 2440 2441 case TGSI_OPCODE_BGNSUB: 2442 /* no-op */ 2443 break; 2444 2445 case TGSI_OPCODE_ENDSUB: 2446 /* no-op */ 2447 break; 2448 2449 case TGSI_OPCODE_NOISE1: 2450 assert( 0 ); 2451 break; 2452 2453 case TGSI_OPCODE_NOISE2: 2454 assert( 0 ); 2455 break; 2456 2457 case TGSI_OPCODE_NOISE3: 2458 assert( 0 ); 2459 break; 2460 2461 case TGSI_OPCODE_NOISE4: 2462 assert( 0 ); 2463 break; 2464 2465 case TGSI_OPCODE_NOP: 2466 break; 2467 2468 default: 2469 assert( 0 ); 2470 } 2471} 2472 2473 2474/** 2475 * Run TGSI interpreter. 
2476 * \return bitmask of "alive" quad components 2477 */ 2478uint 2479tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 2480{ 2481 uint i; 2482 int pc = 0; 2483 2484 mach->CondMask = 0xf; 2485 mach->LoopMask = 0xf; 2486 mach->ContMask = 0xf; 2487 mach->FuncMask = 0xf; 2488 mach->ExecMask = 0xf; 2489 2490 mach->CondStackTop = 0; /* temporarily subvert this assertion */ 2491 assert(mach->CondStackTop == 0); 2492 assert(mach->LoopStackTop == 0); 2493 assert(mach->ContStackTop == 0); 2494 assert(mach->CallStackTop == 0); 2495 2496 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 2497 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 2498 2499 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 2500 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 2501 mach->Primitives[0] = 0; 2502 } 2503 2504 2505 /* execute declarations (interpolants) */ 2506 for (i = 0; i < mach->NumDeclarations; i++) { 2507 exec_declaration( mach, mach->Declarations+i ); 2508 } 2509 2510 /* execute instructions, until pc is set to -1 */ 2511 while (pc != -1) { 2512 assert(pc < (int) mach->NumInstructions); 2513 exec_instruction( mach, mach->Instructions + pc, &pc ); 2514 } 2515 2516#if 0 2517 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 2518 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 2519 /* 2520 * Scale back depth component. 2521 */ 2522 for (i = 0; i < 4; i++) 2523 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 2524 } 2525#endif 2526 2527 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 2528} 2529 2530 2531