tgsi_exec.c revision d68f024b7dd1891d4939bf56d3065acc225b9c81
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/u_memory.h" 62#include "util/u_math.h" 63 64 65#define FAST_MATH 1 66 67#define TILE_TOP_LEFT 0 68#define TILE_TOP_RIGHT 1 69#define TILE_BOTTOM_LEFT 2 70#define TILE_BOTTOM_RIGHT 3 71 72static void 73micro_abs(union tgsi_exec_channel *dst, 74 const union tgsi_exec_channel *src) 75{ 76 dst->f[0] = fabsf(src->f[0]); 77 dst->f[1] = fabsf(src->f[1]); 78 dst->f[2] = fabsf(src->f[2]); 79 dst->f[3] = fabsf(src->f[3]); 80} 81 82static void 83micro_arl(union tgsi_exec_channel *dst, 84 const union tgsi_exec_channel *src) 85{ 86 dst->i[0] = (int)floorf(src->f[0]); 87 dst->i[1] = (int)floorf(src->f[1]); 88 dst->i[2] = (int)floorf(src->f[2]); 89 dst->i[3] = (int)floorf(src->f[3]); 90} 91 92static void 93micro_arr(union tgsi_exec_channel *dst, 94 const union tgsi_exec_channel *src) 95{ 96 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 97 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 98 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 99 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 100} 101 102static void 103micro_ceil(union tgsi_exec_channel *dst, 104 const union tgsi_exec_channel *src) 105{ 106 dst->f[0] = ceilf(src->f[0]); 107 dst->f[1] = ceilf(src->f[1]); 108 dst->f[2] = ceilf(src->f[2]); 109 dst->f[3] = ceilf(src->f[3]); 110} 111 112static void 113micro_cos(union tgsi_exec_channel *dst, 114 const union tgsi_exec_channel *src) 115{ 116 dst->f[0] = cosf(src->f[0]); 117 dst->f[1] = cosf(src->f[1]); 118 dst->f[2] = cosf(src->f[2]); 119 dst->f[3] = cosf(src->f[3]); 120} 121 122static void 123micro_ddx(union tgsi_exec_channel *dst, 124 const union tgsi_exec_channel *src) 125{ 126 dst->f[0] = 127 dst->f[1] = 128 dst->f[2] = 129 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 130} 131 132static void 133micro_ddy(union tgsi_exec_channel *dst, 134 const union tgsi_exec_channel *src) 135{ 136 dst->f[0] = 137 dst->f[1] = 138 dst->f[2] = 139 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 140} 141 142static void 143micro_exp2(union tgsi_exec_channel *dst, 144 const union tgsi_exec_channel *src) 145{ 146#if FAST_MATH 147 dst->f[0] = util_fast_exp2(src->f[0]); 148 dst->f[1] = util_fast_exp2(src->f[1]); 149 dst->f[2] = util_fast_exp2(src->f[2]); 150 dst->f[3] = util_fast_exp2(src->f[3]); 151#else 152#if DEBUG 153 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 154 uint i; 155 union tgsi_exec_channel clamped; 156 157 for (i = 0; i < 4; i++) { 158 if (src->f[i] > 127.99999f) { 159 clamped.f[i] = 127.99999f; 160 } else if (src->f[i] < -126.99999f) { 161 clamped.f[i] = -126.99999f; 162 } else { 163 clamped.f[i] = src->f[i]; 164 } 165 } 166 src = &clamped; 167#endif /* DEBUG */ 168 169 dst->f[0] = powf(2.0f, src->f[0]); 170 dst->f[1] = powf(2.0f, src->f[1]); 171 dst->f[2] = powf(2.0f, src->f[2]); 172 dst->f[3] = powf(2.0f, src->f[3]); 173#endif /* FAST_MATH */ 174} 175 176static void 177micro_flr(union tgsi_exec_channel *dst, 178 const union tgsi_exec_channel *src) 179{ 180 dst->f[0] = floorf(src->f[0]); 181 dst->f[1] = floorf(src->f[1]); 182 dst->f[2] = floorf(src->f[2]); 183 dst->f[3] = floorf(src->f[3]); 184} 185 186static void 187micro_frc(union tgsi_exec_channel *dst, 188 const union tgsi_exec_channel *src) 189{ 190 dst->f[0] = src->f[0] - floorf(src->f[0]); 191 dst->f[1] = src->f[1] - floorf(src->f[1]); 192 dst->f[2] = src->f[2] - floorf(src->f[2]); 193 dst->f[3] = src->f[3] - floorf(src->f[3]); 194} 195 196static void 197micro_iabs(union tgsi_exec_channel *dst, 198 const union tgsi_exec_channel *src) 199{ 200 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 201 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 202 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 203 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 204} 205 206static void 207micro_ineg(union tgsi_exec_channel *dst, 208 const union tgsi_exec_channel *src) 209{ 210 dst->i[0] = -src->i[0]; 211 dst->i[1] = -src->i[1]; 212 dst->i[2] = -src->i[2]; 213 dst->i[3] = -src->i[3]; 214} 215 216static void 217micro_lg2(union tgsi_exec_channel *dst, 218 const union tgsi_exec_channel *src) 219{ 220#if FAST_MATH 221 dst->f[0] = util_fast_log2(src->f[0]); 222 dst->f[1] = util_fast_log2(src->f[1]); 223 dst->f[2] = util_fast_log2(src->f[2]); 224 dst->f[3] = util_fast_log2(src->f[3]); 225#else 226 dst->f[0] = logf(src->f[0]) * 1.442695f; 227 dst->f[1] = logf(src->f[1]) * 1.442695f; 228 dst->f[2] = logf(src->f[2]) * 1.442695f; 229 dst->f[3] = logf(src->f[3]) * 1.442695f; 230#endif 231} 232 233static void 234micro_lrp(union tgsi_exec_channel *dst, 235 const union tgsi_exec_channel *src) 236{ 237 dst->f[0] = src[0].f[0] * (src[1].f[0] - src[2].f[0]) + src[2].f[0]; 238 dst->f[1] = src[0].f[1] * (src[1].f[1] - src[2].f[1]) + src[2].f[1]; 239 dst->f[2] = src[0].f[2] * (src[1].f[2] - src[2].f[2]) + src[2].f[2]; 240 dst->f[3] = src[0].f[3] * (src[1].f[3] - src[2].f[3]) + src[2].f[3]; 241} 242 243static void 244micro_mad(union tgsi_exec_channel *dst, 245 const union tgsi_exec_channel *src) 246{ 247 dst->f[0] = src[0].f[0] * src[1].f[0] + src[2].f[0]; 248 dst->f[1] = src[0].f[1] * src[1].f[1] + src[2].f[1]; 249 dst->f[2] = src[0].f[2] * src[1].f[2] + src[2].f[2]; 250 dst->f[3] = src[0].f[3] * src[1].f[3] + src[2].f[3]; 251} 252 253static void 254micro_mov(union tgsi_exec_channel *dst, 255 const union tgsi_exec_channel *src) 256{ 257 dst->u[0] = src->u[0]; 258 dst->u[1] = src->u[1]; 259 dst->u[2] = src->u[2]; 260 dst->u[3] = src->u[3]; 261} 262 263static void 264micro_rcp(union tgsi_exec_channel *dst, 265 const union tgsi_exec_channel *src) 266{ 267#if 0 /* for debugging */ 268 assert(src->f[0] != 0.0f); 269 assert(src->f[1] != 0.0f); 270 assert(src->f[2] != 0.0f); 271 assert(src->f[3] != 0.0f); 272#endif 273 dst->f[0] = 1.0f / src->f[0]; 274 dst->f[1] = 1.0f / src->f[1]; 275 dst->f[2] = 1.0f / src->f[2]; 276 dst->f[3] = 1.0f / src->f[3]; 277} 278 279static void 280micro_rnd(union tgsi_exec_channel *dst, 281 const union tgsi_exec_channel *src) 282{ 283 dst->f[0] = floorf(src->f[0] + 0.5f); 284 dst->f[1] = floorf(src->f[1] + 0.5f); 285 dst->f[2] = floorf(src->f[2] + 0.5f); 286 dst->f[3] = floorf(src->f[3] + 0.5f); 287} 288 289static void 290micro_rsq(union tgsi_exec_channel *dst, 291 const union tgsi_exec_channel *src) 292{ 293#if 0 /* for debugging */ 294 assert(src->f[0] != 0.0f); 295 assert(src->f[1] != 0.0f); 296 assert(src->f[2] != 0.0f); 297 assert(src->f[3] != 0.0f); 298#endif 299 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); 300 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); 301 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); 302 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); 303} 304 305static void 306micro_seq(union tgsi_exec_channel *dst, 307 const union tgsi_exec_channel *src) 308{ 309 dst->f[0] = src[0].f[0] == src[1].f[0] ? 1.0f : 0.0f; 310 dst->f[1] = src[0].f[1] == src[1].f[1] ? 1.0f : 0.0f; 311 dst->f[2] = src[0].f[2] == src[1].f[2] ? 1.0f : 0.0f; 312 dst->f[3] = src[0].f[3] == src[1].f[3] ? 1.0f : 0.0f; 313} 314 315static void 316micro_sge(union tgsi_exec_channel *dst, 317 const union tgsi_exec_channel *src) 318{ 319 dst->f[0] = src[0].f[0] >= src[1].f[0] ? 1.0f : 0.0f; 320 dst->f[1] = src[0].f[1] >= src[1].f[1] ? 1.0f : 0.0f; 321 dst->f[2] = src[0].f[2] >= src[1].f[2] ? 1.0f : 0.0f; 322 dst->f[3] = src[0].f[3] >= src[1].f[3] ? 1.0f : 0.0f; 323} 324 325static void 326micro_sgn(union tgsi_exec_channel *dst, 327 const union tgsi_exec_channel *src) 328{ 329 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 330 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 331 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 332 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 333} 334 335static void 336micro_sgt(union tgsi_exec_channel *dst, 337 const union tgsi_exec_channel *src) 338{ 339 dst->f[0] = src[0].f[0] > src[1].f[0] ? 1.0f : 0.0f; 340 dst->f[1] = src[0].f[1] > src[1].f[1] ? 1.0f : 0.0f; 341 dst->f[2] = src[0].f[2] > src[1].f[2] ? 1.0f : 0.0f; 342 dst->f[3] = src[0].f[3] > src[1].f[3] ? 1.0f : 0.0f; 343} 344 345static void 346micro_sin(union tgsi_exec_channel *dst, 347 const union tgsi_exec_channel *src) 348{ 349 dst->f[0] = sinf(src->f[0]); 350 dst->f[1] = sinf(src->f[1]); 351 dst->f[2] = sinf(src->f[2]); 352 dst->f[3] = sinf(src->f[3]); 353} 354 355static void 356micro_sle(union tgsi_exec_channel *dst, 357 const union tgsi_exec_channel *src) 358{ 359 dst->f[0] = src[0].f[0] <= src[1].f[0] ? 1.0f : 0.0f; 360 dst->f[1] = src[0].f[1] <= src[1].f[1] ? 1.0f : 0.0f; 361 dst->f[2] = src[0].f[2] <= src[1].f[2] ? 1.0f : 0.0f; 362 dst->f[3] = src[0].f[3] <= src[1].f[3] ? 1.0f : 0.0f; 363} 364 365static void 366micro_slt(union tgsi_exec_channel *dst, 367 const union tgsi_exec_channel *src) 368{ 369 dst->f[0] = src[0].f[0] < src[1].f[0] ? 1.0f : 0.0f; 370 dst->f[1] = src[0].f[1] < src[1].f[1] ? 1.0f : 0.0f; 371 dst->f[2] = src[0].f[2] < src[1].f[2] ? 1.0f : 0.0f; 372 dst->f[3] = src[0].f[3] < src[1].f[3] ? 1.0f : 0.0f; 373} 374 375static void 376micro_sne(union tgsi_exec_channel *dst, 377 const union tgsi_exec_channel *src) 378{ 379 dst->f[0] = src[0].f[0] != src[1].f[0] ? 1.0f : 0.0f; 380 dst->f[1] = src[0].f[1] != src[1].f[1] ? 1.0f : 0.0f; 381 dst->f[2] = src[0].f[2] != src[1].f[2] ? 1.0f : 0.0f; 382 dst->f[3] = src[0].f[3] != src[1].f[3] ? 1.0f : 0.0f; 383} 384 385static void 386micro_trunc(union tgsi_exec_channel *dst, 387 const union tgsi_exec_channel *src) 388{ 389 dst->f[0] = (float)(int)src->f[0]; 390 dst->f[1] = (float)(int)src->f[1]; 391 dst->f[2] = (float)(int)src->f[2]; 392 dst->f[3] = (float)(int)src->f[3]; 393} 394 395 396#define CHAN_X 0 397#define CHAN_Y 1 398#define CHAN_Z 2 399#define CHAN_W 3 400 401enum tgsi_exec_datatype { 402 TGSI_EXEC_DATA_FLOAT, 403 TGSI_EXEC_DATA_INT, 404 TGSI_EXEC_DATA_UINT 405}; 406 407/* 408 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 409 */ 410#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 411#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 412#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 413#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 414#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 415#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 416#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 417#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 418#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 419#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 420#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 421#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 422#define TEMP_128_I TGSI_EXEC_TEMP_128_I 423#define TEMP_128_C TGSI_EXEC_TEMP_128_C 424#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 425#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 426#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 427#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 428#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 429#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 430#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 431#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 432#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 433#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 434#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 435#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 436#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 437#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 438#define TEMP_R0 TGSI_EXEC_TEMP_R0 439#define TEMP_P0 TGSI_EXEC_TEMP_P0 440 441#define IS_CHANNEL_ENABLED(INST, CHAN)\ 442 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) 443 444#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 445 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) 446 447#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 448 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 449 if (IS_CHANNEL_ENABLED( INST, CHAN )) 450 451#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 452 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 453 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 454 455 456/** The execution mask depends on the conditional mask and the loop mask */ 457#define UPDATE_EXEC_MASK(MACH) \ 458 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 459 460 461static const union tgsi_exec_channel ZeroVec = 462 { { 0.0, 0.0, 0.0, 0.0 } }; 463 464static const union tgsi_exec_channel OneVec = { 465 {1.0f, 1.0f, 1.0f, 1.0f} 466}; 467 468 469/** 470 * Assert that none of the float values in 'chan' are infinite or NaN. 471 * NaN and Inf may occur normally during program execution and should 472 * not lead to crashes, etc. But when debugging, it's helpful to catch 473 * them. 474 */ 475static INLINE void 476check_inf_or_nan(const union tgsi_exec_channel *chan) 477{ 478 assert(!util_is_inf_or_nan((chan)->f[0])); 479 assert(!util_is_inf_or_nan((chan)->f[1])); 480 assert(!util_is_inf_or_nan((chan)->f[2])); 481 assert(!util_is_inf_or_nan((chan)->f[3])); 482} 483 484 485#ifdef DEBUG 486static void 487print_chan(const char *msg, const union tgsi_exec_channel *chan) 488{ 489 debug_printf("%s = {%f, %f, %f, %f}\n", 490 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 491} 492#endif 493 494 495#ifdef DEBUG 496static void 497print_temp(const struct tgsi_exec_machine *mach, uint index) 498{ 499 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 500 int i; 501 debug_printf("Temp[%u] =\n", index); 502 for (i = 0; i < 4; i++) { 503 debug_printf(" %c: { %f, %f, %f, %f }\n", 504 "XYZW"[i], 505 tmp->xyzw[i].f[0], 506 tmp->xyzw[i].f[1], 507 tmp->xyzw[i].f[2], 508 tmp->xyzw[i].f[3]); 509 } 510} 511#endif 512 513 514/** 515 * Check if there's a potential src/dst register data dependency when 516 * using SOA execution. 517 * Example: 518 * MOV T, T.yxwz; 519 * This would expand into: 520 * MOV t0, t1; 521 * MOV t1, t0; 522 * MOV t2, t3; 523 * MOV t3, t2; 524 * The second instruction will have the wrong value for t0 if executed as-is. 525 */ 526boolean 527tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 528{ 529 uint i, chan; 530 531 uint writemask = inst->Dst[0].Register.WriteMask; 532 if (writemask == TGSI_WRITEMASK_X || 533 writemask == TGSI_WRITEMASK_Y || 534 writemask == TGSI_WRITEMASK_Z || 535 writemask == TGSI_WRITEMASK_W || 536 writemask == TGSI_WRITEMASK_NONE) { 537 /* no chance of data dependency */ 538 return FALSE; 539 } 540 541 /* loop over src regs */ 542 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 543 if ((inst->Src[i].Register.File == 544 inst->Dst[0].Register.File) && 545 (inst->Src[i].Register.Index == 546 inst->Dst[0].Register.Index)) { 547 /* loop over dest channels */ 548 uint channelsWritten = 0x0; 549 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 550 /* check if we're reading a channel that's been written */ 551 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 552 if (channelsWritten & (1 << swizzle)) { 553 return TRUE; 554 } 555 556 channelsWritten |= (1 << chan); 557 } 558 } 559 } 560 return FALSE; 561} 562 563 564/** 565 * Initialize machine state by expanding tokens to full instructions, 566 * allocating temporary storage, setting up constants, etc. 567 * After this, we can call tgsi_exec_machine_run() many times. 568 */ 569void 570tgsi_exec_machine_bind_shader( 571 struct tgsi_exec_machine *mach, 572 const struct tgsi_token *tokens, 573 uint numSamplers, 574 struct tgsi_sampler **samplers) 575{ 576 uint k; 577 struct tgsi_parse_context parse; 578 struct tgsi_exec_labels *labels = &mach->Labels; 579 struct tgsi_full_instruction *instructions; 580 struct tgsi_full_declaration *declarations; 581 uint maxInstructions = 10, numInstructions = 0; 582 uint maxDeclarations = 10, numDeclarations = 0; 583 uint instno = 0; 584 585#if 0 586 tgsi_dump(tokens, 0); 587#endif 588 589 util_init_math(); 590 591 mach->Tokens = tokens; 592 mach->Samplers = samplers; 593 594 k = tgsi_parse_init (&parse, mach->Tokens); 595 if (k != TGSI_PARSE_OK) { 596 debug_printf( "Problem parsing!\n" ); 597 return; 598 } 599 600 mach->Processor = parse.FullHeader.Processor.Processor; 601 mach->ImmLimit = 0; 602 labels->count = 0; 603 604 declarations = (struct tgsi_full_declaration *) 605 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 606 607 if (!declarations) { 608 return; 609 } 610 611 instructions = (struct tgsi_full_instruction *) 612 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 613 614 if (!instructions) { 615 FREE( declarations ); 616 return; 617 } 618 619 while( !tgsi_parse_end_of_tokens( &parse ) ) { 620 uint pointer = parse.Position; 621 uint i; 622 623 tgsi_parse_token( &parse ); 624 switch( parse.FullToken.Token.Type ) { 625 case TGSI_TOKEN_TYPE_DECLARATION: 626 /* save expanded declaration */ 627 if (numDeclarations == maxDeclarations) { 628 declarations = REALLOC(declarations, 629 maxDeclarations 630 * sizeof(struct tgsi_full_declaration), 631 (maxDeclarations + 10) 632 * sizeof(struct tgsi_full_declaration)); 633 maxDeclarations += 10; 634 } 635 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 636 unsigned reg; 637 for (reg = parse.FullToken.FullDeclaration.Range.First; 638 reg <= parse.FullToken.FullDeclaration.Range.Last; 639 ++reg) { 640 ++mach->NumOutputs; 641 } 642 } 643 memcpy(declarations + numDeclarations, 644 &parse.FullToken.FullDeclaration, 645 sizeof(declarations[0])); 646 numDeclarations++; 647 break; 648 649 case TGSI_TOKEN_TYPE_IMMEDIATE: 650 { 651 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 652 assert( size <= 4 ); 653 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 654 655 for( i = 0; i < size; i++ ) { 656 mach->Imms[mach->ImmLimit][i] = 657 parse.FullToken.FullImmediate.u[i].Float; 658 } 659 mach->ImmLimit += 1; 660 } 661 break; 662 663 case TGSI_TOKEN_TYPE_INSTRUCTION: 664 assert( labels->count < MAX_LABELS ); 665 666 labels->labels[labels->count][0] = instno; 667 labels->labels[labels->count][1] = pointer; 668 labels->count++; 669 670 /* save expanded instruction */ 671 if (numInstructions == maxInstructions) { 672 instructions = REALLOC(instructions, 673 maxInstructions 674 * sizeof(struct tgsi_full_instruction), 675 (maxInstructions + 10) 676 * sizeof(struct tgsi_full_instruction)); 677 maxInstructions += 10; 678 } 679 680 memcpy(instructions + numInstructions, 681 &parse.FullToken.FullInstruction, 682 sizeof(instructions[0])); 683 684 numInstructions++; 685 break; 686 687 case TGSI_TOKEN_TYPE_PROPERTY: 688 break; 689 690 default: 691 assert( 0 ); 692 } 693 } 694 tgsi_parse_free (&parse); 695 696 if (mach->Declarations) { 697 FREE( mach->Declarations ); 698 } 699 mach->Declarations = declarations; 700 mach->NumDeclarations = numDeclarations; 701 702 if (mach->Instructions) { 703 FREE( mach->Instructions ); 704 } 705 mach->Instructions = instructions; 706 mach->NumInstructions = numInstructions; 707} 708 709 710struct tgsi_exec_machine * 711tgsi_exec_machine_create( void ) 712{ 713 struct tgsi_exec_machine *mach; 714 uint i; 715 716 mach = align_malloc( sizeof *mach, 16 ); 717 if (!mach) 718 goto fail; 719 720 memset(mach, 0, sizeof(*mach)); 721 722 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 723 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 724 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 725 726 /* Setup constants. */ 727 for( i = 0; i < 4; i++ ) { 728 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 729 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 730 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 731 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 732 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 733 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 734 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 735 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 736 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 737 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 738 } 739 740#ifdef DEBUG 741 /* silence warnings */ 742 (void) print_chan; 743 (void) print_temp; 744#endif 745 746 return mach; 747 748fail: 749 align_free(mach); 750 return NULL; 751} 752 753 754void 755tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 756{ 757 if (mach) { 758 FREE(mach->Instructions); 759 FREE(mach->Declarations); 760 } 761 762 align_free(mach); 763} 764 765static void 766micro_add( 767 union tgsi_exec_channel *dst, 768 const union tgsi_exec_channel *src0, 769 const union tgsi_exec_channel *src1 ) 770{ 771 dst->f[0] = src0->f[0] + src1->f[0]; 772 dst->f[1] = src0->f[1] + src1->f[1]; 773 dst->f[2] = src0->f[2] + src1->f[2]; 774 dst->f[3] = src0->f[3] + src1->f[3]; 775} 776 777static void 778micro_div( 779 union tgsi_exec_channel *dst, 780 const union tgsi_exec_channel *src0, 781 const union tgsi_exec_channel *src1 ) 782{ 783 if (src1->f[0] != 0) { 784 dst->f[0] = src0->f[0] / src1->f[0]; 785 } 786 if (src1->f[1] != 0) { 787 dst->f[1] = src0->f[1] / src1->f[1]; 788 } 789 if (src1->f[2] != 0) { 790 dst->f[2] = src0->f[2] / src1->f[2]; 791 } 792 if (src1->f[3] != 0) { 793 dst->f[3] = src0->f[3] / src1->f[3]; 794 } 795} 796 797static void 798micro_float_clamp(union tgsi_exec_channel *dst, 799 const union tgsi_exec_channel *src) 800{ 801 uint i; 802 803 for (i = 0; i < 4; i++) { 804 if (src->f[i] > 0.0f) { 805 if (src->f[i] > 1.884467e+019f) 806 dst->f[i] = 1.884467e+019f; 807 else if (src->f[i] < 5.42101e-020f) 808 dst->f[i] = 5.42101e-020f; 809 else 810 dst->f[i] = src->f[i]; 811 } 812 else { 813 if (src->f[i] < -1.884467e+019f) 814 dst->f[i] = -1.884467e+019f; 815 else if (src->f[i] > -5.42101e-020f) 816 dst->f[i] = -5.42101e-020f; 817 else 818 dst->f[i] = src->f[i]; 819 } 820 } 821} 822 823static void 824micro_lt( 825 union tgsi_exec_channel *dst, 826 const union tgsi_exec_channel *src0, 827 const union tgsi_exec_channel *src1, 828 const union tgsi_exec_channel *src2, 829 const union tgsi_exec_channel *src3 ) 830{ 831 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 832 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 833 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 834 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 835} 836 837static void 838micro_max( 839 union tgsi_exec_channel *dst, 840 const union tgsi_exec_channel *src0, 841 const union tgsi_exec_channel *src1 ) 842{ 843 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 844 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 845 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 846 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 847} 848 849static void 850micro_min( 851 union tgsi_exec_channel *dst, 852 const union tgsi_exec_channel *src0, 853 const union tgsi_exec_channel *src1 ) 854{ 855 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 856 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 857 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 858 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 859} 860 861static void 862micro_mul( 863 union tgsi_exec_channel *dst, 864 const union tgsi_exec_channel *src0, 865 const union tgsi_exec_channel *src1 ) 866{ 867 dst->f[0] = src0->f[0] * src1->f[0]; 868 dst->f[1] = src0->f[1] * src1->f[1]; 869 dst->f[2] = src0->f[2] * src1->f[2]; 870 dst->f[3] = src0->f[3] * src1->f[3]; 871} 872 873#if 0 874static void 875micro_imul64( 876 union tgsi_exec_channel *dst0, 877 union tgsi_exec_channel *dst1, 878 const union tgsi_exec_channel *src0, 879 const union tgsi_exec_channel *src1 ) 880{ 881 dst1->i[0] = src0->i[0] * src1->i[0]; 882 dst1->i[1] = src0->i[1] * src1->i[1]; 883 dst1->i[2] = src0->i[2] * src1->i[2]; 884 dst1->i[3] = src0->i[3] * src1->i[3]; 885 dst0->i[0] = 0; 886 dst0->i[1] = 0; 887 dst0->i[2] = 0; 888 dst0->i[3] = 0; 889} 890#endif 891 892#if 0 893static void 894micro_umul64( 895 union tgsi_exec_channel *dst0, 896 union tgsi_exec_channel *dst1, 897 const union tgsi_exec_channel *src0, 898 const union tgsi_exec_channel *src1 ) 899{ 900 dst1->u[0] = src0->u[0] * src1->u[0]; 901 dst1->u[1] = src0->u[1] * src1->u[1]; 902 dst1->u[2] = src0->u[2] * src1->u[2]; 903 dst1->u[3] = src0->u[3] * src1->u[3]; 904 dst0->u[0] = 0; 905 dst0->u[1] = 0; 906 dst0->u[2] = 0; 907 dst0->u[3] = 0; 908} 909#endif 910 911 912#if 0 913static void 914micro_movc( 915 union tgsi_exec_channel *dst, 916 const union tgsi_exec_channel *src0, 917 const union tgsi_exec_channel *src1, 918 const union tgsi_exec_channel *src2 ) 919{ 920 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 921 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 922 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 923 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 924} 925#endif 926 927static void 928micro_neg( 929 union tgsi_exec_channel *dst, 930 const union tgsi_exec_channel *src ) 931{ 932 dst->f[0] = -src->f[0]; 933 dst->f[1] = -src->f[1]; 934 dst->f[2] = -src->f[2]; 935 dst->f[3] = -src->f[3]; 936} 937 938static void 939micro_pow( 940 union tgsi_exec_channel *dst, 941 const union tgsi_exec_channel *src0, 942 const union tgsi_exec_channel *src1 ) 943{ 944#if FAST_MATH 945 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 946 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 947 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 948 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 949#else 950 dst->f[0] = powf( src0->f[0], src1->f[0] ); 951 dst->f[1] = powf( src0->f[1], src1->f[1] ); 952 dst->f[2] = powf( src0->f[2], src1->f[2] ); 953 dst->f[3] = powf( src0->f[3], src1->f[3] ); 954#endif 955} 956 957static void 958micro_sqrt( union tgsi_exec_channel *dst, 959 const union tgsi_exec_channel *src ) 960{ 961 dst->f[0] = sqrtf( src->f[0] ); 962 dst->f[1] = sqrtf( src->f[1] ); 963 dst->f[2] = sqrtf( src->f[2] ); 964 dst->f[3] = sqrtf( src->f[3] ); 965} 966 967static void 968micro_sub( 969 union tgsi_exec_channel *dst, 970 const union tgsi_exec_channel *src0, 971 const union tgsi_exec_channel *src1 ) 972{ 973 dst->f[0] = src0->f[0] - src1->f[0]; 974 dst->f[1] = src0->f[1] - src1->f[1]; 975 dst->f[2] = src0->f[2] - src1->f[2]; 976 dst->f[3] = src0->f[3] - src1->f[3]; 977} 978 979static void 980fetch_src_file_channel(const struct tgsi_exec_machine *mach, 981 const uint file, 982 const uint swizzle, 983 const union tgsi_exec_channel *index, 984 const union tgsi_exec_channel *index2D, 985 union tgsi_exec_channel *chan) 986{ 987 uint i; 988 989 switch (file) { 990 case TGSI_FILE_CONSTANT: 991 for (i = 0; i < QUAD_SIZE; i++) { 992 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); 993 assert(mach->Consts[index2D->i[i]]); 994 995 if (index->i[i] < 0) { 996 chan->u[i] = 0; 997 } else { 998 const uint *p = (const uint *)mach->Consts[index2D->i[i]]; 999 1000 chan->u[i] = p[index->i[i] * 4 + swizzle]; 1001 } 1002 } 1003 break; 1004 1005 case TGSI_FILE_INPUT: 1006 case TGSI_FILE_SYSTEM_VALUE: 1007 for (i = 0; i < QUAD_SIZE; i++) { 1008 /* XXX: 2D indexing */ 1009 chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; 1010 } 1011 break; 1012 1013 case TGSI_FILE_TEMPORARY: 1014 for (i = 0; i < QUAD_SIZE; i++) { 1015 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1016 assert(index2D->i[i] == 0); 1017 1018 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1019 } 1020 break; 1021 1022 case TGSI_FILE_IMMEDIATE: 1023 for (i = 0; i < QUAD_SIZE; i++) { 1024 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1025 assert(index2D->i[i] == 0); 1026 1027 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1028 } 1029 break; 1030 1031 case TGSI_FILE_ADDRESS: 1032 for (i = 0; i < QUAD_SIZE; i++) { 1033 assert(index->i[i] >= 0); 1034 assert(index2D->i[i] == 0); 1035 1036 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1037 } 1038 break; 1039 1040 case TGSI_FILE_PREDICATE: 1041 for (i = 0; i < QUAD_SIZE; i++) { 1042 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); 1043 assert(index2D->i[i] == 0); 1044 1045 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; 1046 } 1047 break; 1048 1049 case TGSI_FILE_OUTPUT: 1050 /* vertex/fragment output vars can be read too */ 1051 for (i = 0; i < QUAD_SIZE; i++) { 1052 assert(index->i[i] >= 0); 1053 assert(index2D->i[i] == 0); 1054 1055 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1056 } 1057 break; 1058 1059 default: 1060 assert(0); 1061 for (i = 0; i < QUAD_SIZE; i++) { 1062 chan->u[i] = 0; 1063 } 1064 } 1065} 1066 1067static void 1068fetch_source(const struct tgsi_exec_machine *mach, 1069 union tgsi_exec_channel *chan, 1070 const struct tgsi_full_src_register *reg, 1071 const uint chan_index, 1072 enum tgsi_exec_datatype src_datatype) 1073{ 1074 union tgsi_exec_channel index; 1075 union tgsi_exec_channel index2D; 1076 uint swizzle; 1077 1078 /* We start with a direct index into a register file. 1079 * 1080 * file[1], 1081 * where: 1082 * file = Register.File 1083 * [1] = Register.Index 1084 */ 1085 index.i[0] = 1086 index.i[1] = 1087 index.i[2] = 1088 index.i[3] = reg->Register.Index; 1089 1090 /* There is an extra source register that indirectly subscripts 1091 * a register file. The direct index now becomes an offset 1092 * that is being added to the indirect register. 1093 * 1094 * file[ind[2].x+1], 1095 * where: 1096 * ind = Indirect.File 1097 * [2] = Indirect.Index 1098 * .x = Indirect.SwizzleX 1099 */ 1100 if (reg->Register.Indirect) { 1101 union tgsi_exec_channel index2; 1102 union tgsi_exec_channel indir_index; 1103 const uint execmask = mach->ExecMask; 1104 uint i; 1105 1106 /* which address register (always zero now) */ 1107 index2.i[0] = 1108 index2.i[1] = 1109 index2.i[2] = 1110 index2.i[3] = reg->Indirect.Index; 1111 1112 /* get current value of address register[swizzle] */ 1113 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1114 fetch_src_file_channel(mach, 1115 reg->Indirect.File, 1116 swizzle, 1117 &index2, 1118 &ZeroVec, 1119 &indir_index); 1120 1121 /* add value of address register to the offset */ 1122 index.i[0] += indir_index.i[0]; 1123 index.i[1] += indir_index.i[1]; 1124 index.i[2] += indir_index.i[2]; 1125 index.i[3] += indir_index.i[3]; 1126 1127 /* for disabled execution channels, zero-out the index to 1128 * avoid using a potential garbage value. 1129 */ 1130 for (i = 0; i < QUAD_SIZE; i++) { 1131 if ((execmask & (1 << i)) == 0) 1132 index.i[i] = 0; 1133 } 1134 } 1135 1136 /* There is an extra source register that is a second 1137 * subscript to a register file. Effectively it means that 1138 * the register file is actually a 2D array of registers. 1139 * 1140 * file[3][1], 1141 * where: 1142 * [3] = Dimension.Index 1143 */ 1144 if (reg->Register.Dimension) { 1145 index2D.i[0] = 1146 index2D.i[1] = 1147 index2D.i[2] = 1148 index2D.i[3] = reg->Dimension.Index; 1149 1150 /* Again, the second subscript index can be addressed indirectly 1151 * identically to the first one. 1152 * Nothing stops us from indirectly addressing the indirect register, 1153 * but there is no need for that, so we won't exercise it. 1154 * 1155 * file[ind[4].y+3][1], 1156 * where: 1157 * ind = DimIndirect.File 1158 * [4] = DimIndirect.Index 1159 * .y = DimIndirect.SwizzleX 1160 */ 1161 if (reg->Dimension.Indirect) { 1162 union tgsi_exec_channel index2; 1163 union tgsi_exec_channel indir_index; 1164 const uint execmask = mach->ExecMask; 1165 uint i; 1166 1167 index2.i[0] = 1168 index2.i[1] = 1169 index2.i[2] = 1170 index2.i[3] = reg->DimIndirect.Index; 1171 1172 swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); 1173 fetch_src_file_channel(mach, 1174 reg->DimIndirect.File, 1175 swizzle, 1176 &index2, 1177 &ZeroVec, 1178 &indir_index); 1179 1180 index2D.i[0] += indir_index.i[0]; 1181 index2D.i[1] += indir_index.i[1]; 1182 index2D.i[2] += indir_index.i[2]; 1183 index2D.i[3] += indir_index.i[3]; 1184 1185 /* for disabled execution channels, zero-out the index to 1186 * avoid using a potential garbage value. 1187 */ 1188 for (i = 0; i < QUAD_SIZE; i++) { 1189 if ((execmask & (1 << i)) == 0) { 1190 index2D.i[i] = 0; 1191 } 1192 } 1193 } 1194 1195 /* If by any chance there was a need for a 3D array of register 1196 * files, we would have to check whether Dimension is followed 1197 * by a dimension register and continue the saga. 1198 */ 1199 } else { 1200 index2D.i[0] = 1201 index2D.i[1] = 1202 index2D.i[2] = 1203 index2D.i[3] = 0; 1204 } 1205 1206 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1207 fetch_src_file_channel(mach, 1208 reg->Register.File, 1209 swizzle, 1210 &index, 1211 &index2D, 1212 chan); 1213 1214 if (reg->Register.Absolute) { 1215 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1216 micro_abs(chan, chan); 1217 } else { 1218 micro_iabs(chan, chan); 1219 } 1220 } 1221 1222 if (reg->Register.Negate) { 1223 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1224 micro_neg(chan, chan); 1225 } else { 1226 micro_ineg(chan, chan); 1227 } 1228 } 1229} 1230 1231static void 1232store_dest(struct tgsi_exec_machine *mach, 1233 const union tgsi_exec_channel *chan, 1234 const struct tgsi_full_dst_register *reg, 1235 const struct tgsi_full_instruction *inst, 1236 uint chan_index, 1237 enum tgsi_exec_datatype dst_datatype) 1238{ 1239 uint i; 1240 union tgsi_exec_channel null; 1241 union tgsi_exec_channel *dst; 1242 uint execmask = mach->ExecMask; 1243 int offset = 0; /* indirection offset */ 1244 int index; 1245 1246 /* for debugging */ 1247 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { 1248 check_inf_or_nan(chan); 1249 } 1250 1251 /* There is an extra source register that indirectly subscripts 1252 * a register file. The direct index now becomes an offset 1253 * that is being added to the indirect register. 1254 * 1255 * file[ind[2].x+1], 1256 * where: 1257 * ind = Indirect.File 1258 * [2] = Indirect.Index 1259 * .x = Indirect.SwizzleX 1260 */ 1261 if (reg->Register.Indirect) { 1262 union tgsi_exec_channel index; 1263 union tgsi_exec_channel indir_index; 1264 uint swizzle; 1265 1266 /* which address register (always zero for now) */ 1267 index.i[0] = 1268 index.i[1] = 1269 index.i[2] = 1270 index.i[3] = reg->Indirect.Index; 1271 1272 /* get current value of address register[swizzle] */ 1273 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1274 1275 /* fetch values from the address/indirection register */ 1276 fetch_src_file_channel(mach, 1277 reg->Indirect.File, 1278 swizzle, 1279 &index, 1280 &ZeroVec, 1281 &indir_index); 1282 1283 /* save indirection offset */ 1284 offset = indir_index.i[0]; 1285 } 1286 1287 switch (reg->Register.File) { 1288 case TGSI_FILE_NULL: 1289 dst = &null; 1290 break; 1291 1292 case TGSI_FILE_OUTPUT: 1293 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1294 + reg->Register.Index; 1295 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1296#if 0 1297 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1298 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1299 for (i = 0; i < QUAD_SIZE; i++) 1300 if (execmask & (1 << i)) 1301 fprintf(stderr, "%f, ", chan->f[i]); 1302 fprintf(stderr, ")\n"); 1303 } 1304#endif 1305 break; 1306 1307 case TGSI_FILE_TEMPORARY: 1308 index = reg->Register.Index; 1309 assert( index < TGSI_EXEC_NUM_TEMPS ); 1310 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1311 break; 1312 1313 case TGSI_FILE_ADDRESS: 1314 index = reg->Register.Index; 1315 dst = &mach->Addrs[index].xyzw[chan_index]; 1316 break; 1317 1318 case TGSI_FILE_LOOP: 1319 assert(reg->Register.Index == 0); 1320 assert(mach->LoopCounterStackTop > 0); 1321 assert(chan_index == CHAN_X); 1322 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; 1323 break; 1324 1325 case TGSI_FILE_PREDICATE: 1326 index = reg->Register.Index; 1327 assert(index < TGSI_EXEC_NUM_PREDS); 1328 dst = &mach->Predicates[index].xyzw[chan_index]; 1329 break; 1330 1331 default: 1332 assert( 0 ); 1333 return; 1334 } 1335 1336 if (inst->Instruction.Predicate) { 1337 uint swizzle; 1338 union tgsi_exec_channel *pred; 1339 1340 switch (chan_index) { 1341 case CHAN_X: 1342 swizzle = inst->Predicate.SwizzleX; 1343 break; 1344 case CHAN_Y: 1345 swizzle = inst->Predicate.SwizzleY; 1346 break; 1347 case CHAN_Z: 1348 swizzle = inst->Predicate.SwizzleZ; 1349 break; 1350 case CHAN_W: 1351 swizzle = inst->Predicate.SwizzleW; 1352 break; 1353 default: 1354 assert(0); 1355 return; 1356 } 1357 1358 assert(inst->Predicate.Index == 0); 1359 1360 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; 1361 1362 if (inst->Predicate.Negate) { 1363 for (i = 0; i < QUAD_SIZE; i++) { 1364 if (pred->u[i]) { 1365 execmask &= ~(1 << i); 1366 } 1367 } 1368 } else { 1369 for (i = 0; i < QUAD_SIZE; i++) { 1370 if (!pred->u[i]) { 1371 execmask &= ~(1 << i); 1372 } 1373 } 1374 } 1375 } 1376 1377 switch (inst->Instruction.Saturate) { 1378 case TGSI_SAT_NONE: 1379 for (i = 0; i < QUAD_SIZE; i++) 1380 if (execmask & (1 << i)) 1381 dst->i[i] = chan->i[i]; 1382 break; 1383 1384 case TGSI_SAT_ZERO_ONE: 1385 for (i = 0; i < QUAD_SIZE; i++) 1386 if (execmask & (1 << i)) { 1387 if (chan->f[i] < 0.0f) 1388 dst->f[i] = 0.0f; 1389 else if (chan->f[i] > 1.0f) 1390 dst->f[i] = 1.0f; 1391 else 1392 dst->i[i] = chan->i[i]; 1393 } 1394 break; 1395 1396 case TGSI_SAT_MINUS_PLUS_ONE: 1397 for (i = 0; i < QUAD_SIZE; i++) 1398 if (execmask & (1 << i)) { 1399 if (chan->f[i] < -1.0f) 1400 dst->f[i] = -1.0f; 1401 else if (chan->f[i] > 1.0f) 1402 dst->f[i] = 1.0f; 1403 else 1404 dst->i[i] = chan->i[i]; 1405 } 1406 break; 1407 1408 default: 1409 assert( 0 ); 1410 } 1411} 1412 1413#define FETCH(VAL,INDEX,CHAN)\ 1414 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 1415 1416#define STORE(VAL,INDEX,CHAN)\ 1417 store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) 1418 1419 1420/** 1421 * Execute ARB-style KIL which is predicated by a src register. 1422 * Kill fragment if any of the four values is less than zero. 1423 */ 1424static void 1425exec_kil(struct tgsi_exec_machine *mach, 1426 const struct tgsi_full_instruction *inst) 1427{ 1428 uint uniquemask; 1429 uint chan_index; 1430 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1431 union tgsi_exec_channel r[1]; 1432 1433 /* This mask stores component bits that were already tested. */ 1434 uniquemask = 0; 1435 1436 for (chan_index = 0; chan_index < 4; chan_index++) 1437 { 1438 uint swizzle; 1439 uint i; 1440 1441 /* unswizzle channel */ 1442 swizzle = tgsi_util_get_full_src_register_swizzle ( 1443 &inst->Src[0], 1444 chan_index); 1445 1446 /* check if the component has not been already tested */ 1447 if (uniquemask & (1 << swizzle)) 1448 continue; 1449 uniquemask |= 1 << swizzle; 1450 1451 FETCH(&r[0], 0, chan_index); 1452 for (i = 0; i < 4; i++) 1453 if (r[0].f[i] < 0.0f) 1454 kilmask |= 1 << i; 1455 } 1456 1457 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1458} 1459 1460/** 1461 * Execute NVIDIA-style KIL which is predicated by a condition code. 1462 * Kill fragment if the condition code is TRUE. 1463 */ 1464static void 1465exec_kilp(struct tgsi_exec_machine *mach, 1466 const struct tgsi_full_instruction *inst) 1467{ 1468 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1469 1470 /* "unconditional" kil */ 1471 kilmask = mach->ExecMask; 1472 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1473} 1474 1475static void 1476emit_vertex(struct tgsi_exec_machine *mach) 1477{ 1478 /* FIXME: check for exec mask correctly 1479 unsigned i; 1480 for (i = 0; i < QUAD_SIZE; ++i) { 1481 if ((mach->ExecMask & (1 << i))) 1482 */ 1483 if (mach->ExecMask) { 1484 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 1485 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 1486 } 1487} 1488 1489static void 1490emit_primitive(struct tgsi_exec_machine *mach) 1491{ 1492 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; 1493 /* FIXME: check for exec mask correctly 1494 unsigned i; 1495 for (i = 0; i < QUAD_SIZE; ++i) { 1496 if ((mach->ExecMask & (1 << i))) 1497 */ 1498 if (mach->ExecMask) { 1499 ++(*prim_count); 1500 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 1501 mach->Primitives[*prim_count] = 0; 1502 } 1503} 1504 1505/* 1506 * Fetch four texture samples using STR texture coordinates. 1507 */ 1508static void 1509fetch_texel( struct tgsi_sampler *sampler, 1510 const union tgsi_exec_channel *s, 1511 const union tgsi_exec_channel *t, 1512 const union tgsi_exec_channel *p, 1513 const union tgsi_exec_channel *c0, 1514 enum tgsi_sampler_control control, 1515 union tgsi_exec_channel *r, 1516 union tgsi_exec_channel *g, 1517 union tgsi_exec_channel *b, 1518 union tgsi_exec_channel *a ) 1519{ 1520 uint j; 1521 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1522 1523 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); 1524 1525 for (j = 0; j < 4; j++) { 1526 r->f[j] = rgba[0][j]; 1527 g->f[j] = rgba[1][j]; 1528 b->f[j] = rgba[2][j]; 1529 a->f[j] = rgba[3][j]; 1530 } 1531} 1532 1533 1534#define TEX_MODIFIER_NONE 0 1535#define TEX_MODIFIER_PROJECTED 1 1536#define TEX_MODIFIER_LOD_BIAS 2 1537#define TEX_MODIFIER_EXPLICIT_LOD 3 1538 1539 1540static void 1541exec_tex(struct tgsi_exec_machine *mach, 1542 const struct tgsi_full_instruction *inst, 1543 uint modifier) 1544{ 1545 const uint unit = inst->Src[1].Register.Index; 1546 union tgsi_exec_channel r[4]; 1547 const union tgsi_exec_channel *lod = &ZeroVec; 1548 enum tgsi_sampler_control control; 1549 uint chan_index; 1550 1551 if (modifier != TEX_MODIFIER_NONE) { 1552 FETCH(&r[3], 0, CHAN_W); 1553 if (modifier != TEX_MODIFIER_PROJECTED) { 1554 lod = &r[3]; 1555 } 1556 } 1557 1558 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 1559 control = tgsi_sampler_lod_explicit; 1560 } else { 1561 control = tgsi_sampler_lod_bias; 1562 } 1563 1564 switch (inst->Texture.Texture) { 1565 case TGSI_TEXTURE_1D: 1566 case TGSI_TEXTURE_SHADOW1D: 1567 FETCH(&r[0], 0, CHAN_X); 1568 1569 if (modifier == TEX_MODIFIER_PROJECTED) { 1570 micro_div(&r[0], &r[0], &r[3]); 1571 } 1572 1573 fetch_texel(mach->Samplers[unit], 1574 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ 1575 control, 1576 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1577 break; 1578 1579 case TGSI_TEXTURE_2D: 1580 case TGSI_TEXTURE_RECT: 1581 case TGSI_TEXTURE_SHADOW2D: 1582 case TGSI_TEXTURE_SHADOWRECT: 1583 FETCH(&r[0], 0, CHAN_X); 1584 FETCH(&r[1], 0, CHAN_Y); 1585 FETCH(&r[2], 0, CHAN_Z); 1586 1587 if (modifier == TEX_MODIFIER_PROJECTED) { 1588 micro_div(&r[0], &r[0], &r[3]); 1589 micro_div(&r[1], &r[1], &r[3]); 1590 micro_div(&r[2], &r[2], &r[3]); 1591 } 1592 1593 fetch_texel(mach->Samplers[unit], 1594 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ 1595 control, 1596 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1597 break; 1598 1599 case TGSI_TEXTURE_3D: 1600 case TGSI_TEXTURE_CUBE: 1601 FETCH(&r[0], 0, CHAN_X); 1602 FETCH(&r[1], 0, CHAN_Y); 1603 FETCH(&r[2], 0, CHAN_Z); 1604 1605 if (modifier == TEX_MODIFIER_PROJECTED) { 1606 micro_div(&r[0], &r[0], &r[3]); 1607 micro_div(&r[1], &r[1], &r[3]); 1608 micro_div(&r[2], &r[2], &r[3]); 1609 } 1610 1611 fetch_texel(mach->Samplers[unit], 1612 &r[0], &r[1], &r[2], lod, 1613 control, 1614 &r[0], &r[1], &r[2], &r[3]); 1615 break; 1616 1617 default: 1618 assert(0); 1619 } 1620 1621 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1622 STORE(&r[chan_index], 0, chan_index); 1623 } 1624} 1625 1626static void 1627exec_txd(struct tgsi_exec_machine *mach, 1628 const struct tgsi_full_instruction *inst) 1629{ 1630 const uint unit = inst->Src[3].Register.Index; 1631 union tgsi_exec_channel r[4]; 1632 uint chan_index; 1633 1634 /* 1635 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 1636 */ 1637 1638 switch (inst->Texture.Texture) { 1639 case TGSI_TEXTURE_1D: 1640 case TGSI_TEXTURE_SHADOW1D: 1641 1642 FETCH(&r[0], 0, CHAN_X); 1643 1644 fetch_texel(mach->Samplers[unit], 1645 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ 1646 tgsi_sampler_lod_bias, 1647 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1648 break; 1649 1650 case TGSI_TEXTURE_2D: 1651 case TGSI_TEXTURE_RECT: 1652 case TGSI_TEXTURE_SHADOW2D: 1653 case TGSI_TEXTURE_SHADOWRECT: 1654 1655 FETCH(&r[0], 0, CHAN_X); 1656 FETCH(&r[1], 0, CHAN_Y); 1657 FETCH(&r[2], 0, CHAN_Z); 1658 1659 fetch_texel(mach->Samplers[unit], 1660 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ 1661 tgsi_sampler_lod_bias, 1662 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1663 break; 1664 1665 case TGSI_TEXTURE_3D: 1666 case TGSI_TEXTURE_CUBE: 1667 1668 FETCH(&r[0], 0, CHAN_X); 1669 FETCH(&r[1], 0, CHAN_Y); 1670 FETCH(&r[2], 0, CHAN_Z); 1671 1672 fetch_texel(mach->Samplers[unit], 1673 &r[0], &r[1], &r[2], &ZeroVec, 1674 tgsi_sampler_lod_bias, 1675 &r[0], &r[1], &r[2], &r[3]); 1676 break; 1677 1678 default: 1679 assert(0); 1680 } 1681 1682 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1683 STORE(&r[chan_index], 0, chan_index); 1684 } 1685} 1686 1687 1688/** 1689 * Evaluate a constant-valued coefficient at the position of the 1690 * current quad. 1691 */ 1692static void 1693eval_constant_coef( 1694 struct tgsi_exec_machine *mach, 1695 unsigned attrib, 1696 unsigned chan ) 1697{ 1698 unsigned i; 1699 1700 for( i = 0; i < QUAD_SIZE; i++ ) { 1701 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1702 } 1703} 1704 1705/** 1706 * Evaluate a linear-valued coefficient at the position of the 1707 * current quad. 1708 */ 1709static void 1710eval_linear_coef( 1711 struct tgsi_exec_machine *mach, 1712 unsigned attrib, 1713 unsigned chan ) 1714{ 1715 const float x = mach->QuadPos.xyzw[0].f[0]; 1716 const float y = mach->QuadPos.xyzw[1].f[0]; 1717 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1718 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1719 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1720 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1721 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1722 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1723 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1724} 1725 1726/** 1727 * Evaluate a perspective-valued coefficient at the position of the 1728 * current quad. 1729 */ 1730static void 1731eval_perspective_coef( 1732 struct tgsi_exec_machine *mach, 1733 unsigned attrib, 1734 unsigned chan ) 1735{ 1736 const float x = mach->QuadPos.xyzw[0].f[0]; 1737 const float y = mach->QuadPos.xyzw[1].f[0]; 1738 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1739 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1740 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1741 const float *w = mach->QuadPos.xyzw[3].f; 1742 /* divide by W here */ 1743 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1744 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1745 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1746 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1747} 1748 1749 1750typedef void (* eval_coef_func)( 1751 struct tgsi_exec_machine *mach, 1752 unsigned attrib, 1753 unsigned chan ); 1754 1755static void 1756exec_declaration(struct tgsi_exec_machine *mach, 1757 const struct tgsi_full_declaration *decl) 1758{ 1759 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 1760 if (decl->Declaration.File == TGSI_FILE_INPUT || 1761 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1762 uint first, last, mask; 1763 1764 first = decl->Range.First; 1765 last = decl->Range.Last; 1766 mask = decl->Declaration.UsageMask; 1767 1768 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 1769 uint i; 1770 1771 assert(decl->Semantic.Index == 0); 1772 assert(first == last); 1773 1774 for (i = 0; i < QUAD_SIZE; i++) { 1775 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 1776 } 1777 } else { 1778 eval_coef_func eval; 1779 uint i, j; 1780 1781 switch (decl->Declaration.Interpolate) { 1782 case TGSI_INTERPOLATE_CONSTANT: 1783 eval = eval_constant_coef; 1784 break; 1785 1786 case TGSI_INTERPOLATE_LINEAR: 1787 eval = eval_linear_coef; 1788 break; 1789 1790 case TGSI_INTERPOLATE_PERSPECTIVE: 1791 eval = eval_perspective_coef; 1792 break; 1793 1794 default: 1795 assert(0); 1796 return; 1797 } 1798 1799 for (j = 0; j < NUM_CHANNELS; j++) { 1800 if (mask & (1 << j)) { 1801 for (i = first; i <= last; i++) { 1802 eval(mach, i, j); 1803 } 1804 } 1805 } 1806 } 1807 } 1808 } 1809} 1810 1811typedef void (* micro_op)(union tgsi_exec_channel *dst, 1812 const union tgsi_exec_channel *src); 1813 1814static void 1815exec_scalar_unary(struct tgsi_exec_machine *mach, 1816 const struct tgsi_full_instruction *inst, 1817 micro_op op, 1818 enum tgsi_exec_datatype dst_datatype, 1819 enum tgsi_exec_datatype src_datatype) 1820{ 1821 unsigned int chan; 1822 union tgsi_exec_channel src; 1823 union tgsi_exec_channel dst; 1824 1825 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); 1826 op(&dst, &src); 1827 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1828 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1829 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 1830 } 1831 } 1832} 1833 1834static void 1835exec_vector_unary(struct tgsi_exec_machine *mach, 1836 const struct tgsi_full_instruction *inst, 1837 micro_op op, 1838 enum tgsi_exec_datatype dst_datatype, 1839 enum tgsi_exec_datatype src_datatype) 1840{ 1841 unsigned int chan; 1842 struct tgsi_exec_vector dst; 1843 1844 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1845 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1846 union tgsi_exec_channel src; 1847 1848 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 1849 op(&dst.xyzw[chan], &src); 1850 } 1851 } 1852 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1853 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1854 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 1855 } 1856 } 1857} 1858 1859static void 1860exec_vector_binary(struct tgsi_exec_machine *mach, 1861 const struct tgsi_full_instruction *inst, 1862 micro_op op, 1863 enum tgsi_exec_datatype dst_datatype, 1864 enum tgsi_exec_datatype src_datatype) 1865{ 1866 unsigned int chan; 1867 struct tgsi_exec_vector dst; 1868 1869 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1870 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1871 union tgsi_exec_channel src[2]; 1872 1873 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 1874 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 1875 op(&dst.xyzw[chan], src); 1876 } 1877 } 1878 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1879 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1880 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 1881 } 1882 } 1883} 1884 1885static void 1886exec_vector_trinary(struct tgsi_exec_machine *mach, 1887 const struct tgsi_full_instruction *inst, 1888 micro_op op, 1889 enum tgsi_exec_datatype dst_datatype, 1890 enum tgsi_exec_datatype src_datatype) 1891{ 1892 unsigned int chan; 1893 struct tgsi_exec_vector dst; 1894 1895 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1896 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1897 union tgsi_exec_channel src[3]; 1898 1899 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 1900 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 1901 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 1902 op(&dst.xyzw[chan], src); 1903 } 1904 } 1905 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1906 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1907 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 1908 } 1909 } 1910} 1911 1912static void 1913exec_dp3(struct tgsi_exec_machine *mach, 1914 const struct tgsi_full_instruction *inst) 1915{ 1916 unsigned int chan; 1917 union tgsi_exec_channel arg[3]; 1918 1919 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1920 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1921 micro_mul(&arg[2], &arg[0], &arg[1]); 1922 1923 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { 1924 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 1925 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 1926 micro_mad(&arg[2], arg); 1927 } 1928 1929 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1930 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1931 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1932 } 1933 } 1934} 1935 1936static void 1937exec_dp4(struct tgsi_exec_machine *mach, 1938 const struct tgsi_full_instruction *inst) 1939{ 1940 unsigned int chan; 1941 union tgsi_exec_channel arg[3]; 1942 1943 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1944 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1945 micro_mul(&arg[2], &arg[0], &arg[1]); 1946 1947 for (chan = CHAN_Y; chan <= CHAN_W; chan++) { 1948 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 1949 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 1950 micro_mad(&arg[2], arg); 1951 } 1952 1953 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1954 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1955 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1956 } 1957 } 1958} 1959 1960static void 1961exec_dp2a(struct tgsi_exec_machine *mach, 1962 const struct tgsi_full_instruction *inst) 1963{ 1964 unsigned int chan; 1965 union tgsi_exec_channel arg[3]; 1966 1967 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1968 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1969 micro_mul(&arg[2], &arg[0], &arg[1]); 1970 1971 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 1972 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 1973 micro_mad(&arg[0], arg); 1974 1975 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1976 micro_add(&arg[0], &arg[0], &arg[1]); 1977 1978 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1979 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1980 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1981 } 1982 } 1983} 1984 1985static void 1986exec_dph(struct tgsi_exec_machine *mach, 1987 const struct tgsi_full_instruction *inst) 1988{ 1989 unsigned int chan; 1990 union tgsi_exec_channel arg[3]; 1991 1992 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1993 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1994 micro_mul(&arg[2], &arg[0], &arg[1]); 1995 1996 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 1997 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 1998 micro_mad(&arg[2], arg); 1999 2000 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2001 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2002 micro_mad(&arg[0], arg); 2003 2004 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); 2005 micro_add(&arg[0], &arg[0], &arg[1]); 2006 2007 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2008 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2009 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2010 } 2011 } 2012} 2013 2014static void 2015exec_dp2(struct tgsi_exec_machine *mach, 2016 const struct tgsi_full_instruction *inst) 2017{ 2018 unsigned int chan; 2019 union tgsi_exec_channel arg[3]; 2020 2021 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2022 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2023 micro_mul(&arg[2], &arg[0], &arg[1]); 2024 2025 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2026 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2027 micro_mad(&arg[2], arg); 2028 2029 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2030 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2031 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2032 } 2033 } 2034} 2035 2036static void 2037exec_nrm4(struct tgsi_exec_machine *mach, 2038 const struct tgsi_full_instruction *inst) 2039{ 2040 unsigned int chan; 2041 union tgsi_exec_channel arg[4]; 2042 union tgsi_exec_channel scale; 2043 2044 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2045 micro_mul(&scale, &arg[0], &arg[0]); 2046 2047 for (chan = CHAN_Y; chan <= CHAN_W; chan++) { 2048 union tgsi_exec_channel product; 2049 2050 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2051 micro_mul(&product, &arg[chan], &arg[chan]); 2052 micro_add(&scale, &scale, &product); 2053 } 2054 2055 micro_rsq(&scale, &scale); 2056 2057 for (chan = CHAN_X; chan <= CHAN_W; chan++) { 2058 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2059 micro_mul(&arg[chan], &arg[chan], &scale); 2060 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2061 } 2062 } 2063} 2064 2065static void 2066exec_nrm3(struct tgsi_exec_machine *mach, 2067 const struct tgsi_full_instruction *inst) 2068{ 2069 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 2070 unsigned int chan; 2071 union tgsi_exec_channel arg[3]; 2072 union tgsi_exec_channel scale; 2073 2074 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2075 micro_mul(&scale, &arg[0], &arg[0]); 2076 2077 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { 2078 union tgsi_exec_channel product; 2079 2080 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2081 micro_mul(&product, &arg[chan], &arg[chan]); 2082 micro_add(&scale, &scale, &product); 2083 } 2084 2085 micro_rsq(&scale, &scale); 2086 2087 for (chan = CHAN_X; chan <= CHAN_Z; chan++) { 2088 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2089 micro_mul(&arg[chan], &arg[chan], &scale); 2090 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2091 } 2092 } 2093 } 2094 2095 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2096 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2097 } 2098} 2099 2100static void 2101exec_break(struct tgsi_exec_machine *mach) 2102{ 2103 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 2104 /* turn off loop channels for each enabled exec channel */ 2105 mach->LoopMask &= ~mach->ExecMask; 2106 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2107 UPDATE_EXEC_MASK(mach); 2108 } else { 2109 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 2110 2111 mach->Switch.mask = 0x0; 2112 2113 UPDATE_EXEC_MASK(mach); 2114 } 2115} 2116 2117static void 2118exec_switch(struct tgsi_exec_machine *mach, 2119 const struct tgsi_full_instruction *inst) 2120{ 2121 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 2122 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 2123 2124 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 2125 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); 2126 mach->Switch.mask = 0x0; 2127 mach->Switch.defaultMask = 0x0; 2128 2129 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 2130 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 2131 2132 UPDATE_EXEC_MASK(mach); 2133} 2134 2135static void 2136exec_case(struct tgsi_exec_machine *mach, 2137 const struct tgsi_full_instruction *inst) 2138{ 2139 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 2140 union tgsi_exec_channel src; 2141 uint mask = 0; 2142 2143 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); 2144 2145 if (mach->Switch.selector.u[0] == src.u[0]) { 2146 mask |= 0x1; 2147 } 2148 if (mach->Switch.selector.u[1] == src.u[1]) { 2149 mask |= 0x2; 2150 } 2151 if (mach->Switch.selector.u[2] == src.u[2]) { 2152 mask |= 0x4; 2153 } 2154 if (mach->Switch.selector.u[3] == src.u[3]) { 2155 mask |= 0x8; 2156 } 2157 2158 mach->Switch.defaultMask |= mask; 2159 2160 mach->Switch.mask |= mask & prevMask; 2161 2162 UPDATE_EXEC_MASK(mach); 2163} 2164 2165static void 2166exec_default(struct tgsi_exec_machine *mach) 2167{ 2168 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 2169 2170 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 2171 2172 UPDATE_EXEC_MASK(mach); 2173} 2174 2175static void 2176exec_endswitch(struct tgsi_exec_machine *mach) 2177{ 2178 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 2179 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 2180 2181 UPDATE_EXEC_MASK(mach); 2182} 2183 2184static void 2185micro_i2f(union tgsi_exec_channel *dst, 2186 const union tgsi_exec_channel *src) 2187{ 2188 dst->f[0] = (float)src->i[0]; 2189 dst->f[1] = (float)src->i[1]; 2190 dst->f[2] = (float)src->i[2]; 2191 dst->f[3] = (float)src->i[3]; 2192} 2193 2194static void 2195micro_not(union tgsi_exec_channel *dst, 2196 const union tgsi_exec_channel *src) 2197{ 2198 dst->u[0] = ~src->u[0]; 2199 dst->u[1] = ~src->u[1]; 2200 dst->u[2] = ~src->u[2]; 2201 dst->u[3] = ~src->u[3]; 2202} 2203 2204static void 2205micro_shl(union tgsi_exec_channel *dst, 2206 const union tgsi_exec_channel *src) 2207{ 2208 dst->u[0] = src[0].u[0] << src[1].u[0]; 2209 dst->u[1] = src[0].u[1] << src[1].u[1]; 2210 dst->u[2] = src[0].u[2] << src[1].u[2]; 2211 dst->u[3] = src[0].u[3] << src[1].u[3]; 2212} 2213 2214static void 2215micro_and(union tgsi_exec_channel *dst, 2216 const union tgsi_exec_channel *src) 2217{ 2218 dst->u[0] = src[0].u[0] & src[1].u[0]; 2219 dst->u[1] = src[0].u[1] & src[1].u[1]; 2220 dst->u[2] = src[0].u[2] & src[1].u[2]; 2221 dst->u[3] = src[0].u[3] & src[1].u[3]; 2222} 2223 2224static void 2225micro_or(union tgsi_exec_channel *dst, 2226 const union tgsi_exec_channel *src) 2227{ 2228 dst->u[0] = src[0].u[0] | src[1].u[0]; 2229 dst->u[1] = src[0].u[1] | src[1].u[1]; 2230 dst->u[2] = src[0].u[2] | src[1].u[2]; 2231 dst->u[3] = src[0].u[3] | src[1].u[3]; 2232} 2233 2234static void 2235micro_xor(union tgsi_exec_channel *dst, 2236 const union tgsi_exec_channel *src) 2237{ 2238 dst->u[0] = src[0].u[0] ^ src[1].u[0]; 2239 dst->u[1] = src[0].u[1] ^ src[1].u[1]; 2240 dst->u[2] = src[0].u[2] ^ src[1].u[2]; 2241 dst->u[3] = src[0].u[3] ^ src[1].u[3]; 2242} 2243 2244static void 2245micro_f2i(union tgsi_exec_channel *dst, 2246 const union tgsi_exec_channel *src) 2247{ 2248 dst->i[0] = (int)src->f[0]; 2249 dst->i[1] = (int)src->f[1]; 2250 dst->i[2] = (int)src->f[2]; 2251 dst->i[3] = (int)src->f[3]; 2252} 2253 2254static void 2255micro_idiv(union tgsi_exec_channel *dst, 2256 const union tgsi_exec_channel *src) 2257{ 2258 dst->i[0] = src[0].i[0] / src[1].i[0]; 2259 dst->i[1] = src[0].i[1] / src[1].i[1]; 2260 dst->i[2] = src[0].i[2] / src[1].i[2]; 2261 dst->i[3] = src[0].i[3] / src[1].i[3]; 2262} 2263 2264static void 2265micro_imax(union tgsi_exec_channel *dst, 2266 const union tgsi_exec_channel *src) 2267{ 2268 dst->i[0] = src[0].i[0] > src[1].i[0] ? src[0].i[0] : src[1].i[0]; 2269 dst->i[1] = src[0].i[1] > src[1].i[1] ? src[0].i[1] : src[1].i[1]; 2270 dst->i[2] = src[0].i[2] > src[1].i[2] ? src[0].i[2] : src[1].i[2]; 2271 dst->i[3] = src[0].i[3] > src[1].i[3] ? src[0].i[3] : src[1].i[3]; 2272} 2273 2274static void 2275micro_imin(union tgsi_exec_channel *dst, 2276 const union tgsi_exec_channel *src) 2277{ 2278 dst->i[0] = src[0].i[0] < src[1].i[0] ? src[0].i[0] : src[1].i[0]; 2279 dst->i[1] = src[0].i[1] < src[1].i[1] ? src[0].i[1] : src[1].i[1]; 2280 dst->i[2] = src[0].i[2] < src[1].i[2] ? src[0].i[2] : src[1].i[2]; 2281 dst->i[3] = src[0].i[3] < src[1].i[3] ? src[0].i[3] : src[1].i[3]; 2282} 2283 2284static void 2285micro_isge(union tgsi_exec_channel *dst, 2286 const union tgsi_exec_channel *src) 2287{ 2288 dst->i[0] = src[0].i[0] >= src[1].i[0] ? -1 : 0; 2289 dst->i[1] = src[0].i[1] >= src[1].i[1] ? -1 : 0; 2290 dst->i[2] = src[0].i[2] >= src[1].i[2] ? -1 : 0; 2291 dst->i[3] = src[0].i[3] >= src[1].i[3] ? -1 : 0; 2292} 2293 2294static void 2295micro_ishr(union tgsi_exec_channel *dst, 2296 const union tgsi_exec_channel *src) 2297{ 2298 dst->i[0] = src[0].i[0] >> src[1].i[0]; 2299 dst->i[1] = src[0].i[1] >> src[1].i[1]; 2300 dst->i[2] = src[0].i[2] >> src[1].i[2]; 2301 dst->i[3] = src[0].i[3] >> src[1].i[3]; 2302} 2303 2304static void 2305micro_islt(union tgsi_exec_channel *dst, 2306 const union tgsi_exec_channel *src) 2307{ 2308 dst->i[0] = src[0].i[0] < src[1].i[0] ? -1 : 0; 2309 dst->i[1] = src[0].i[1] < src[1].i[1] ? -1 : 0; 2310 dst->i[2] = src[0].i[2] < src[1].i[2] ? -1 : 0; 2311 dst->i[3] = src[0].i[3] < src[1].i[3] ? -1 : 0; 2312} 2313 2314static void 2315micro_f2u(union tgsi_exec_channel *dst, 2316 const union tgsi_exec_channel *src) 2317{ 2318 dst->u[0] = (uint)src->f[0]; 2319 dst->u[1] = (uint)src->f[1]; 2320 dst->u[2] = (uint)src->f[2]; 2321 dst->u[3] = (uint)src->f[3]; 2322} 2323 2324static void 2325micro_u2f(union tgsi_exec_channel *dst, 2326 const union tgsi_exec_channel *src) 2327{ 2328 dst->f[0] = (float)src->u[0]; 2329 dst->f[1] = (float)src->u[1]; 2330 dst->f[2] = (float)src->u[2]; 2331 dst->f[3] = (float)src->u[3]; 2332} 2333 2334static void 2335micro_uadd(union tgsi_exec_channel *dst, 2336 const union tgsi_exec_channel *src) 2337{ 2338 dst->u[0] = src[0].u[0] + src[1].u[0]; 2339 dst->u[1] = src[0].u[1] + src[1].u[1]; 2340 dst->u[2] = src[0].u[2] + src[1].u[2]; 2341 dst->u[3] = src[0].u[3] + src[1].u[3]; 2342} 2343 2344static void 2345micro_udiv(union tgsi_exec_channel *dst, 2346 const union tgsi_exec_channel *src) 2347{ 2348 dst->u[0] = src[0].u[0] / src[1].u[0]; 2349 dst->u[1] = src[0].u[1] / src[1].u[1]; 2350 dst->u[2] = src[0].u[2] / src[1].u[2]; 2351 dst->u[3] = src[0].u[3] / src[1].u[3]; 2352} 2353 2354static void 2355micro_umad(union tgsi_exec_channel *dst, 2356 const union tgsi_exec_channel *src) 2357{ 2358 dst->u[0] = src[0].u[0] * src[1].u[0] + src[2].u[0]; 2359 dst->u[1] = src[0].u[1] * src[1].u[1] + src[2].u[1]; 2360 dst->u[2] = src[0].u[2] * src[1].u[2] + src[2].u[2]; 2361 dst->u[3] = src[0].u[3] * src[1].u[3] + src[2].u[3]; 2362} 2363 2364static void 2365micro_umax(union tgsi_exec_channel *dst, 2366 const union tgsi_exec_channel *src) 2367{ 2368 dst->u[0] = src[0].u[0] > src[1].u[0] ? src[0].u[0] : src[1].u[0]; 2369 dst->u[1] = src[0].u[1] > src[1].u[1] ? src[0].u[1] : src[1].u[1]; 2370 dst->u[2] = src[0].u[2] > src[1].u[2] ? src[0].u[2] : src[1].u[2]; 2371 dst->u[3] = src[0].u[3] > src[1].u[3] ? src[0].u[3] : src[1].u[3]; 2372} 2373 2374static void 2375micro_umin(union tgsi_exec_channel *dst, 2376 const union tgsi_exec_channel *src) 2377{ 2378 dst->u[0] = src[0].u[0] < src[1].u[0] ? src[0].u[0] : src[1].u[0]; 2379 dst->u[1] = src[0].u[1] < src[1].u[1] ? src[0].u[1] : src[1].u[1]; 2380 dst->u[2] = src[0].u[2] < src[1].u[2] ? src[0].u[2] : src[1].u[2]; 2381 dst->u[3] = src[0].u[3] < src[1].u[3] ? src[0].u[3] : src[1].u[3]; 2382} 2383 2384static void 2385micro_umod(union tgsi_exec_channel *dst, 2386 const union tgsi_exec_channel *src) 2387{ 2388 dst->u[0] = src[0].u[0] % src[1].u[0]; 2389 dst->u[1] = src[0].u[1] % src[1].u[1]; 2390 dst->u[2] = src[0].u[2] % src[1].u[2]; 2391 dst->u[3] = src[0].u[3] % src[1].u[3]; 2392} 2393 2394static void 2395micro_umul(union tgsi_exec_channel *dst, 2396 const union tgsi_exec_channel *src) 2397{ 2398 dst->u[0] = src[0].u[0] * src[1].u[0]; 2399 dst->u[1] = src[0].u[1] * src[1].u[1]; 2400 dst->u[2] = src[0].u[2] * src[1].u[2]; 2401 dst->u[3] = src[0].u[3] * src[1].u[3]; 2402} 2403 2404static void 2405micro_useq(union tgsi_exec_channel *dst, 2406 const union tgsi_exec_channel *src) 2407{ 2408 dst->u[0] = src[0].u[0] == src[1].u[0] ? ~0 : 0; 2409 dst->u[1] = src[0].u[1] == src[1].u[1] ? ~0 : 0; 2410 dst->u[2] = src[0].u[2] == src[1].u[2] ? ~0 : 0; 2411 dst->u[3] = src[0].u[3] == src[1].u[3] ? ~0 : 0; 2412} 2413 2414static void 2415micro_usge(union tgsi_exec_channel *dst, 2416 const union tgsi_exec_channel *src) 2417{ 2418 dst->u[0] = src[0].u[0] >= src[1].u[0] ? ~0 : 0; 2419 dst->u[1] = src[0].u[1] >= src[1].u[1] ? ~0 : 0; 2420 dst->u[2] = src[0].u[2] >= src[1].u[2] ? ~0 : 0; 2421 dst->u[3] = src[0].u[3] >= src[1].u[3] ? ~0 : 0; 2422} 2423 2424static void 2425micro_ushr(union tgsi_exec_channel *dst, 2426 const union tgsi_exec_channel *src) 2427{ 2428 dst->u[0] = src[0].u[0] >> src[1].u[0]; 2429 dst->u[1] = src[0].u[1] >> src[1].u[1]; 2430 dst->u[2] = src[0].u[2] >> src[1].u[2]; 2431 dst->u[3] = src[0].u[3] >> src[1].u[3]; 2432} 2433 2434static void 2435micro_uslt(union tgsi_exec_channel *dst, 2436 const union tgsi_exec_channel *src) 2437{ 2438 dst->u[0] = src[0].u[0] < src[1].u[0] ? ~0 : 0; 2439 dst->u[1] = src[0].u[1] < src[1].u[1] ? ~0 : 0; 2440 dst->u[2] = src[0].u[2] < src[1].u[2] ? ~0 : 0; 2441 dst->u[3] = src[0].u[3] < src[1].u[3] ? ~0 : 0; 2442} 2443 2444static void 2445micro_usne(union tgsi_exec_channel *dst, 2446 const union tgsi_exec_channel *src) 2447{ 2448 dst->u[0] = src[0].u[0] != src[1].u[0] ? ~0 : 0; 2449 dst->u[1] = src[0].u[1] != src[1].u[1] ? ~0 : 0; 2450 dst->u[2] = src[0].u[2] != src[1].u[2] ? ~0 : 0; 2451 dst->u[3] = src[0].u[3] != src[1].u[3] ? ~0 : 0; 2452} 2453 2454static void 2455exec_instruction( 2456 struct tgsi_exec_machine *mach, 2457 const struct tgsi_full_instruction *inst, 2458 int *pc ) 2459{ 2460 uint chan_index; 2461 union tgsi_exec_channel r[10]; 2462 union tgsi_exec_channel d[8]; 2463 2464 (*pc)++; 2465 2466 switch (inst->Instruction.Opcode) { 2467 case TGSI_OPCODE_ARL: 2468 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 2469 break; 2470 2471 case TGSI_OPCODE_MOV: 2472 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 2473 break; 2474 2475 case TGSI_OPCODE_LIT: 2476 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2477 FETCH( &r[0], 0, CHAN_X ); 2478 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2479 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2480 } 2481 2482 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2483 FETCH( &r[1], 0, CHAN_Y ); 2484 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2485 2486 FETCH( &r[2], 0, CHAN_W ); 2487 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2488 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2489 micro_pow( &r[1], &r[1], &r[2] ); 2490 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2491 } 2492 2493 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2494 STORE(&d[CHAN_Y], 0, CHAN_Y); 2495 } 2496 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2497 STORE(&d[CHAN_Z], 0, CHAN_Z); 2498 } 2499 } 2500 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2501 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2502 } 2503 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2504 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2505 } 2506 break; 2507 2508 case TGSI_OPCODE_RCP: 2509 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2510 break; 2511 2512 case TGSI_OPCODE_RSQ: 2513 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2514 break; 2515 2516 case TGSI_OPCODE_EXP: 2517 FETCH( &r[0], 0, CHAN_X ); 2518 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2519 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2520 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2521 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2522 } 2523 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2524 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2525 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2526 } 2527 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2528 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2529 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2530 } 2531 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2532 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2533 } 2534 break; 2535 2536 case TGSI_OPCODE_LOG: 2537 FETCH( &r[0], 0, CHAN_X ); 2538 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2539 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2540 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2541 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2542 STORE( &r[0], 0, CHAN_X ); 2543 } 2544 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2545 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2546 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2547 STORE( &r[0], 0, CHAN_Y ); 2548 } 2549 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2550 STORE( &r[1], 0, CHAN_Z ); 2551 } 2552 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2553 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2554 } 2555 break; 2556 2557 case TGSI_OPCODE_MUL: 2558 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2559 FETCH(&r[0], 0, chan_index); 2560 FETCH(&r[1], 1, chan_index); 2561 micro_mul(&d[chan_index], &r[0], &r[1]); 2562 } 2563 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2564 STORE(&d[chan_index], 0, chan_index); 2565 } 2566 break; 2567 2568 case TGSI_OPCODE_ADD: 2569 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2570 FETCH( &r[0], 0, chan_index ); 2571 FETCH( &r[1], 1, chan_index ); 2572 micro_add(&d[chan_index], &r[0], &r[1]); 2573 } 2574 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2575 STORE(&d[chan_index], 0, chan_index); 2576 } 2577 break; 2578 2579 case TGSI_OPCODE_DP3: 2580 exec_dp3(mach, inst); 2581 break; 2582 2583 case TGSI_OPCODE_DP4: 2584 exec_dp4(mach, inst); 2585 break; 2586 2587 case TGSI_OPCODE_DST: 2588 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2589 FETCH( &r[0], 0, CHAN_Y ); 2590 FETCH( &r[1], 1, CHAN_Y); 2591 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2592 } 2593 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2594 FETCH(&d[CHAN_Z], 0, CHAN_Z); 2595 } 2596 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2597 FETCH(&d[CHAN_W], 1, CHAN_W); 2598 } 2599 2600 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2601 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); 2602 } 2603 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2604 STORE(&d[CHAN_Y], 0, CHAN_Y); 2605 } 2606 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2607 STORE(&d[CHAN_Z], 0, CHAN_Z); 2608 } 2609 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2610 STORE(&d[CHAN_W], 0, CHAN_W); 2611 } 2612 break; 2613 2614 case TGSI_OPCODE_MIN: 2615 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2616 FETCH(&r[0], 0, chan_index); 2617 FETCH(&r[1], 1, chan_index); 2618 2619 /* XXX use micro_min()?? */ 2620 micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]); 2621 } 2622 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2623 STORE(&d[chan_index], 0, chan_index); 2624 } 2625 break; 2626 2627 case TGSI_OPCODE_MAX: 2628 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2629 FETCH(&r[0], 0, chan_index); 2630 FETCH(&r[1], 1, chan_index); 2631 2632 /* XXX use micro_max()?? */ 2633 micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] ); 2634 } 2635 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2636 STORE(&d[chan_index], 0, chan_index); 2637 } 2638 break; 2639 2640 case TGSI_OPCODE_SLT: 2641 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2642 break; 2643 2644 case TGSI_OPCODE_SGE: 2645 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2646 break; 2647 2648 case TGSI_OPCODE_MAD: 2649 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2650 break; 2651 2652 case TGSI_OPCODE_SUB: 2653 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2654 FETCH(&r[0], 0, chan_index); 2655 FETCH(&r[1], 1, chan_index); 2656 micro_sub(&d[chan_index], &r[0], &r[1]); 2657 } 2658 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2659 STORE(&d[chan_index], 0, chan_index); 2660 } 2661 break; 2662 2663 case TGSI_OPCODE_LRP: 2664 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2665 break; 2666 2667 case TGSI_OPCODE_CND: 2668 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2669 FETCH(&r[0], 0, chan_index); 2670 FETCH(&r[1], 1, chan_index); 2671 FETCH(&r[2], 2, chan_index); 2672 micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2673 } 2674 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2675 STORE(&d[chan_index], 0, chan_index); 2676 } 2677 break; 2678 2679 case TGSI_OPCODE_DP2A: 2680 exec_dp2a(mach, inst); 2681 break; 2682 2683 case TGSI_OPCODE_FRC: 2684 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2685 break; 2686 2687 case TGSI_OPCODE_CLAMP: 2688 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2689 FETCH(&r[0], 0, chan_index); 2690 FETCH(&r[1], 1, chan_index); 2691 micro_max(&r[0], &r[0], &r[1]); 2692 FETCH(&r[1], 2, chan_index); 2693 micro_min(&d[chan_index], &r[0], &r[1]); 2694 } 2695 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2696 STORE(&d[chan_index], 0, chan_index); 2697 } 2698 break; 2699 2700 case TGSI_OPCODE_FLR: 2701 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2702 break; 2703 2704 case TGSI_OPCODE_ROUND: 2705 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2706 break; 2707 2708 case TGSI_OPCODE_EX2: 2709 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2710 break; 2711 2712 case TGSI_OPCODE_LG2: 2713 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2714 break; 2715 2716 case TGSI_OPCODE_POW: 2717 FETCH(&r[0], 0, CHAN_X); 2718 FETCH(&r[1], 1, CHAN_X); 2719 2720 micro_pow( &r[0], &r[0], &r[1] ); 2721 2722 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2723 STORE( &r[0], 0, chan_index ); 2724 } 2725 break; 2726 2727 case TGSI_OPCODE_XPD: 2728 FETCH(&r[0], 0, CHAN_Y); 2729 FETCH(&r[1], 1, CHAN_Z); 2730 2731 micro_mul( &r[2], &r[0], &r[1] ); 2732 2733 FETCH(&r[3], 0, CHAN_Z); 2734 FETCH(&r[4], 1, CHAN_Y); 2735 2736 micro_mul( &r[5], &r[3], &r[4] ); 2737 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2738 2739 FETCH(&r[2], 1, CHAN_X); 2740 2741 micro_mul( &r[3], &r[3], &r[2] ); 2742 2743 FETCH(&r[5], 0, CHAN_X); 2744 2745 micro_mul( &r[1], &r[1], &r[5] ); 2746 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2747 2748 micro_mul( &r[5], &r[5], &r[4] ); 2749 micro_mul( &r[0], &r[0], &r[2] ); 2750 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2751 2752 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2753 STORE(&d[CHAN_X], 0, CHAN_X); 2754 } 2755 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2756 STORE(&d[CHAN_Y], 0, CHAN_Y); 2757 } 2758 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2759 STORE(&d[CHAN_Z], 0, CHAN_Z); 2760 } 2761 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2762 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2763 } 2764 break; 2765 2766 case TGSI_OPCODE_ABS: 2767 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2768 break; 2769 2770 case TGSI_OPCODE_RCC: 2771 FETCH(&r[0], 0, CHAN_X); 2772 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2773 micro_float_clamp(&r[0], &r[0]); 2774 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2775 STORE(&r[0], 0, chan_index); 2776 } 2777 break; 2778 2779 case TGSI_OPCODE_DPH: 2780 exec_dph(mach, inst); 2781 break; 2782 2783 case TGSI_OPCODE_COS: 2784 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2785 break; 2786 2787 case TGSI_OPCODE_DDX: 2788 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2789 break; 2790 2791 case TGSI_OPCODE_DDY: 2792 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2793 break; 2794 2795 case TGSI_OPCODE_KILP: 2796 exec_kilp (mach, inst); 2797 break; 2798 2799 case TGSI_OPCODE_KIL: 2800 exec_kil (mach, inst); 2801 break; 2802 2803 case TGSI_OPCODE_PK2H: 2804 assert (0); 2805 break; 2806 2807 case TGSI_OPCODE_PK2US: 2808 assert (0); 2809 break; 2810 2811 case TGSI_OPCODE_PK4B: 2812 assert (0); 2813 break; 2814 2815 case TGSI_OPCODE_PK4UB: 2816 assert (0); 2817 break; 2818 2819 case TGSI_OPCODE_RFL: 2820 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2821 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2822 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2823 /* r0 = dp3(src0, src0) */ 2824 FETCH(&r[2], 0, CHAN_X); 2825 micro_mul(&r[0], &r[2], &r[2]); 2826 FETCH(&r[4], 0, CHAN_Y); 2827 micro_mul(&r[8], &r[4], &r[4]); 2828 micro_add(&r[0], &r[0], &r[8]); 2829 FETCH(&r[6], 0, CHAN_Z); 2830 micro_mul(&r[8], &r[6], &r[6]); 2831 micro_add(&r[0], &r[0], &r[8]); 2832 2833 /* r1 = dp3(src0, src1) */ 2834 FETCH(&r[3], 1, CHAN_X); 2835 micro_mul(&r[1], &r[2], &r[3]); 2836 FETCH(&r[5], 1, CHAN_Y); 2837 micro_mul(&r[8], &r[4], &r[5]); 2838 micro_add(&r[1], &r[1], &r[8]); 2839 FETCH(&r[7], 1, CHAN_Z); 2840 micro_mul(&r[8], &r[6], &r[7]); 2841 micro_add(&r[1], &r[1], &r[8]); 2842 2843 /* r1 = 2 * r1 / r0 */ 2844 micro_add(&r[1], &r[1], &r[1]); 2845 micro_div(&r[1], &r[1], &r[0]); 2846 2847 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2848 micro_mul(&r[2], &r[2], &r[1]); 2849 micro_sub(&r[2], &r[2], &r[3]); 2850 STORE(&r[2], 0, CHAN_X); 2851 } 2852 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2853 micro_mul(&r[4], &r[4], &r[1]); 2854 micro_sub(&r[4], &r[4], &r[5]); 2855 STORE(&r[4], 0, CHAN_Y); 2856 } 2857 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2858 micro_mul(&r[6], &r[6], &r[1]); 2859 micro_sub(&r[6], &r[6], &r[7]); 2860 STORE(&r[6], 0, CHAN_Z); 2861 } 2862 } 2863 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2864 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2865 } 2866 break; 2867 2868 case TGSI_OPCODE_SEQ: 2869 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2870 break; 2871 2872 case TGSI_OPCODE_SFL: 2873 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2874 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2875 } 2876 break; 2877 2878 case TGSI_OPCODE_SGT: 2879 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2880 break; 2881 2882 case TGSI_OPCODE_SIN: 2883 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2884 break; 2885 2886 case TGSI_OPCODE_SLE: 2887 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2888 break; 2889 2890 case TGSI_OPCODE_SNE: 2891 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2892 break; 2893 2894 case TGSI_OPCODE_STR: 2895 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2896 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2897 } 2898 break; 2899 2900 case TGSI_OPCODE_TEX: 2901 /* simple texture lookup */ 2902 /* src[0] = texcoord */ 2903 /* src[1] = sampler unit */ 2904 exec_tex(mach, inst, TEX_MODIFIER_NONE); 2905 break; 2906 2907 case TGSI_OPCODE_TXB: 2908 /* Texture lookup with lod bias */ 2909 /* src[0] = texcoord (src[0].w = LOD bias) */ 2910 /* src[1] = sampler unit */ 2911 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); 2912 break; 2913 2914 case TGSI_OPCODE_TXD: 2915 /* Texture lookup with explict partial derivatives */ 2916 /* src[0] = texcoord */ 2917 /* src[1] = d[strq]/dx */ 2918 /* src[2] = d[strq]/dy */ 2919 /* src[3] = sampler unit */ 2920 exec_txd(mach, inst); 2921 break; 2922 2923 case TGSI_OPCODE_TXL: 2924 /* Texture lookup with explit LOD */ 2925 /* src[0] = texcoord (src[0].w = LOD) */ 2926 /* src[1] = sampler unit */ 2927 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); 2928 break; 2929 2930 case TGSI_OPCODE_TXP: 2931 /* Texture lookup with projection */ 2932 /* src[0] = texcoord (src[0].w = projection) */ 2933 /* src[1] = sampler unit */ 2934 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); 2935 break; 2936 2937 case TGSI_OPCODE_UP2H: 2938 assert (0); 2939 break; 2940 2941 case TGSI_OPCODE_UP2US: 2942 assert (0); 2943 break; 2944 2945 case TGSI_OPCODE_UP4B: 2946 assert (0); 2947 break; 2948 2949 case TGSI_OPCODE_UP4UB: 2950 assert (0); 2951 break; 2952 2953 case TGSI_OPCODE_X2D: 2954 FETCH(&r[0], 1, CHAN_X); 2955 FETCH(&r[1], 1, CHAN_Y); 2956 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2957 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2958 FETCH(&r[2], 2, CHAN_X); 2959 micro_mul(&r[2], &r[2], &r[0]); 2960 FETCH(&r[3], 2, CHAN_Y); 2961 micro_mul(&r[3], &r[3], &r[1]); 2962 micro_add(&r[2], &r[2], &r[3]); 2963 FETCH(&r[3], 0, CHAN_X); 2964 micro_add(&d[CHAN_X], &r[2], &r[3]); 2965 2966 } 2967 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2968 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2969 FETCH(&r[2], 2, CHAN_Z); 2970 micro_mul(&r[2], &r[2], &r[0]); 2971 FETCH(&r[3], 2, CHAN_W); 2972 micro_mul(&r[3], &r[3], &r[1]); 2973 micro_add(&r[2], &r[2], &r[3]); 2974 FETCH(&r[3], 0, CHAN_Y); 2975 micro_add(&d[CHAN_Y], &r[2], &r[3]); 2976 2977 } 2978 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2979 STORE(&d[CHAN_X], 0, CHAN_X); 2980 } 2981 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2982 STORE(&d[CHAN_Y], 0, CHAN_Y); 2983 } 2984 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2985 STORE(&d[CHAN_X], 0, CHAN_Z); 2986 } 2987 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2988 STORE(&d[CHAN_Y], 0, CHAN_W); 2989 } 2990 break; 2991 2992 case TGSI_OPCODE_ARA: 2993 assert (0); 2994 break; 2995 2996 case TGSI_OPCODE_ARR: 2997 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 2998 break; 2999 3000 case TGSI_OPCODE_BRA: 3001 assert (0); 3002 break; 3003 3004 case TGSI_OPCODE_CAL: 3005 /* skip the call if no execution channels are enabled */ 3006 if (mach->ExecMask) { 3007 /* do the call */ 3008 3009 /* First, record the depths of the execution stacks. 3010 * This is important for deeply nested/looped return statements. 3011 * We have to unwind the stacks by the correct amount. For a 3012 * real code generator, we could determine the number of entries 3013 * to pop off each stack with simple static analysis and avoid 3014 * implementing this data structure at run time. 3015 */ 3016 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 3017 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 3018 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 3019 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 3020 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 3021 /* note that PC was already incremented above */ 3022 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 3023 3024 mach->CallStackTop++; 3025 3026 /* Second, push the Cond, Loop, Cont, Func stacks */ 3027 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 3028 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3029 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3030 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3031 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3032 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 3033 3034 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 3035 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3036 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3037 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3038 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3039 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 3040 3041 /* Finally, jump to the subroutine */ 3042 *pc = inst->Label.Label; 3043 } 3044 break; 3045 3046 case TGSI_OPCODE_RET: 3047 mach->FuncMask &= ~mach->ExecMask; 3048 UPDATE_EXEC_MASK(mach); 3049 3050 if (mach->FuncMask == 0x0) { 3051 /* really return now (otherwise, keep executing */ 3052 3053 if (mach->CallStackTop == 0) { 3054 /* returning from main() */ 3055 *pc = -1; 3056 return; 3057 } 3058 3059 assert(mach->CallStackTop > 0); 3060 mach->CallStackTop--; 3061 3062 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3063 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3064 3065 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3066 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3067 3068 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3069 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3070 3071 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 3072 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 3073 3074 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 3075 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 3076 3077 assert(mach->FuncStackTop > 0); 3078 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3079 3080 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3081 3082 UPDATE_EXEC_MASK(mach); 3083 } 3084 break; 3085 3086 case TGSI_OPCODE_SSG: 3087 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3088 break; 3089 3090 case TGSI_OPCODE_CMP: 3091 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 3092 FETCH(&r[0], 0, chan_index); 3093 FETCH(&r[1], 1, chan_index); 3094 FETCH(&r[2], 2, chan_index); 3095 micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]); 3096 } 3097 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3098 STORE(&d[chan_index], 0, chan_index); 3099 } 3100 break; 3101 3102 case TGSI_OPCODE_SCS: 3103 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 3104 FETCH( &r[0], 0, CHAN_X ); 3105 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 3106 micro_cos(&r[1], &r[0]); 3107 STORE(&r[1], 0, CHAN_X); 3108 } 3109 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 3110 micro_sin(&r[1], &r[0]); 3111 STORE(&r[1], 0, CHAN_Y); 3112 } 3113 } 3114 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 3115 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 3116 } 3117 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 3118 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 3119 } 3120 break; 3121 3122 case TGSI_OPCODE_NRM: 3123 exec_nrm3(mach, inst); 3124 break; 3125 3126 case TGSI_OPCODE_NRM4: 3127 exec_nrm4(mach, inst); 3128 break; 3129 3130 case TGSI_OPCODE_DIV: 3131 assert( 0 ); 3132 break; 3133 3134 case TGSI_OPCODE_DP2: 3135 exec_dp2(mach, inst); 3136 break; 3137 3138 case TGSI_OPCODE_IF: 3139 /* push CondMask */ 3140 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 3141 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 3142 FETCH( &r[0], 0, CHAN_X ); 3143 /* update CondMask */ 3144 if( ! r[0].u[0] ) { 3145 mach->CondMask &= ~0x1; 3146 } 3147 if( ! r[0].u[1] ) { 3148 mach->CondMask &= ~0x2; 3149 } 3150 if( ! r[0].u[2] ) { 3151 mach->CondMask &= ~0x4; 3152 } 3153 if( ! r[0].u[3] ) { 3154 mach->CondMask &= ~0x8; 3155 } 3156 UPDATE_EXEC_MASK(mach); 3157 /* Todo: If CondMask==0, jump to ELSE */ 3158 break; 3159 3160 case TGSI_OPCODE_ELSE: 3161 /* invert CondMask wrt previous mask */ 3162 { 3163 uint prevMask; 3164 assert(mach->CondStackTop > 0); 3165 prevMask = mach->CondStack[mach->CondStackTop - 1]; 3166 mach->CondMask = ~mach->CondMask & prevMask; 3167 UPDATE_EXEC_MASK(mach); 3168 /* Todo: If CondMask==0, jump to ENDIF */ 3169 } 3170 break; 3171 3172 case TGSI_OPCODE_ENDIF: 3173 /* pop CondMask */ 3174 assert(mach->CondStackTop > 0); 3175 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 3176 UPDATE_EXEC_MASK(mach); 3177 break; 3178 3179 case TGSI_OPCODE_END: 3180 /* halt execution */ 3181 *pc = -1; 3182 break; 3183 3184 case TGSI_OPCODE_REP: 3185 assert (0); 3186 break; 3187 3188 case TGSI_OPCODE_ENDREP: 3189 assert (0); 3190 break; 3191 3192 case TGSI_OPCODE_PUSHA: 3193 assert (0); 3194 break; 3195 3196 case TGSI_OPCODE_POPA: 3197 assert (0); 3198 break; 3199 3200 case TGSI_OPCODE_CEIL: 3201 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3202 break; 3203 3204 case TGSI_OPCODE_I2F: 3205 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 3206 break; 3207 3208 case TGSI_OPCODE_NOT: 3209 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3210 break; 3211 3212 case TGSI_OPCODE_TRUNC: 3213 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3214 break; 3215 3216 case TGSI_OPCODE_SHL: 3217 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3218 break; 3219 3220 case TGSI_OPCODE_AND: 3221 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3222 break; 3223 3224 case TGSI_OPCODE_OR: 3225 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3226 break; 3227 3228 case TGSI_OPCODE_MOD: 3229 assert (0); 3230 break; 3231 3232 case TGSI_OPCODE_XOR: 3233 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3234 break; 3235 3236 case TGSI_OPCODE_SAD: 3237 assert (0); 3238 break; 3239 3240 case TGSI_OPCODE_TXF: 3241 assert (0); 3242 break; 3243 3244 case TGSI_OPCODE_TXQ: 3245 assert (0); 3246 break; 3247 3248 case TGSI_OPCODE_EMIT: 3249 emit_vertex(mach); 3250 break; 3251 3252 case TGSI_OPCODE_ENDPRIM: 3253 emit_primitive(mach); 3254 break; 3255 3256 case TGSI_OPCODE_BGNFOR: 3257 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3258 for (chan_index = 0; chan_index < 3; chan_index++) { 3259 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3260 } 3261 ++mach->LoopCounterStackTop; 3262 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); 3263 /* update LoopMask */ 3264 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3265 mach->LoopMask &= ~0x1; 3266 } 3267 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3268 mach->LoopMask &= ~0x2; 3269 } 3270 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3271 mach->LoopMask &= ~0x4; 3272 } 3273 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3274 mach->LoopMask &= ~0x8; 3275 } 3276 /* TODO: if mach->LoopMask == 0, jump to end of loop */ 3277 UPDATE_EXEC_MASK(mach); 3278 /* fall-through (for now) */ 3279 case TGSI_OPCODE_BGNLOOP: 3280 /* push LoopMask and ContMasks */ 3281 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3282 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3283 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3284 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3285 3286 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3287 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3288 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3289 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3290 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 3291 break; 3292 3293 case TGSI_OPCODE_ENDFOR: 3294 assert(mach->LoopCounterStackTop > 0); 3295 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3296 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3297 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 3298 /* update LoopMask */ 3299 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3300 mach->LoopMask &= ~0x1; 3301 } 3302 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3303 mach->LoopMask &= ~0x2; 3304 } 3305 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3306 mach->LoopMask &= ~0x4; 3307 } 3308 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3309 mach->LoopMask &= ~0x8; 3310 } 3311 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3312 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3313 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3314 assert(mach->LoopLabelStackTop > 0); 3315 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3316 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); 3317 /* Restore ContMask, but don't pop */ 3318 assert(mach->ContStackTop > 0); 3319 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3320 UPDATE_EXEC_MASK(mach); 3321 if (mach->ExecMask) { 3322 /* repeat loop: jump to instruction just past BGNLOOP */ 3323 assert(mach->LoopLabelStackTop > 0); 3324 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3325 } 3326 else { 3327 /* exit loop: pop LoopMask */ 3328 assert(mach->LoopStackTop > 0); 3329 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3330 /* pop ContMask */ 3331 assert(mach->ContStackTop > 0); 3332 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3333 assert(mach->LoopLabelStackTop > 0); 3334 --mach->LoopLabelStackTop; 3335 assert(mach->LoopCounterStackTop > 0); 3336 --mach->LoopCounterStackTop; 3337 3338 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3339 } 3340 UPDATE_EXEC_MASK(mach); 3341 break; 3342 3343 case TGSI_OPCODE_ENDLOOP: 3344 /* Restore ContMask, but don't pop */ 3345 assert(mach->ContStackTop > 0); 3346 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3347 UPDATE_EXEC_MASK(mach); 3348 if (mach->ExecMask) { 3349 /* repeat loop: jump to instruction just past BGNLOOP */ 3350 assert(mach->LoopLabelStackTop > 0); 3351 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3352 } 3353 else { 3354 /* exit loop: pop LoopMask */ 3355 assert(mach->LoopStackTop > 0); 3356 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3357 /* pop ContMask */ 3358 assert(mach->ContStackTop > 0); 3359 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3360 assert(mach->LoopLabelStackTop > 0); 3361 --mach->LoopLabelStackTop; 3362 3363 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3364 } 3365 UPDATE_EXEC_MASK(mach); 3366 break; 3367 3368 case TGSI_OPCODE_BRK: 3369 exec_break(mach); 3370 break; 3371 3372 case TGSI_OPCODE_CONT: 3373 /* turn off cont channels for each enabled exec channel */ 3374 mach->ContMask &= ~mach->ExecMask; 3375 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3376 UPDATE_EXEC_MASK(mach); 3377 break; 3378 3379 case TGSI_OPCODE_BGNSUB: 3380 /* no-op */ 3381 break; 3382 3383 case TGSI_OPCODE_ENDSUB: 3384 /* 3385 * XXX: This really should be a no-op. We should never reach this opcode. 3386 */ 3387 3388 assert(mach->CallStackTop > 0); 3389 mach->CallStackTop--; 3390 3391 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3392 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3393 3394 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3395 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3396 3397 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3398 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3399 3400 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 3401 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 3402 3403 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 3404 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 3405 3406 assert(mach->FuncStackTop > 0); 3407 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3408 3409 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3410 3411 UPDATE_EXEC_MASK(mach); 3412 break; 3413 3414 case TGSI_OPCODE_NOP: 3415 break; 3416 3417 case TGSI_OPCODE_BREAKC: 3418 FETCH(&r[0], 0, CHAN_X); 3419 /* update CondMask */ 3420 if (r[0].u[0] && (mach->ExecMask & 0x1)) { 3421 mach->LoopMask &= ~0x1; 3422 } 3423 if (r[0].u[1] && (mach->ExecMask & 0x2)) { 3424 mach->LoopMask &= ~0x2; 3425 } 3426 if (r[0].u[2] && (mach->ExecMask & 0x4)) { 3427 mach->LoopMask &= ~0x4; 3428 } 3429 if (r[0].u[3] && (mach->ExecMask & 0x8)) { 3430 mach->LoopMask &= ~0x8; 3431 } 3432 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3433 UPDATE_EXEC_MASK(mach); 3434 break; 3435 3436 case TGSI_OPCODE_F2I: 3437 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 3438 break; 3439 3440 case TGSI_OPCODE_IDIV: 3441 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3442 break; 3443 3444 case TGSI_OPCODE_IMAX: 3445 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3446 break; 3447 3448 case TGSI_OPCODE_IMIN: 3449 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3450 break; 3451 3452 case TGSI_OPCODE_INEG: 3453 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3454 break; 3455 3456 case TGSI_OPCODE_ISGE: 3457 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3458 break; 3459 3460 case TGSI_OPCODE_ISHR: 3461 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3462 break; 3463 3464 case TGSI_OPCODE_ISLT: 3465 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3466 break; 3467 3468 case TGSI_OPCODE_F2U: 3469 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 3470 break; 3471 3472 case TGSI_OPCODE_U2F: 3473 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 3474 break; 3475 3476 case TGSI_OPCODE_UADD: 3477 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3478 break; 3479 3480 case TGSI_OPCODE_UDIV: 3481 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3482 break; 3483 3484 case TGSI_OPCODE_UMAD: 3485 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3486 break; 3487 3488 case TGSI_OPCODE_UMAX: 3489 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3490 break; 3491 3492 case TGSI_OPCODE_UMIN: 3493 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3494 break; 3495 3496 case TGSI_OPCODE_UMOD: 3497 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3498 break; 3499 3500 case TGSI_OPCODE_UMUL: 3501 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3502 break; 3503 3504 case TGSI_OPCODE_USEQ: 3505 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3506 break; 3507 3508 case TGSI_OPCODE_USGE: 3509 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3510 break; 3511 3512 case TGSI_OPCODE_USHR: 3513 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3514 break; 3515 3516 case TGSI_OPCODE_USLT: 3517 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3518 break; 3519 3520 case TGSI_OPCODE_USNE: 3521 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3522 break; 3523 3524 case TGSI_OPCODE_SWITCH: 3525 exec_switch(mach, inst); 3526 break; 3527 3528 case TGSI_OPCODE_CASE: 3529 exec_case(mach, inst); 3530 break; 3531 3532 case TGSI_OPCODE_DEFAULT: 3533 exec_default(mach); 3534 break; 3535 3536 case TGSI_OPCODE_ENDSWITCH: 3537 exec_endswitch(mach); 3538 break; 3539 3540 default: 3541 assert( 0 ); 3542 } 3543} 3544 3545 3546#define DEBUG_EXECUTION 0 3547 3548 3549/** 3550 * Run TGSI interpreter. 3551 * \return bitmask of "alive" quad components 3552 */ 3553uint 3554tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3555{ 3556 uint i; 3557 int pc = 0; 3558 3559 mach->CondMask = 0xf; 3560 mach->LoopMask = 0xf; 3561 mach->ContMask = 0xf; 3562 mach->FuncMask = 0xf; 3563 mach->ExecMask = 0xf; 3564 3565 mach->Switch.mask = 0xf; 3566 3567 assert(mach->CondStackTop == 0); 3568 assert(mach->LoopStackTop == 0); 3569 assert(mach->ContStackTop == 0); 3570 assert(mach->SwitchStackTop == 0); 3571 assert(mach->BreakStackTop == 0); 3572 assert(mach->CallStackTop == 0); 3573 3574 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3575 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3576 3577 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3578 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3579 mach->Primitives[0] = 0; 3580 } 3581 3582 for (i = 0; i < QUAD_SIZE; i++) { 3583 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3584 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3585 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3586 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3587 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3588 } 3589 3590 /* execute declarations (interpolants) */ 3591 for (i = 0; i < mach->NumDeclarations; i++) { 3592 exec_declaration( mach, mach->Declarations+i ); 3593 } 3594 3595 { 3596#if DEBUG_EXECUTION 3597 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 3598 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 3599 uint inst = 1; 3600 3601 memcpy(temps, mach->Temps, sizeof(temps)); 3602 memcpy(outputs, mach->Outputs, sizeof(outputs)); 3603#endif 3604 3605 /* execute instructions, until pc is set to -1 */ 3606 while (pc != -1) { 3607 3608#if DEBUG_EXECUTION 3609 uint i; 3610 3611 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 3612#endif 3613 3614 assert(pc < (int) mach->NumInstructions); 3615 exec_instruction(mach, mach->Instructions + pc, &pc); 3616 3617#if DEBUG_EXECUTION 3618 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 3619 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 3620 uint j; 3621 3622 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 3623 debug_printf("TEMP[%2u] = ", i); 3624 for (j = 0; j < 4; j++) { 3625 if (j > 0) { 3626 debug_printf(" "); 3627 } 3628 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3629 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 3630 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 3631 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 3632 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 3633 } 3634 } 3635 } 3636 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 3637 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 3638 uint j; 3639 3640 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 3641 debug_printf("OUT[%2u] = ", i); 3642 for (j = 0; j < 4; j++) { 3643 if (j > 0) { 3644 debug_printf(" "); 3645 } 3646 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3647 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 3648 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 3649 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 3650 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 3651 } 3652 } 3653 } 3654#endif 3655 } 3656 } 3657 3658#if 0 3659 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3660 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3661 /* 3662 * Scale back depth component. 3663 */ 3664 for (i = 0; i < 4; i++) 3665 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3666 } 3667#endif 3668 3669 assert(mach->CondStackTop == 0); 3670 assert(mach->LoopStackTop == 0); 3671 assert(mach->ContStackTop == 0); 3672 assert(mach->SwitchStackTop == 0); 3673 assert(mach->BreakStackTop == 0); 3674 assert(mach->CallStackTop == 0); 3675 3676 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3677} 3678