/* tgsi_exec.c revision 0ec30805a4aad945515957e980374f65fbd3b66e */
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. 
This is the ExecMask. 42 * See store_dest(). 43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/u_memory.h" 62#include "util/u_math.h" 63 64 65#define FAST_MATH 0 66 67#define TILE_TOP_LEFT 0 68#define TILE_TOP_RIGHT 1 69#define TILE_BOTTOM_LEFT 2 70#define TILE_BOTTOM_RIGHT 3 71 72static void 73micro_abs(union tgsi_exec_channel *dst, 74 const union tgsi_exec_channel *src) 75{ 76 dst->f[0] = fabsf(src->f[0]); 77 dst->f[1] = fabsf(src->f[1]); 78 dst->f[2] = fabsf(src->f[2]); 79 dst->f[3] = fabsf(src->f[3]); 80} 81 82static void 83micro_arl(union tgsi_exec_channel *dst, 84 const union tgsi_exec_channel *src) 85{ 86 dst->i[0] = (int)floorf(src->f[0]); 87 dst->i[1] = (int)floorf(src->f[1]); 88 dst->i[2] = (int)floorf(src->f[2]); 89 dst->i[3] = (int)floorf(src->f[3]); 90} 91 92static void 93micro_arr(union tgsi_exec_channel *dst, 94 const union tgsi_exec_channel *src) 95{ 96 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 97 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 98 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 99 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 100} 101 102static void 103micro_ceil(union tgsi_exec_channel *dst, 104 const union tgsi_exec_channel *src) 105{ 106 dst->f[0] = ceilf(src->f[0]); 107 dst->f[1] = ceilf(src->f[1]); 108 dst->f[2] = ceilf(src->f[2]); 109 dst->f[3] = ceilf(src->f[3]); 110} 111 112static void 113micro_clamp(union tgsi_exec_channel *dst, 114 const union tgsi_exec_channel *src0, 115 const union tgsi_exec_channel *src1, 116 const union tgsi_exec_channel *src2) 117{ 118 dst->f[0] = src0->f[0] < src1->f[0] ? 
src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; 119 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; 120 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; 121 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; 122} 123 124static void 125micro_cmp(union tgsi_exec_channel *dst, 126 const union tgsi_exec_channel *src0, 127 const union tgsi_exec_channel *src1, 128 const union tgsi_exec_channel *src2) 129{ 130 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 131 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 132 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 133 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 134} 135 136static void 137micro_cnd(union tgsi_exec_channel *dst, 138 const union tgsi_exec_channel *src0, 139 const union tgsi_exec_channel *src1, 140 const union tgsi_exec_channel *src2) 141{ 142 dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; 143 dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; 144 dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; 145 dst->f[3] = src2->f[3] > 0.5f ? 
src0->f[3] : src1->f[3]; 146} 147 148static void 149micro_cos(union tgsi_exec_channel *dst, 150 const union tgsi_exec_channel *src) 151{ 152 dst->f[0] = cosf(src->f[0]); 153 dst->f[1] = cosf(src->f[1]); 154 dst->f[2] = cosf(src->f[2]); 155 dst->f[3] = cosf(src->f[3]); 156} 157 158static void 159micro_ddx(union tgsi_exec_channel *dst, 160 const union tgsi_exec_channel *src) 161{ 162 dst->f[0] = 163 dst->f[1] = 164 dst->f[2] = 165 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 166} 167 168static void 169micro_ddy(union tgsi_exec_channel *dst, 170 const union tgsi_exec_channel *src) 171{ 172 dst->f[0] = 173 dst->f[1] = 174 dst->f[2] = 175 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 176} 177 178static void 179micro_exp2(union tgsi_exec_channel *dst, 180 const union tgsi_exec_channel *src) 181{ 182#if FAST_MATH 183 dst->f[0] = util_fast_exp2(src->f[0]); 184 dst->f[1] = util_fast_exp2(src->f[1]); 185 dst->f[2] = util_fast_exp2(src->f[2]); 186 dst->f[3] = util_fast_exp2(src->f[3]); 187#else 188#if DEBUG 189 /* Inf is okay for this instruction, so clamp it to silence assertions. 
*/ 190 uint i; 191 union tgsi_exec_channel clamped; 192 193 for (i = 0; i < 4; i++) { 194 if (src->f[i] > 127.99999f) { 195 clamped.f[i] = 127.99999f; 196 } else if (src->f[i] < -126.99999f) { 197 clamped.f[i] = -126.99999f; 198 } else { 199 clamped.f[i] = src->f[i]; 200 } 201 } 202 src = &clamped; 203#endif /* DEBUG */ 204 205 dst->f[0] = powf(2.0f, src->f[0]); 206 dst->f[1] = powf(2.0f, src->f[1]); 207 dst->f[2] = powf(2.0f, src->f[2]); 208 dst->f[3] = powf(2.0f, src->f[3]); 209#endif /* FAST_MATH */ 210} 211 212static void 213micro_flr(union tgsi_exec_channel *dst, 214 const union tgsi_exec_channel *src) 215{ 216 dst->f[0] = floorf(src->f[0]); 217 dst->f[1] = floorf(src->f[1]); 218 dst->f[2] = floorf(src->f[2]); 219 dst->f[3] = floorf(src->f[3]); 220} 221 222static void 223micro_frc(union tgsi_exec_channel *dst, 224 const union tgsi_exec_channel *src) 225{ 226 dst->f[0] = src->f[0] - floorf(src->f[0]); 227 dst->f[1] = src->f[1] - floorf(src->f[1]); 228 dst->f[2] = src->f[2] - floorf(src->f[2]); 229 dst->f[3] = src->f[3] - floorf(src->f[3]); 230} 231 232static void 233micro_iabs(union tgsi_exec_channel *dst, 234 const union tgsi_exec_channel *src) 235{ 236 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 237 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 238 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 239 dst->i[3] = src->i[3] >= 0 ? 
src->i[3] : -src->i[3]; 240} 241 242static void 243micro_ineg(union tgsi_exec_channel *dst, 244 const union tgsi_exec_channel *src) 245{ 246 dst->i[0] = -src->i[0]; 247 dst->i[1] = -src->i[1]; 248 dst->i[2] = -src->i[2]; 249 dst->i[3] = -src->i[3]; 250} 251 252static void 253micro_lg2(union tgsi_exec_channel *dst, 254 const union tgsi_exec_channel *src) 255{ 256#if FAST_MATH 257 dst->f[0] = util_fast_log2(src->f[0]); 258 dst->f[1] = util_fast_log2(src->f[1]); 259 dst->f[2] = util_fast_log2(src->f[2]); 260 dst->f[3] = util_fast_log2(src->f[3]); 261#else 262 dst->f[0] = logf(src->f[0]) * 1.442695f; 263 dst->f[1] = logf(src->f[1]) * 1.442695f; 264 dst->f[2] = logf(src->f[2]) * 1.442695f; 265 dst->f[3] = logf(src->f[3]) * 1.442695f; 266#endif 267} 268 269static void 270micro_lrp(union tgsi_exec_channel *dst, 271 const union tgsi_exec_channel *src0, 272 const union tgsi_exec_channel *src1, 273 const union tgsi_exec_channel *src2) 274{ 275 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 276 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 277 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 278 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 279} 280 281static void 282micro_mad(union tgsi_exec_channel *dst, 283 const union tgsi_exec_channel *src0, 284 const union tgsi_exec_channel *src1, 285 const union tgsi_exec_channel *src2) 286{ 287 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 288 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 289 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 290 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 291} 292 293static void 294micro_mov(union tgsi_exec_channel *dst, 295 const union tgsi_exec_channel *src) 296{ 297 dst->u[0] = src->u[0]; 298 dst->u[1] = src->u[1]; 299 dst->u[2] = src->u[2]; 300 dst->u[3] = src->u[3]; 301} 302 303static void 304micro_rcp(union tgsi_exec_channel *dst, 305 const union tgsi_exec_channel *src) 306{ 307#if 0 /* for debugging */ 308 
assert(src->f[0] != 0.0f); 309 assert(src->f[1] != 0.0f); 310 assert(src->f[2] != 0.0f); 311 assert(src->f[3] != 0.0f); 312#endif 313 dst->f[0] = 1.0f / src->f[0]; 314 dst->f[1] = 1.0f / src->f[1]; 315 dst->f[2] = 1.0f / src->f[2]; 316 dst->f[3] = 1.0f / src->f[3]; 317} 318 319static void 320micro_rnd(union tgsi_exec_channel *dst, 321 const union tgsi_exec_channel *src) 322{ 323 dst->f[0] = floorf(src->f[0] + 0.5f); 324 dst->f[1] = floorf(src->f[1] + 0.5f); 325 dst->f[2] = floorf(src->f[2] + 0.5f); 326 dst->f[3] = floorf(src->f[3] + 0.5f); 327} 328 329static void 330micro_rsq(union tgsi_exec_channel *dst, 331 const union tgsi_exec_channel *src) 332{ 333#if 0 /* for debugging */ 334 assert(src->f[0] != 0.0f); 335 assert(src->f[1] != 0.0f); 336 assert(src->f[2] != 0.0f); 337 assert(src->f[3] != 0.0f); 338#endif 339 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); 340 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); 341 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); 342 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); 343} 344 345static void 346micro_seq(union tgsi_exec_channel *dst, 347 const union tgsi_exec_channel *src0, 348 const union tgsi_exec_channel *src1) 349{ 350 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 351 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 352 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 353 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 354} 355 356static void 357micro_sge(union tgsi_exec_channel *dst, 358 const union tgsi_exec_channel *src0, 359 const union tgsi_exec_channel *src1) 360{ 361 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 362 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 363 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 364 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 365} 366 367static void 368micro_sgn(union tgsi_exec_channel *dst, 369 const union tgsi_exec_channel *src) 370{ 371 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 372 dst->f[1] = src->f[1] < 0.0f ? 
-1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 373 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 374 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 375} 376 377static void 378micro_isgn(union tgsi_exec_channel *dst, 379 const union tgsi_exec_channel *src) 380{ 381 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 382 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 383 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 384 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 385} 386 387static void 388micro_sgt(union tgsi_exec_channel *dst, 389 const union tgsi_exec_channel *src0, 390 const union tgsi_exec_channel *src1) 391{ 392 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 393 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 394 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 395 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 396} 397 398static void 399micro_sin(union tgsi_exec_channel *dst, 400 const union tgsi_exec_channel *src) 401{ 402 dst->f[0] = sinf(src->f[0]); 403 dst->f[1] = sinf(src->f[1]); 404 dst->f[2] = sinf(src->f[2]); 405 dst->f[3] = sinf(src->f[3]); 406} 407 408static void 409micro_sle(union tgsi_exec_channel *dst, 410 const union tgsi_exec_channel *src0, 411 const union tgsi_exec_channel *src1) 412{ 413 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 414 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 415 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 416 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 417} 418 419static void 420micro_slt(union tgsi_exec_channel *dst, 421 const union tgsi_exec_channel *src0, 422 const union tgsi_exec_channel *src1) 423{ 424 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 425 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 426 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 427 dst->f[3] = src0->f[3] < src1->f[3] ? 
1.0f : 0.0f; 428} 429 430static void 431micro_sne(union tgsi_exec_channel *dst, 432 const union tgsi_exec_channel *src0, 433 const union tgsi_exec_channel *src1) 434{ 435 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 436 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 437 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 438 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 439} 440 441static void 442micro_sfl(union tgsi_exec_channel *dst) 443{ 444 dst->f[0] = 0.0f; 445 dst->f[1] = 0.0f; 446 dst->f[2] = 0.0f; 447 dst->f[3] = 0.0f; 448} 449 450static void 451micro_str(union tgsi_exec_channel *dst) 452{ 453 dst->f[0] = 1.0f; 454 dst->f[1] = 1.0f; 455 dst->f[2] = 1.0f; 456 dst->f[3] = 1.0f; 457} 458 459static void 460micro_trunc(union tgsi_exec_channel *dst, 461 const union tgsi_exec_channel *src) 462{ 463 dst->f[0] = (float)(int)src->f[0]; 464 dst->f[1] = (float)(int)src->f[1]; 465 dst->f[2] = (float)(int)src->f[2]; 466 dst->f[3] = (float)(int)src->f[3]; 467} 468 469 470#define CHAN_X 0 471#define CHAN_Y 1 472#define CHAN_Z 2 473#define CHAN_W 3 474 475enum tgsi_exec_datatype { 476 TGSI_EXEC_DATA_FLOAT, 477 TGSI_EXEC_DATA_INT, 478 TGSI_EXEC_DATA_UINT 479}; 480 481/* 482 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 483 */ 484#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 485#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 486#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 487#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 488#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 489#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 490 491 492/** The execution mask depends on the conditional mask and the loop mask */ 493#define UPDATE_EXEC_MASK(MACH) \ 494 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 495 496 497static const union tgsi_exec_channel ZeroVec = 498 { { 0.0, 0.0, 0.0, 0.0 } }; 499 500static const union tgsi_exec_channel OneVec = { 501 {1.0f, 1.0f, 1.0f, 1.0f} 
502}; 503 504static const union tgsi_exec_channel P128Vec = { 505 {128.0f, 128.0f, 128.0f, 128.0f} 506}; 507 508static const union tgsi_exec_channel M128Vec = { 509 {-128.0f, -128.0f, -128.0f, -128.0f} 510}; 511 512 513/** 514 * Assert that none of the float values in 'chan' are infinite or NaN. 515 * NaN and Inf may occur normally during program execution and should 516 * not lead to crashes, etc. But when debugging, it's helpful to catch 517 * them. 518 */ 519static INLINE void 520check_inf_or_nan(const union tgsi_exec_channel *chan) 521{ 522 assert(!util_is_inf_or_nan((chan)->f[0])); 523 assert(!util_is_inf_or_nan((chan)->f[1])); 524 assert(!util_is_inf_or_nan((chan)->f[2])); 525 assert(!util_is_inf_or_nan((chan)->f[3])); 526} 527 528 529#ifdef DEBUG 530static void 531print_chan(const char *msg, const union tgsi_exec_channel *chan) 532{ 533 debug_printf("%s = {%f, %f, %f, %f}\n", 534 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 535} 536#endif 537 538 539#ifdef DEBUG 540static void 541print_temp(const struct tgsi_exec_machine *mach, uint index) 542{ 543 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 544 int i; 545 debug_printf("Temp[%u] =\n", index); 546 for (i = 0; i < 4; i++) { 547 debug_printf(" %c: { %f, %f, %f, %f }\n", 548 "XYZW"[i], 549 tmp->xyzw[i].f[0], 550 tmp->xyzw[i].f[1], 551 tmp->xyzw[i].f[2], 552 tmp->xyzw[i].f[3]); 553 } 554} 555#endif 556 557 558void 559tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 560 unsigned num_bufs, 561 const void **bufs, 562 const unsigned *buf_sizes) 563{ 564 unsigned i; 565 566 for (i = 0; i < num_bufs; i++) { 567 mach->Consts[i] = bufs[i]; 568 mach->ConstsSize[i] = buf_sizes[i]; 569 } 570} 571 572 573/** 574 * Check if there's a potential src/dst register data dependency when 575 * using SOA execution. 
576 * Example: 577 * MOV T, T.yxwz; 578 * This would expand into: 579 * MOV t0, t1; 580 * MOV t1, t0; 581 * MOV t2, t3; 582 * MOV t3, t2; 583 * The second instruction will have the wrong value for t0 if executed as-is. 584 */ 585boolean 586tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 587{ 588 uint i, chan; 589 590 uint writemask = inst->Dst[0].Register.WriteMask; 591 if (writemask == TGSI_WRITEMASK_X || 592 writemask == TGSI_WRITEMASK_Y || 593 writemask == TGSI_WRITEMASK_Z || 594 writemask == TGSI_WRITEMASK_W || 595 writemask == TGSI_WRITEMASK_NONE) { 596 /* no chance of data dependency */ 597 return FALSE; 598 } 599 600 /* loop over src regs */ 601 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 602 if ((inst->Src[i].Register.File == 603 inst->Dst[0].Register.File) && 604 ((inst->Src[i].Register.Index == 605 inst->Dst[0].Register.Index) || 606 inst->Src[i].Register.Indirect || 607 inst->Dst[0].Register.Indirect)) { 608 /* loop over dest channels */ 609 uint channelsWritten = 0x0; 610 for (chan = 0; chan < NUM_CHANNELS; chan++) { 611 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 612 /* check if we're reading a channel that's been written */ 613 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 614 if (channelsWritten & (1 << swizzle)) { 615 return TRUE; 616 } 617 618 channelsWritten |= (1 << chan); 619 } 620 } 621 } 622 } 623 return FALSE; 624} 625 626 627/** 628 * Initialize machine state by expanding tokens to full instructions, 629 * allocating temporary storage, setting up constants, etc. 630 * After this, we can call tgsi_exec_machine_run() many times. 
631 */ 632void 633tgsi_exec_machine_bind_shader( 634 struct tgsi_exec_machine *mach, 635 const struct tgsi_token *tokens, 636 uint numSamplers, 637 struct tgsi_sampler **samplers) 638{ 639 uint k; 640 struct tgsi_parse_context parse; 641 struct tgsi_full_instruction *instructions; 642 struct tgsi_full_declaration *declarations; 643 uint maxInstructions = 10, numInstructions = 0; 644 uint maxDeclarations = 10, numDeclarations = 0; 645 646#if 0 647 tgsi_dump(tokens, 0); 648#endif 649 650 util_init_math(); 651 652 if (numSamplers) { 653 assert(samplers); 654 } 655 656 mach->Tokens = tokens; 657 mach->Samplers = samplers; 658 659 if (!tokens) { 660 /* unbind and free all */ 661 if (mach->Declarations) { 662 FREE( mach->Declarations ); 663 } 664 mach->Declarations = NULL; 665 mach->NumDeclarations = 0; 666 667 if (mach->Instructions) { 668 FREE( mach->Instructions ); 669 } 670 mach->Instructions = NULL; 671 mach->NumInstructions = 0; 672 673 return; 674 } 675 676 k = tgsi_parse_init (&parse, mach->Tokens); 677 if (k != TGSI_PARSE_OK) { 678 debug_printf( "Problem parsing!\n" ); 679 return; 680 } 681 682 mach->Processor = parse.FullHeader.Processor.Processor; 683 mach->ImmLimit = 0; 684 685 if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && 686 !mach->UsedGeometryShader) { 687 struct tgsi_exec_vector *inputs; 688 struct tgsi_exec_vector *outputs; 689 690 inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 691 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS, 692 16); 693 694 if (!inputs) 695 return; 696 697 outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 698 TGSI_MAX_TOTAL_VERTICES, 16); 699 700 if (!outputs) { 701 align_free(inputs); 702 return; 703 } 704 705 align_free(mach->Inputs); 706 align_free(mach->Outputs); 707 708 mach->Inputs = inputs; 709 mach->Outputs = outputs; 710 mach->UsedGeometryShader = TRUE; 711 } 712 713 declarations = (struct tgsi_full_declaration *) 714 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 715 716 if (!declarations) 
{ 717 return; 718 } 719 720 instructions = (struct tgsi_full_instruction *) 721 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 722 723 if (!instructions) { 724 FREE( declarations ); 725 return; 726 } 727 728 while( !tgsi_parse_end_of_tokens( &parse ) ) { 729 uint i; 730 731 tgsi_parse_token( &parse ); 732 switch( parse.FullToken.Token.Type ) { 733 case TGSI_TOKEN_TYPE_DECLARATION: 734 /* save expanded declaration */ 735 if (numDeclarations == maxDeclarations) { 736 declarations = REALLOC(declarations, 737 maxDeclarations 738 * sizeof(struct tgsi_full_declaration), 739 (maxDeclarations + 10) 740 * sizeof(struct tgsi_full_declaration)); 741 maxDeclarations += 10; 742 } 743 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 744 unsigned reg; 745 for (reg = parse.FullToken.FullDeclaration.Range.First; 746 reg <= parse.FullToken.FullDeclaration.Range.Last; 747 ++reg) { 748 ++mach->NumOutputs; 749 } 750 } 751 if (parse.FullToken.FullDeclaration.Declaration.File == 752 TGSI_FILE_IMMEDIATE_ARRAY) { 753 unsigned reg; 754 struct tgsi_full_declaration *decl = 755 &parse.FullToken.FullDeclaration; 756 debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES); 757 for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) { 758 for( i = 0; i < 4; i++ ) { 759 int idx = reg * 4 + i; 760 mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float; 761 } 762 } 763 } 764 memcpy(declarations + numDeclarations, 765 &parse.FullToken.FullDeclaration, 766 sizeof(declarations[0])); 767 numDeclarations++; 768 break; 769 770 case TGSI_TOKEN_TYPE_IMMEDIATE: 771 { 772 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 773 assert( size <= 4 ); 774 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 775 776 for( i = 0; i < size; i++ ) { 777 mach->Imms[mach->ImmLimit][i] = 778 parse.FullToken.FullImmediate.u[i].Float; 779 } 780 mach->ImmLimit += 1; 781 } 782 break; 783 784 case TGSI_TOKEN_TYPE_INSTRUCTION: 785 786 /* save expanded 
instruction */ 787 if (numInstructions == maxInstructions) { 788 instructions = REALLOC(instructions, 789 maxInstructions 790 * sizeof(struct tgsi_full_instruction), 791 (maxInstructions + 10) 792 * sizeof(struct tgsi_full_instruction)); 793 maxInstructions += 10; 794 } 795 796 memcpy(instructions + numInstructions, 797 &parse.FullToken.FullInstruction, 798 sizeof(instructions[0])); 799 800 numInstructions++; 801 break; 802 803 case TGSI_TOKEN_TYPE_PROPERTY: 804 break; 805 806 default: 807 assert( 0 ); 808 } 809 } 810 tgsi_parse_free (&parse); 811 812 if (mach->Declarations) { 813 FREE( mach->Declarations ); 814 } 815 mach->Declarations = declarations; 816 mach->NumDeclarations = numDeclarations; 817 818 if (mach->Instructions) { 819 FREE( mach->Instructions ); 820 } 821 mach->Instructions = instructions; 822 mach->NumInstructions = numInstructions; 823} 824 825 826struct tgsi_exec_machine * 827tgsi_exec_machine_create( void ) 828{ 829 struct tgsi_exec_machine *mach; 830 uint i; 831 832 mach = align_malloc( sizeof *mach, 16 ); 833 if (!mach) 834 goto fail; 835 836 memset(mach, 0, sizeof(*mach)); 837 838 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 839 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 840 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 841 842 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); 843 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_ATTRIBS, 16); 844 if (!mach->Inputs || !mach->Outputs) 845 goto fail; 846 847 /* Setup constants needed by the SSE2 executor. 
*/ 848 for( i = 0; i < 4; i++ ) { 849 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; 850 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; 851 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; 852 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ 853 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; 854 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ 855 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; 856 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; 857 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; 858 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; 859 } 860 861#ifdef DEBUG 862 /* silence warnings */ 863 (void) print_chan; 864 (void) print_temp; 865#endif 866 867 return mach; 868 869fail: 870 if (mach) { 871 align_free(mach->Inputs); 872 align_free(mach->Outputs); 873 align_free(mach); 874 } 875 return NULL; 876} 877 878 879void 880tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 881{ 882 if (mach) { 883 if (mach->Instructions) 884 FREE(mach->Instructions); 885 if (mach->Declarations) 886 FREE(mach->Declarations); 887 888 align_free(mach->Inputs); 889 align_free(mach->Outputs); 890 891 align_free(mach); 892 } 893} 894 895static void 896micro_add(union tgsi_exec_channel *dst, 897 const union tgsi_exec_channel *src0, 898 const union tgsi_exec_channel *src1) 899{ 900 dst->f[0] = src0->f[0] + src1->f[0]; 901 dst->f[1] = src0->f[1] + src1->f[1]; 902 dst->f[2] = src0->f[2] + src1->f[2]; 903 dst->f[3] = src0->f[3] + src1->f[3]; 904} 905 906static void 907micro_div( 908 union tgsi_exec_channel *dst, 909 const union tgsi_exec_channel *src0, 910 const union tgsi_exec_channel *src1 ) 911{ 912 if 
(src1->f[0] != 0) { 913 dst->f[0] = src0->f[0] / src1->f[0]; 914 } 915 if (src1->f[1] != 0) { 916 dst->f[1] = src0->f[1] / src1->f[1]; 917 } 918 if (src1->f[2] != 0) { 919 dst->f[2] = src0->f[2] / src1->f[2]; 920 } 921 if (src1->f[3] != 0) { 922 dst->f[3] = src0->f[3] / src1->f[3]; 923 } 924} 925 926static void 927micro_rcc(union tgsi_exec_channel *dst, 928 const union tgsi_exec_channel *src) 929{ 930 uint i; 931 932 for (i = 0; i < 4; i++) { 933 float recip = 1.0f / src->f[i]; 934 935 if (recip > 0.0f) { 936 if (recip > 1.884467e+019f) { 937 dst->f[i] = 1.884467e+019f; 938 } 939 else if (recip < 5.42101e-020f) { 940 dst->f[i] = 5.42101e-020f; 941 } 942 else { 943 dst->f[i] = recip; 944 } 945 } 946 else { 947 if (recip < -1.884467e+019f) { 948 dst->f[i] = -1.884467e+019f; 949 } 950 else if (recip > -5.42101e-020f) { 951 dst->f[i] = -5.42101e-020f; 952 } 953 else { 954 dst->f[i] = recip; 955 } 956 } 957 } 958} 959 960static void 961micro_lt( 962 union tgsi_exec_channel *dst, 963 const union tgsi_exec_channel *src0, 964 const union tgsi_exec_channel *src1, 965 const union tgsi_exec_channel *src2, 966 const union tgsi_exec_channel *src3 ) 967{ 968 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 969 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 970 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 971 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 972} 973 974static void 975micro_max(union tgsi_exec_channel *dst, 976 const union tgsi_exec_channel *src0, 977 const union tgsi_exec_channel *src1) 978{ 979 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 980 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 981 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 982 dst->f[3] = src0->f[3] > src1->f[3] ? 
src0->f[3] : src1->f[3]; 983} 984 985static void 986micro_min(union tgsi_exec_channel *dst, 987 const union tgsi_exec_channel *src0, 988 const union tgsi_exec_channel *src1) 989{ 990 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 991 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 992 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 993 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 994} 995 996static void 997micro_mul(union tgsi_exec_channel *dst, 998 const union tgsi_exec_channel *src0, 999 const union tgsi_exec_channel *src1) 1000{ 1001 dst->f[0] = src0->f[0] * src1->f[0]; 1002 dst->f[1] = src0->f[1] * src1->f[1]; 1003 dst->f[2] = src0->f[2] * src1->f[2]; 1004 dst->f[3] = src0->f[3] * src1->f[3]; 1005} 1006 1007static void 1008micro_neg( 1009 union tgsi_exec_channel *dst, 1010 const union tgsi_exec_channel *src ) 1011{ 1012 dst->f[0] = -src->f[0]; 1013 dst->f[1] = -src->f[1]; 1014 dst->f[2] = -src->f[2]; 1015 dst->f[3] = -src->f[3]; 1016} 1017 1018static void 1019micro_pow( 1020 union tgsi_exec_channel *dst, 1021 const union tgsi_exec_channel *src0, 1022 const union tgsi_exec_channel *src1 ) 1023{ 1024#if FAST_MATH 1025 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 1026 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 1027 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 1028 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 1029#else 1030 dst->f[0] = powf( src0->f[0], src1->f[0] ); 1031 dst->f[1] = powf( src0->f[1], src1->f[1] ); 1032 dst->f[2] = powf( src0->f[2], src1->f[2] ); 1033 dst->f[3] = powf( src0->f[3], src1->f[3] ); 1034#endif 1035} 1036 1037static void 1038micro_sub(union tgsi_exec_channel *dst, 1039 const union tgsi_exec_channel *src0, 1040 const union tgsi_exec_channel *src1) 1041{ 1042 dst->f[0] = src0->f[0] - src1->f[0]; 1043 dst->f[1] = src0->f[1] - src1->f[1]; 1044 dst->f[2] = src0->f[2] - src1->f[2]; 1045 dst->f[3] = src0->f[3] - src1->f[3]; 1046} 1047 1048static 
void 1049fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1050 const uint chan_index, 1051 const uint file, 1052 const uint swizzle, 1053 const union tgsi_exec_channel *index, 1054 const union tgsi_exec_channel *index2D, 1055 union tgsi_exec_channel *chan) 1056{ 1057 uint i; 1058 1059 assert(swizzle < 4); 1060 1061 switch (file) { 1062 case TGSI_FILE_CONSTANT: 1063 for (i = 0; i < QUAD_SIZE; i++) { 1064 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); 1065 assert(mach->Consts[index2D->i[i]]); 1066 1067 if (index->i[i] < 0) { 1068 chan->u[i] = 0; 1069 } else { 1070 /* NOTE: copying the const value as a uint instead of float */ 1071 const uint constbuf = index2D->i[i]; 1072 const uint *buf = (const uint *)mach->Consts[constbuf]; 1073 const int pos = index->i[i] * 4 + swizzle; 1074 /* const buffer bounds check */ 1075 if (pos < 0 || pos >= mach->ConstsSize[constbuf]) { 1076 if (0) { 1077 /* Debug: print warning */ 1078 static int count = 0; 1079 if (count++ < 100) 1080 debug_printf("TGSI Exec: const buffer index %d" 1081 " out of bounds\n", pos); 1082 } 1083 chan->u[i] = 0; 1084 } 1085 else 1086 chan->u[i] = buf[pos]; 1087 } 1088 } 1089 break; 1090 1091 case TGSI_FILE_INPUT: 1092 for (i = 0; i < QUAD_SIZE; i++) { 1093 /* 1094 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1095 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 1096 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 1097 index2D->i[i], index->i[i]); 1098 }*/ 1099 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; 1100 assert(pos >= 0); 1101 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 1102 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; 1103 } 1104 break; 1105 1106 case TGSI_FILE_SYSTEM_VALUE: 1107 /* XXX no swizzling at this point. Will be needed if we put 1108 * gl_FragCoord, for example, in a sys value register. 
1109 */ 1110 for (i = 0; i < QUAD_SIZE; i++) { 1111 chan->u[i] = mach->SystemValue[index->i[i]].u[i]; 1112 } 1113 break; 1114 1115 case TGSI_FILE_TEMPORARY: 1116 for (i = 0; i < QUAD_SIZE; i++) { 1117 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1118 assert(index2D->i[i] == 0); 1119 1120 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1121 } 1122 break; 1123 1124 case TGSI_FILE_TEMPORARY_ARRAY: 1125 for (i = 0; i < QUAD_SIZE; i++) { 1126 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1127 assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS); 1128 1129 chan->u[i] = 1130 mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i]; 1131 } 1132 break; 1133 1134 case TGSI_FILE_IMMEDIATE: 1135 for (i = 0; i < QUAD_SIZE; i++) { 1136 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1137 assert(index2D->i[i] == 0); 1138 1139 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1140 } 1141 break; 1142 1143 case TGSI_FILE_IMMEDIATE_ARRAY: 1144 for (i = 0; i < QUAD_SIZE; i++) { 1145 assert(index2D->i[i] == 0); 1146 1147 chan->f[i] = mach->ImmArray[index->i[i]][swizzle]; 1148 } 1149 break; 1150 1151 case TGSI_FILE_ADDRESS: 1152 for (i = 0; i < QUAD_SIZE; i++) { 1153 assert(index->i[i] >= 0); 1154 assert(index2D->i[i] == 0); 1155 1156 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1157 } 1158 break; 1159 1160 case TGSI_FILE_PREDICATE: 1161 for (i = 0; i < QUAD_SIZE; i++) { 1162 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); 1163 assert(index2D->i[i] == 0); 1164 1165 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; 1166 } 1167 break; 1168 1169 case TGSI_FILE_OUTPUT: 1170 /* vertex/fragment output vars can be read too */ 1171 for (i = 0; i < QUAD_SIZE; i++) { 1172 assert(index->i[i] >= 0); 1173 assert(index2D->i[i] == 0); 1174 1175 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1176 } 1177 break; 1178 1179 default: 1180 assert(0); 1181 for (i = 0; i < QUAD_SIZE; i++) { 1182 chan->u[i] = 0; 1183 } 1184 } 1185} 1186 1187static 
void 1188fetch_source(const struct tgsi_exec_machine *mach, 1189 union tgsi_exec_channel *chan, 1190 const struct tgsi_full_src_register *reg, 1191 const uint chan_index, 1192 enum tgsi_exec_datatype src_datatype) 1193{ 1194 union tgsi_exec_channel index; 1195 union tgsi_exec_channel index2D; 1196 uint swizzle; 1197 1198 /* We start with a direct index into a register file. 1199 * 1200 * file[1], 1201 * where: 1202 * file = Register.File 1203 * [1] = Register.Index 1204 */ 1205 index.i[0] = 1206 index.i[1] = 1207 index.i[2] = 1208 index.i[3] = reg->Register.Index; 1209 1210 /* There is an extra source register that indirectly subscripts 1211 * a register file. The direct index now becomes an offset 1212 * that is being added to the indirect register. 1213 * 1214 * file[ind[2].x+1], 1215 * where: 1216 * ind = Indirect.File 1217 * [2] = Indirect.Index 1218 * .x = Indirect.SwizzleX 1219 */ 1220 if (reg->Register.Indirect) { 1221 union tgsi_exec_channel index2; 1222 union tgsi_exec_channel indir_index; 1223 const uint execmask = mach->ExecMask; 1224 uint i; 1225 1226 /* which address register (always zero now) */ 1227 index2.i[0] = 1228 index2.i[1] = 1229 index2.i[2] = 1230 index2.i[3] = reg->Indirect.Index; 1231 assert(reg->Indirect.File == TGSI_FILE_ADDRESS); 1232 /* get current value of address register[swizzle] */ 1233 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1234 fetch_src_file_channel(mach, 1235 chan_index, 1236 reg->Indirect.File, 1237 swizzle, 1238 &index2, 1239 &ZeroVec, 1240 &indir_index); 1241 1242 /* add value of address register to the offset */ 1243 index.i[0] += indir_index.i[0]; 1244 index.i[1] += indir_index.i[1]; 1245 index.i[2] += indir_index.i[2]; 1246 index.i[3] += indir_index.i[3]; 1247 1248 /* for disabled execution channels, zero-out the index to 1249 * avoid using a potential garbage value. 
1250 */ 1251 for (i = 0; i < QUAD_SIZE; i++) { 1252 if ((execmask & (1 << i)) == 0) 1253 index.i[i] = 0; 1254 } 1255 } 1256 1257 /* There is an extra source register that is a second 1258 * subscript to a register file. Effectively it means that 1259 * the register file is actually a 2D array of registers. 1260 * 1261 * file[3][1], 1262 * where: 1263 * [3] = Dimension.Index 1264 */ 1265 if (reg->Register.Dimension) { 1266 index2D.i[0] = 1267 index2D.i[1] = 1268 index2D.i[2] = 1269 index2D.i[3] = reg->Dimension.Index; 1270 1271 /* Again, the second subscript index can be addressed indirectly 1272 * identically to the first one. 1273 * Nothing stops us from indirectly addressing the indirect register, 1274 * but there is no need for that, so we won't exercise it. 1275 * 1276 * file[ind[4].y+3][1], 1277 * where: 1278 * ind = DimIndirect.File 1279 * [4] = DimIndirect.Index 1280 * .y = DimIndirect.SwizzleX 1281 */ 1282 if (reg->Dimension.Indirect) { 1283 union tgsi_exec_channel index2; 1284 union tgsi_exec_channel indir_index; 1285 const uint execmask = mach->ExecMask; 1286 uint i; 1287 1288 index2.i[0] = 1289 index2.i[1] = 1290 index2.i[2] = 1291 index2.i[3] = reg->DimIndirect.Index; 1292 1293 swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); 1294 fetch_src_file_channel(mach, 1295 chan_index, 1296 reg->DimIndirect.File, 1297 swizzle, 1298 &index2, 1299 &ZeroVec, 1300 &indir_index); 1301 1302 index2D.i[0] += indir_index.i[0]; 1303 index2D.i[1] += indir_index.i[1]; 1304 index2D.i[2] += indir_index.i[2]; 1305 index2D.i[3] += indir_index.i[3]; 1306 1307 /* for disabled execution channels, zero-out the index to 1308 * avoid using a potential garbage value. 
1309 */ 1310 for (i = 0; i < QUAD_SIZE; i++) { 1311 if ((execmask & (1 << i)) == 0) { 1312 index2D.i[i] = 0; 1313 } 1314 } 1315 } 1316 1317 /* If by any chance there was a need for a 3D array of register 1318 * files, we would have to check whether Dimension is followed 1319 * by a dimension register and continue the saga. 1320 */ 1321 } else { 1322 index2D.i[0] = 1323 index2D.i[1] = 1324 index2D.i[2] = 1325 index2D.i[3] = 0; 1326 } 1327 1328 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1329 fetch_src_file_channel(mach, 1330 chan_index, 1331 reg->Register.File, 1332 swizzle, 1333 &index, 1334 &index2D, 1335 chan); 1336 1337 if (reg->Register.Absolute) { 1338 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1339 micro_abs(chan, chan); 1340 } else { 1341 micro_iabs(chan, chan); 1342 } 1343 } 1344 1345 if (reg->Register.Negate) { 1346 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1347 micro_neg(chan, chan); 1348 } else { 1349 micro_ineg(chan, chan); 1350 } 1351 } 1352} 1353 1354static void 1355store_dest(struct tgsi_exec_machine *mach, 1356 const union tgsi_exec_channel *chan, 1357 const struct tgsi_full_dst_register *reg, 1358 const struct tgsi_full_instruction *inst, 1359 uint chan_index, 1360 enum tgsi_exec_datatype dst_datatype) 1361{ 1362 uint i; 1363 union tgsi_exec_channel null; 1364 union tgsi_exec_channel *dst; 1365 union tgsi_exec_channel index2D; 1366 uint execmask = mach->ExecMask; 1367 int offset = 0; /* indirection offset */ 1368 int index; 1369 1370 /* for debugging */ 1371 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { 1372 check_inf_or_nan(chan); 1373 } 1374 1375 /* There is an extra source register that indirectly subscripts 1376 * a register file. The direct index now becomes an offset 1377 * that is being added to the indirect register. 
1378 * 1379 * file[ind[2].x+1], 1380 * where: 1381 * ind = Indirect.File 1382 * [2] = Indirect.Index 1383 * .x = Indirect.SwizzleX 1384 */ 1385 if (reg->Register.Indirect) { 1386 union tgsi_exec_channel index; 1387 union tgsi_exec_channel indir_index; 1388 uint swizzle; 1389 1390 /* which address register (always zero for now) */ 1391 index.i[0] = 1392 index.i[1] = 1393 index.i[2] = 1394 index.i[3] = reg->Indirect.Index; 1395 1396 /* get current value of address register[swizzle] */ 1397 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1398 1399 /* fetch values from the address/indirection register */ 1400 fetch_src_file_channel(mach, 1401 chan_index, 1402 reg->Indirect.File, 1403 swizzle, 1404 &index, 1405 &ZeroVec, 1406 &indir_index); 1407 1408 /* save indirection offset */ 1409 offset = indir_index.i[0]; 1410 } 1411 1412 /* There is an extra source register that is a second 1413 * subscript to a register file. Effectively it means that 1414 * the register file is actually a 2D array of registers. 1415 * 1416 * file[3][1], 1417 * where: 1418 * [3] = Dimension.Index 1419 */ 1420 if (reg->Register.Dimension) { 1421 index2D.i[0] = 1422 index2D.i[1] = 1423 index2D.i[2] = 1424 index2D.i[3] = reg->Dimension.Index; 1425 1426 /* Again, the second subscript index can be addressed indirectly 1427 * identically to the first one. 1428 * Nothing stops us from indirectly addressing the indirect register, 1429 * but there is no need for that, so we won't exercise it. 
1430 * 1431 * file[ind[4].y+3][1], 1432 * where: 1433 * ind = DimIndirect.File 1434 * [4] = DimIndirect.Index 1435 * .y = DimIndirect.SwizzleX 1436 */ 1437 if (reg->Dimension.Indirect) { 1438 union tgsi_exec_channel index2; 1439 union tgsi_exec_channel indir_index; 1440 const uint execmask = mach->ExecMask; 1441 unsigned swizzle; 1442 uint i; 1443 1444 index2.i[0] = 1445 index2.i[1] = 1446 index2.i[2] = 1447 index2.i[3] = reg->DimIndirect.Index; 1448 1449 swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); 1450 fetch_src_file_channel(mach, 1451 chan_index, 1452 reg->DimIndirect.File, 1453 swizzle, 1454 &index2, 1455 &ZeroVec, 1456 &indir_index); 1457 1458 index2D.i[0] += indir_index.i[0]; 1459 index2D.i[1] += indir_index.i[1]; 1460 index2D.i[2] += indir_index.i[2]; 1461 index2D.i[3] += indir_index.i[3]; 1462 1463 /* for disabled execution channels, zero-out the index to 1464 * avoid using a potential garbage value. 1465 */ 1466 for (i = 0; i < QUAD_SIZE; i++) { 1467 if ((execmask & (1 << i)) == 0) { 1468 index2D.i[i] = 0; 1469 } 1470 } 1471 } 1472 1473 /* If by any chance there was a need for a 3D array of register 1474 * files, we would have to check whether Dimension is followed 1475 * by a dimension register and continue the saga. 
1476 */ 1477 } else { 1478 index2D.i[0] = 1479 index2D.i[1] = 1480 index2D.i[2] = 1481 index2D.i[3] = 0; 1482 } 1483 1484 switch (reg->Register.File) { 1485 case TGSI_FILE_NULL: 1486 dst = &null; 1487 break; 1488 1489 case TGSI_FILE_OUTPUT: 1490 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1491 + reg->Register.Index; 1492 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1493#if 0 1494 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1495 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1496 for (i = 0; i < QUAD_SIZE; i++) 1497 if (execmask & (1 << i)) 1498 fprintf(stderr, "%f, ", chan->f[i]); 1499 fprintf(stderr, ")\n"); 1500 } 1501#endif 1502 break; 1503 1504 case TGSI_FILE_TEMPORARY: 1505 index = reg->Register.Index; 1506 assert( index < TGSI_EXEC_NUM_TEMPS ); 1507 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1508 break; 1509 1510 case TGSI_FILE_TEMPORARY_ARRAY: 1511 index = reg->Register.Index; 1512 assert( index < TGSI_EXEC_NUM_TEMPS ); 1513 assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS ); 1514 /* XXX we use index2D.i[0] here but somehow we might 1515 * end up with someone trying to store indirectly in 1516 * different buffers */ 1517 dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index]; 1518 break; 1519 1520 case TGSI_FILE_ADDRESS: 1521 index = reg->Register.Index; 1522 dst = &mach->Addrs[index].xyzw[chan_index]; 1523 break; 1524 1525 case TGSI_FILE_PREDICATE: 1526 index = reg->Register.Index; 1527 assert(index < TGSI_EXEC_NUM_PREDS); 1528 dst = &mach->Predicates[index].xyzw[chan_index]; 1529 break; 1530 1531 default: 1532 assert( 0 ); 1533 return; 1534 } 1535 1536 if (inst->Instruction.Predicate) { 1537 uint swizzle; 1538 union tgsi_exec_channel *pred; 1539 1540 switch (chan_index) { 1541 case CHAN_X: 1542 swizzle = inst->Predicate.SwizzleX; 1543 break; 1544 case CHAN_Y: 1545 swizzle = inst->Predicate.SwizzleY; 1546 break; 1547 case CHAN_Z: 1548 swizzle = inst->Predicate.SwizzleZ; 
1549 break; 1550 case CHAN_W: 1551 swizzle = inst->Predicate.SwizzleW; 1552 break; 1553 default: 1554 assert(0); 1555 return; 1556 } 1557 1558 assert(inst->Predicate.Index == 0); 1559 1560 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; 1561 1562 if (inst->Predicate.Negate) { 1563 for (i = 0; i < QUAD_SIZE; i++) { 1564 if (pred->u[i]) { 1565 execmask &= ~(1 << i); 1566 } 1567 } 1568 } else { 1569 for (i = 0; i < QUAD_SIZE; i++) { 1570 if (!pred->u[i]) { 1571 execmask &= ~(1 << i); 1572 } 1573 } 1574 } 1575 } 1576 1577 switch (inst->Instruction.Saturate) { 1578 case TGSI_SAT_NONE: 1579 for (i = 0; i < QUAD_SIZE; i++) 1580 if (execmask & (1 << i)) 1581 dst->i[i] = chan->i[i]; 1582 break; 1583 1584 case TGSI_SAT_ZERO_ONE: 1585 for (i = 0; i < QUAD_SIZE; i++) 1586 if (execmask & (1 << i)) { 1587 if (chan->f[i] < 0.0f) 1588 dst->f[i] = 0.0f; 1589 else if (chan->f[i] > 1.0f) 1590 dst->f[i] = 1.0f; 1591 else 1592 dst->i[i] = chan->i[i]; 1593 } 1594 break; 1595 1596 case TGSI_SAT_MINUS_PLUS_ONE: 1597 for (i = 0; i < QUAD_SIZE; i++) 1598 if (execmask & (1 << i)) { 1599 if (chan->f[i] < -1.0f) 1600 dst->f[i] = -1.0f; 1601 else if (chan->f[i] > 1.0f) 1602 dst->f[i] = 1.0f; 1603 else 1604 dst->i[i] = chan->i[i]; 1605 } 1606 break; 1607 1608 default: 1609 assert( 0 ); 1610 } 1611} 1612 1613#define FETCH(VAL,INDEX,CHAN)\ 1614 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 1615 1616#define IFETCH(VAL,INDEX,CHAN)\ 1617 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) 1618 1619 1620/** 1621 * Execute ARB-style KIL which is predicated by a src register. 1622 * Kill fragment if any of the four values is less than zero. 
1623 */ 1624static void 1625exec_kil(struct tgsi_exec_machine *mach, 1626 const struct tgsi_full_instruction *inst) 1627{ 1628 uint uniquemask; 1629 uint chan_index; 1630 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1631 union tgsi_exec_channel r[1]; 1632 1633 /* This mask stores component bits that were already tested. */ 1634 uniquemask = 0; 1635 1636 for (chan_index = 0; chan_index < 4; chan_index++) 1637 { 1638 uint swizzle; 1639 uint i; 1640 1641 /* unswizzle channel */ 1642 swizzle = tgsi_util_get_full_src_register_swizzle ( 1643 &inst->Src[0], 1644 chan_index); 1645 1646 /* check if the component has not been already tested */ 1647 if (uniquemask & (1 << swizzle)) 1648 continue; 1649 uniquemask |= 1 << swizzle; 1650 1651 FETCH(&r[0], 0, chan_index); 1652 for (i = 0; i < 4; i++) 1653 if (r[0].f[i] < 0.0f) 1654 kilmask |= 1 << i; 1655 } 1656 1657 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1658} 1659 1660/** 1661 * Execute NVIDIA-style KIL which is predicated by a condition code. 1662 * Kill fragment if the condition code is TRUE. 
1663 */ 1664static void 1665exec_kilp(struct tgsi_exec_machine *mach, 1666 const struct tgsi_full_instruction *inst) 1667{ 1668 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1669 1670 /* "unconditional" kil */ 1671 kilmask = mach->ExecMask; 1672 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1673} 1674 1675static void 1676emit_vertex(struct tgsi_exec_machine *mach) 1677{ 1678 /* FIXME: check for exec mask correctly 1679 unsigned i; 1680 for (i = 0; i < QUAD_SIZE; ++i) { 1681 if ((mach->ExecMask & (1 << i))) 1682 */ 1683 if (mach->ExecMask) { 1684 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 1685 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 1686 } 1687} 1688 1689static void 1690emit_primitive(struct tgsi_exec_machine *mach) 1691{ 1692 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; 1693 /* FIXME: check for exec mask correctly 1694 unsigned i; 1695 for (i = 0; i < QUAD_SIZE; ++i) { 1696 if ((mach->ExecMask & (1 << i))) 1697 */ 1698 if (mach->ExecMask) { 1699 ++(*prim_count); 1700 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 1701 mach->Primitives[*prim_count] = 0; 1702 } 1703} 1704 1705static void 1706conditional_emit_primitive(struct tgsi_exec_machine *mach) 1707{ 1708 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1709 int emitted_verts = 1710 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; 1711 if (emitted_verts) { 1712 emit_primitive(mach); 1713 } 1714 } 1715} 1716 1717 1718/* 1719 * Fetch four texture samples using STR texture coordinates. 
1720 */ 1721static void 1722fetch_texel( struct tgsi_sampler *sampler, 1723 const union tgsi_exec_channel *s, 1724 const union tgsi_exec_channel *t, 1725 const union tgsi_exec_channel *p, 1726 const union tgsi_exec_channel *c0, 1727 enum tgsi_sampler_control control, 1728 union tgsi_exec_channel *r, 1729 union tgsi_exec_channel *g, 1730 union tgsi_exec_channel *b, 1731 union tgsi_exec_channel *a ) 1732{ 1733 uint j; 1734 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1735 1736 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); 1737 1738 for (j = 0; j < 4; j++) { 1739 r->f[j] = rgba[0][j]; 1740 g->f[j] = rgba[1][j]; 1741 b->f[j] = rgba[2][j]; 1742 a->f[j] = rgba[3][j]; 1743 } 1744} 1745 1746 1747#define TEX_MODIFIER_NONE 0 1748#define TEX_MODIFIER_PROJECTED 1 1749#define TEX_MODIFIER_LOD_BIAS 2 1750#define TEX_MODIFIER_EXPLICIT_LOD 3 1751 1752 1753static void 1754exec_tex(struct tgsi_exec_machine *mach, 1755 const struct tgsi_full_instruction *inst, 1756 uint modifier) 1757{ 1758 const uint unit = inst->Src[1].Register.Index; 1759 union tgsi_exec_channel r[4]; 1760 const union tgsi_exec_channel *lod = &ZeroVec; 1761 enum tgsi_sampler_control control; 1762 uint chan; 1763 1764 if (modifier != TEX_MODIFIER_NONE) { 1765 FETCH(&r[3], 0, CHAN_W); 1766 if (modifier != TEX_MODIFIER_PROJECTED) { 1767 lod = &r[3]; 1768 } 1769 } 1770 1771 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 1772 control = tgsi_sampler_lod_explicit; 1773 } else { 1774 control = tgsi_sampler_lod_bias; 1775 } 1776 1777 switch (inst->Texture.Texture) { 1778 case TGSI_TEXTURE_1D: 1779 FETCH(&r[0], 0, CHAN_X); 1780 1781 if (modifier == TEX_MODIFIER_PROJECTED) { 1782 micro_div(&r[0], &r[0], &r[3]); 1783 } 1784 1785 fetch_texel(mach->Samplers[unit], 1786 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ 1787 control, 1788 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1789 break; 1790 case TGSI_TEXTURE_SHADOW1D: 1791 FETCH(&r[0], 0, CHAN_X); 1792 FETCH(&r[2], 0, CHAN_Z); 1793 1794 if (modifier == 
TEX_MODIFIER_PROJECTED) { 1795 micro_div(&r[0], &r[0], &r[3]); 1796 } 1797 1798 fetch_texel(mach->Samplers[unit], 1799 &r[0], &ZeroVec, &r[2], lod, /* S, T, P, LOD */ 1800 control, 1801 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1802 break; 1803 1804 case TGSI_TEXTURE_2D: 1805 case TGSI_TEXTURE_RECT: 1806 case TGSI_TEXTURE_SHADOW2D: 1807 case TGSI_TEXTURE_SHADOWRECT: 1808 FETCH(&r[0], 0, CHAN_X); 1809 FETCH(&r[1], 0, CHAN_Y); 1810 FETCH(&r[2], 0, CHAN_Z); 1811 1812 if (modifier == TEX_MODIFIER_PROJECTED) { 1813 micro_div(&r[0], &r[0], &r[3]); 1814 micro_div(&r[1], &r[1], &r[3]); 1815 micro_div(&r[2], &r[2], &r[3]); 1816 } 1817 1818 fetch_texel(mach->Samplers[unit], 1819 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ 1820 control, 1821 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1822 break; 1823 1824 case TGSI_TEXTURE_1D_ARRAY: 1825 FETCH(&r[0], 0, CHAN_X); 1826 FETCH(&r[1], 0, CHAN_Y); 1827 1828 if (modifier == TEX_MODIFIER_PROJECTED) { 1829 micro_div(&r[0], &r[0], &r[3]); 1830 } 1831 1832 fetch_texel(mach->Samplers[unit], 1833 &r[0], &r[1], &ZeroVec, lod, /* S, T, P, LOD */ 1834 control, 1835 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1836 break; 1837 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1838 FETCH(&r[0], 0, CHAN_X); 1839 FETCH(&r[1], 0, CHAN_Y); 1840 FETCH(&r[2], 0, CHAN_Z); 1841 1842 if (modifier == TEX_MODIFIER_PROJECTED) { 1843 micro_div(&r[0], &r[0], &r[3]); 1844 } 1845 1846 fetch_texel(mach->Samplers[unit], 1847 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ 1848 control, 1849 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1850 break; 1851 1852 case TGSI_TEXTURE_2D_ARRAY: 1853 FETCH(&r[0], 0, CHAN_X); 1854 FETCH(&r[1], 0, CHAN_Y); 1855 FETCH(&r[2], 0, CHAN_Z); 1856 1857 if (modifier == TEX_MODIFIER_PROJECTED) { 1858 micro_div(&r[0], &r[0], &r[3]); 1859 micro_div(&r[1], &r[1], &r[3]); 1860 } 1861 1862 fetch_texel(mach->Samplers[unit], 1863 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ 1864 control, 1865 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1866 break; 1867 case 
TGSI_TEXTURE_SHADOW2D_ARRAY: 1868 case TGSI_TEXTURE_SHADOWCUBE: 1869 FETCH(&r[0], 0, CHAN_X); 1870 FETCH(&r[1], 0, CHAN_Y); 1871 FETCH(&r[2], 0, CHAN_Z); 1872 FETCH(&r[3], 0, CHAN_W); 1873 1874 fetch_texel(mach->Samplers[unit], 1875 &r[0], &r[1], &r[2], &r[3], /* S, T, P, LOD */ 1876 control, 1877 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1878 break; 1879 case TGSI_TEXTURE_3D: 1880 case TGSI_TEXTURE_CUBE: 1881 FETCH(&r[0], 0, CHAN_X); 1882 FETCH(&r[1], 0, CHAN_Y); 1883 FETCH(&r[2], 0, CHAN_Z); 1884 1885 if (modifier == TEX_MODIFIER_PROJECTED) { 1886 micro_div(&r[0], &r[0], &r[3]); 1887 micro_div(&r[1], &r[1], &r[3]); 1888 micro_div(&r[2], &r[2], &r[3]); 1889 } 1890 1891 fetch_texel(mach->Samplers[unit], 1892 &r[0], &r[1], &r[2], lod, 1893 control, 1894 &r[0], &r[1], &r[2], &r[3]); 1895 break; 1896 1897 default: 1898 assert(0); 1899 } 1900 1901#if 0 1902 debug_printf("fetch r: %g %g %g %g\n", 1903 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 1904 debug_printf("fetch g: %g %g %g %g\n", 1905 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 1906 debug_printf("fetch b: %g %g %g %g\n", 1907 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 1908 debug_printf("fetch a: %g %g %g %g\n", 1909 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 1910#endif 1911 1912 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1913 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1914 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1915 } 1916 } 1917} 1918 1919static void 1920exec_txd(struct tgsi_exec_machine *mach, 1921 const struct tgsi_full_instruction *inst) 1922{ 1923 const uint unit = inst->Src[3].Register.Index; 1924 union tgsi_exec_channel r[4]; 1925 uint chan; 1926 1927 /* 1928 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 
1929 */ 1930 1931 switch (inst->Texture.Texture) { 1932 case TGSI_TEXTURE_1D: 1933 case TGSI_TEXTURE_SHADOW1D: 1934 1935 FETCH(&r[0], 0, CHAN_X); 1936 1937 fetch_texel(mach->Samplers[unit], 1938 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ 1939 tgsi_sampler_lod_bias, 1940 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1941 break; 1942 1943 case TGSI_TEXTURE_1D_ARRAY: 1944 case TGSI_TEXTURE_2D: 1945 case TGSI_TEXTURE_RECT: 1946 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1947 case TGSI_TEXTURE_SHADOW2D: 1948 case TGSI_TEXTURE_SHADOWRECT: 1949 1950 FETCH(&r[0], 0, CHAN_X); 1951 FETCH(&r[1], 0, CHAN_Y); 1952 FETCH(&r[2], 0, CHAN_Z); 1953 1954 fetch_texel(mach->Samplers[unit], 1955 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ 1956 tgsi_sampler_lod_bias, 1957 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1958 break; 1959 1960 case TGSI_TEXTURE_2D_ARRAY: 1961 case TGSI_TEXTURE_3D: 1962 case TGSI_TEXTURE_CUBE: 1963 1964 FETCH(&r[0], 0, CHAN_X); 1965 FETCH(&r[1], 0, CHAN_Y); 1966 FETCH(&r[2], 0, CHAN_Z); 1967 1968 fetch_texel(mach->Samplers[unit], 1969 &r[0], &r[1], &r[2], &ZeroVec, 1970 tgsi_sampler_lod_bias, 1971 &r[0], &r[1], &r[2], &r[3]); 1972 break; 1973 1974 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1975 1976 FETCH(&r[0], 0, CHAN_X); 1977 FETCH(&r[1], 0, CHAN_Y); 1978 FETCH(&r[2], 0, CHAN_Z); 1979 FETCH(&r[3], 0, CHAN_W); 1980 1981 fetch_texel(mach->Samplers[unit], 1982 &r[0], &r[1], &r[2], &r[3], 1983 tgsi_sampler_lod_bias, 1984 &r[0], &r[1], &r[2], &r[3]); 1985 break; 1986 1987 default: 1988 assert(0); 1989 } 1990 1991 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1992 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1993 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1994 } 1995 } 1996} 1997 1998 1999static void 2000exec_txf(struct tgsi_exec_machine *mach, 2001 const struct tgsi_full_instruction *inst) 2002{ 2003 struct tgsi_sampler *sampler; 2004 const uint unit = inst->Src[2].Register.Index; 2005 union tgsi_exec_channel r[4]; 2006 union 
tgsi_exec_channel offset[3]; 2007 uint chan; 2008 float rgba[NUM_CHANNELS][QUAD_SIZE]; 2009 int j; 2010 int8_t offsets[3]; 2011 2012 if (inst->Texture.NumOffsets == 1) { 2013 union tgsi_exec_channel index; 2014 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 2015 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2016 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 2017 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2018 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 2019 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 2020 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 2021 offsets[0] = offset[0].i[0]; 2022 offsets[1] = offset[1].i[0]; 2023 offsets[2] = offset[2].i[0]; 2024 } else 2025 offsets[0] = offsets[1] = offsets[2] = 0; 2026 2027 IFETCH(&r[3], 0, CHAN_W); 2028 2029 switch(inst->Texture.Texture) { 2030 case TGSI_TEXTURE_3D: 2031 case TGSI_TEXTURE_2D_ARRAY: 2032 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2033 IFETCH(&r[2], 0, CHAN_Z); 2034 /* fallthrough */ 2035 case TGSI_TEXTURE_2D: 2036 case TGSI_TEXTURE_RECT: 2037 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2038 case TGSI_TEXTURE_SHADOW2D: 2039 case TGSI_TEXTURE_SHADOWRECT: 2040 case TGSI_TEXTURE_1D_ARRAY: 2041 IFETCH(&r[1], 0, CHAN_Y); 2042 /* fallthrough */ 2043 case TGSI_TEXTURE_1D: 2044 case TGSI_TEXTURE_SHADOW1D: 2045 IFETCH(&r[0], 0, CHAN_X); 2046 break; 2047 default: 2048 assert(0); 2049 break; 2050 } 2051 2052 sampler = mach->Samplers[unit]; 2053 sampler->get_texel(sampler, r[0].i, r[1].i, r[2].i, r[3].i, 2054 offsets, rgba); 2055 2056 for (j = 0; j < QUAD_SIZE; j++) { 2057 r[0].f[j] = rgba[0][j]; 2058 r[1].f[j] = rgba[1][j]; 2059 r[2].f[j] = rgba[2][j]; 2060 r[3].f[j] = rgba[3][j]; 2061 } 2062 2063 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2064 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2065 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2066 } 2067 } 2068} 2069 2070static 
void 2071exec_txq(struct tgsi_exec_machine *mach, 2072 const struct tgsi_full_instruction *inst) 2073{ 2074 struct tgsi_sampler *sampler; 2075 const uint unit = inst->Src[1].Register.Index; 2076 int result[4]; 2077 union tgsi_exec_channel r[4], src; 2078 uint chan; 2079 int i,j; 2080 2081 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_INT); 2082 sampler = mach->Samplers[unit]; 2083 2084 sampler->get_dims(sampler, src.i[0], result); 2085 2086 for (i = 0; i < QUAD_SIZE; i++) { 2087 for (j = 0; j < 4; j++) { 2088 r[j].i[i] = result[j]; 2089 } 2090 } 2091 2092 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2093 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2094 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 2095 TGSI_EXEC_DATA_INT); 2096 } 2097 } 2098} 2099 2100static void 2101exec_sample(struct tgsi_exec_machine *mach, 2102 const struct tgsi_full_instruction *inst, 2103 uint modifier) 2104{ 2105 const uint resource_unit = inst->Src[1].Register.Index; 2106 const uint sampler_unit = inst->Src[2].Register.Index; 2107 union tgsi_exec_channel r[4]; 2108 const union tgsi_exec_channel *lod = &ZeroVec; 2109 enum tgsi_sampler_control control; 2110 uint chan; 2111 2112 if (modifier != TEX_MODIFIER_NONE) { 2113 if (modifier == TEX_MODIFIER_LOD_BIAS) 2114 FETCH(&r[3], 3, CHAN_X); 2115 else /*TEX_MODIFIER_LOD*/ 2116 FETCH(&r[3], 0, CHAN_W); 2117 2118 if (modifier != TEX_MODIFIER_PROJECTED) { 2119 lod = &r[3]; 2120 } 2121 } 2122 2123 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2124 control = tgsi_sampler_lod_explicit; 2125 } else { 2126 control = tgsi_sampler_lod_bias; 2127 } 2128 2129 switch (mach->Resources[resource_unit].Resource) { 2130 case TGSI_TEXTURE_1D: 2131 case TGSI_TEXTURE_SHADOW1D: 2132 FETCH(&r[0], 0, CHAN_X); 2133 2134 if (modifier == TEX_MODIFIER_PROJECTED) { 2135 micro_div(&r[0], &r[0], &r[3]); 2136 } 2137 2138 fetch_texel(mach->Samplers[sampler_unit], 2139 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ 2140 control, 2141 &r[0], 
&r[1], &r[2], &r[3]); /* R, G, B, A */ 2142 break; 2143 2144 case TGSI_TEXTURE_1D_ARRAY: 2145 case TGSI_TEXTURE_2D: 2146 case TGSI_TEXTURE_RECT: 2147 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2148 case TGSI_TEXTURE_SHADOW2D: 2149 case TGSI_TEXTURE_SHADOWRECT: 2150 FETCH(&r[0], 0, CHAN_X); 2151 FETCH(&r[1], 0, CHAN_Y); 2152 FETCH(&r[2], 0, CHAN_Z); 2153 2154 if (modifier == TEX_MODIFIER_PROJECTED) { 2155 micro_div(&r[0], &r[0], &r[3]); 2156 micro_div(&r[1], &r[1], &r[3]); 2157 micro_div(&r[2], &r[2], &r[3]); 2158 } 2159 2160 fetch_texel(mach->Samplers[sampler_unit], 2161 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ 2162 control, 2163 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2164 break; 2165 2166 case TGSI_TEXTURE_2D_ARRAY: 2167 case TGSI_TEXTURE_3D: 2168 case TGSI_TEXTURE_CUBE: 2169 FETCH(&r[0], 0, CHAN_X); 2170 FETCH(&r[1], 0, CHAN_Y); 2171 FETCH(&r[2], 0, CHAN_Z); 2172 2173 if (modifier == TEX_MODIFIER_PROJECTED) { 2174 micro_div(&r[0], &r[0], &r[3]); 2175 micro_div(&r[1], &r[1], &r[3]); 2176 micro_div(&r[2], &r[2], &r[3]); 2177 } 2178 2179 fetch_texel(mach->Samplers[sampler_unit], 2180 &r[0], &r[1], &r[2], lod, 2181 control, 2182 &r[0], &r[1], &r[2], &r[3]); 2183 break; 2184 2185 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2186 case TGSI_TEXTURE_SHADOWCUBE: 2187 FETCH(&r[0], 0, CHAN_X); 2188 FETCH(&r[1], 0, CHAN_Y); 2189 FETCH(&r[2], 0, CHAN_Z); 2190 FETCH(&r[3], 0, CHAN_W); 2191 2192 assert(modifier != TEX_MODIFIER_PROJECTED); 2193 2194 fetch_texel(mach->Samplers[sampler_unit], 2195 &r[0], &r[1], &r[2], &r[3], 2196 control, 2197 &r[0], &r[1], &r[2], &r[3]); 2198 break; 2199 2200 default: 2201 assert(0); 2202 } 2203 2204 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2205 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2206 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2207 } 2208 } 2209} 2210 2211static void 2212exec_sample_d(struct tgsi_exec_machine *mach, 2213 const struct tgsi_full_instruction *inst) 2214{ 2215 const uint resource_unit = 
inst->Src[1].Register.Index; 2216 const uint sampler_unit = inst->Src[2].Register.Index; 2217 union tgsi_exec_channel r[4]; 2218 uint chan; 2219 /* 2220 * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet. 2221 */ 2222 2223 switch (mach->Resources[resource_unit].Resource) { 2224 case TGSI_TEXTURE_1D: 2225 case TGSI_TEXTURE_SHADOW1D: 2226 2227 FETCH(&r[0], 0, CHAN_X); 2228 2229 fetch_texel(mach->Samplers[sampler_unit], 2230 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ 2231 tgsi_sampler_lod_bias, 2232 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2233 break; 2234 2235 case TGSI_TEXTURE_2D: 2236 case TGSI_TEXTURE_RECT: 2237 case TGSI_TEXTURE_SHADOW2D: 2238 case TGSI_TEXTURE_SHADOWRECT: 2239 2240 FETCH(&r[0], 0, CHAN_X); 2241 FETCH(&r[1], 0, CHAN_Y); 2242 FETCH(&r[2], 0, CHAN_Z); 2243 2244 fetch_texel(mach->Samplers[sampler_unit], 2245 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ 2246 tgsi_sampler_lod_bias, 2247 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2248 break; 2249 2250 case TGSI_TEXTURE_3D: 2251 case TGSI_TEXTURE_CUBE: 2252 2253 FETCH(&r[0], 0, CHAN_X); 2254 FETCH(&r[1], 0, CHAN_Y); 2255 FETCH(&r[2], 0, CHAN_Z); 2256 2257 fetch_texel(mach->Samplers[sampler_unit], 2258 &r[0], &r[1], &r[2], &ZeroVec, 2259 tgsi_sampler_lod_bias, 2260 &r[0], &r[1], &r[2], &r[3]); 2261 break; 2262 2263 default: 2264 assert(0); 2265 } 2266 2267 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2268 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2269 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2270 } 2271 } 2272} 2273 2274 2275/** 2276 * Evaluate a constant-valued coefficient at the position of the 2277 * current quad. 
2278 */ 2279static void 2280eval_constant_coef( 2281 struct tgsi_exec_machine *mach, 2282 unsigned attrib, 2283 unsigned chan ) 2284{ 2285 unsigned i; 2286 2287 for( i = 0; i < QUAD_SIZE; i++ ) { 2288 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2289 } 2290} 2291 2292/** 2293 * Evaluate a linear-valued coefficient at the position of the 2294 * current quad. 2295 */ 2296static void 2297eval_linear_coef( 2298 struct tgsi_exec_machine *mach, 2299 unsigned attrib, 2300 unsigned chan ) 2301{ 2302 const float x = mach->QuadPos.xyzw[0].f[0]; 2303 const float y = mach->QuadPos.xyzw[1].f[0]; 2304 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2305 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2306 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2307 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2308 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2309 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2310 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 2311} 2312 2313/** 2314 * Evaluate a perspective-valued coefficient at the position of the 2315 * current quad. 
2316 */ 2317static void 2318eval_perspective_coef( 2319 struct tgsi_exec_machine *mach, 2320 unsigned attrib, 2321 unsigned chan ) 2322{ 2323 const float x = mach->QuadPos.xyzw[0].f[0]; 2324 const float y = mach->QuadPos.xyzw[1].f[0]; 2325 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2326 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2327 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2328 const float *w = mach->QuadPos.xyzw[3].f; 2329 /* divide by W here */ 2330 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 2331 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 2332 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 2333 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 2334} 2335 2336 2337typedef void (* eval_coef_func)( 2338 struct tgsi_exec_machine *mach, 2339 unsigned attrib, 2340 unsigned chan ); 2341 2342static void 2343exec_declaration(struct tgsi_exec_machine *mach, 2344 const struct tgsi_full_declaration *decl) 2345{ 2346 if (decl->Declaration.File == TGSI_FILE_RESOURCE) { 2347 mach->Resources[decl->Range.First] = decl->Resource; 2348 return; 2349 } 2350 2351 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 2352 if (decl->Declaration.File == TGSI_FILE_INPUT) { 2353 uint first, last, mask; 2354 2355 first = decl->Range.First; 2356 last = decl->Range.Last; 2357 mask = decl->Declaration.UsageMask; 2358 2359 /* XXX we could remove this special-case code since 2360 * mach->InterpCoefs[first].a0 should already have the 2361 * front/back-face value. But we should first update the 2362 * ureg code to emit the right UsageMask value (WRITEMASK_X). 2363 * Then, we could remove the tgsi_exec_machine::Face field. 
2364 */ 2365 /* XXX make FACE a system value */ 2366 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 2367 uint i; 2368 2369 assert(decl->Semantic.Index == 0); 2370 assert(first == last); 2371 2372 for (i = 0; i < QUAD_SIZE; i++) { 2373 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 2374 } 2375 } else { 2376 eval_coef_func eval; 2377 uint i, j; 2378 2379 switch (decl->Declaration.Interpolate) { 2380 case TGSI_INTERPOLATE_CONSTANT: 2381 eval = eval_constant_coef; 2382 break; 2383 2384 case TGSI_INTERPOLATE_LINEAR: 2385 eval = eval_linear_coef; 2386 break; 2387 2388 case TGSI_INTERPOLATE_PERSPECTIVE: 2389 eval = eval_perspective_coef; 2390 break; 2391 2392 case TGSI_INTERPOLATE_COLOR: 2393 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; 2394 break; 2395 2396 default: 2397 assert(0); 2398 return; 2399 } 2400 2401 for (j = 0; j < NUM_CHANNELS; j++) { 2402 if (mask & (1 << j)) { 2403 for (i = first; i <= last; i++) { 2404 eval(mach, i, j); 2405 } 2406 } 2407 } 2408 } 2409 } 2410 } 2411 2412 if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 2413 mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; 2414 } 2415} 2416 2417 2418typedef void (* micro_op)(union tgsi_exec_channel *dst); 2419 2420static void 2421exec_vector(struct tgsi_exec_machine *mach, 2422 const struct tgsi_full_instruction *inst, 2423 micro_op op, 2424 enum tgsi_exec_datatype dst_datatype) 2425{ 2426 unsigned int chan; 2427 2428 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2429 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2430 union tgsi_exec_channel dst; 2431 2432 op(&dst); 2433 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 2434 } 2435 } 2436} 2437 2438typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 2439 const union tgsi_exec_channel *src); 2440 2441static void 2442exec_scalar_unary(struct tgsi_exec_machine *mach, 2443 const struct tgsi_full_instruction *inst, 2444 micro_unary_op op, 2445 enum tgsi_exec_datatype 
dst_datatype, 2446 enum tgsi_exec_datatype src_datatype) 2447{ 2448 unsigned int chan; 2449 union tgsi_exec_channel src; 2450 union tgsi_exec_channel dst; 2451 2452 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); 2453 op(&dst, &src); 2454 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2455 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2456 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 2457 } 2458 } 2459} 2460 2461static void 2462exec_vector_unary(struct tgsi_exec_machine *mach, 2463 const struct tgsi_full_instruction *inst, 2464 micro_unary_op op, 2465 enum tgsi_exec_datatype dst_datatype, 2466 enum tgsi_exec_datatype src_datatype) 2467{ 2468 unsigned int chan; 2469 struct tgsi_exec_vector dst; 2470 2471 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2472 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2473 union tgsi_exec_channel src; 2474 2475 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 2476 op(&dst.xyzw[chan], &src); 2477 } 2478 } 2479 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2480 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2481 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2482 } 2483 } 2484} 2485 2486typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 2487 const union tgsi_exec_channel *src0, 2488 const union tgsi_exec_channel *src1); 2489 2490static void 2491exec_scalar_binary(struct tgsi_exec_machine *mach, 2492 const struct tgsi_full_instruction *inst, 2493 micro_binary_op op, 2494 enum tgsi_exec_datatype dst_datatype, 2495 enum tgsi_exec_datatype src_datatype) 2496{ 2497 unsigned int chan; 2498 union tgsi_exec_channel src[2]; 2499 union tgsi_exec_channel dst; 2500 2501 fetch_source(mach, &src[0], &inst->Src[0], CHAN_X, src_datatype); 2502 fetch_source(mach, &src[1], &inst->Src[1], CHAN_Y, src_datatype); 2503 op(&dst, &src[0], &src[1]); 2504 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2505 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 
2506 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 2507 } 2508 } 2509} 2510 2511static void 2512exec_vector_binary(struct tgsi_exec_machine *mach, 2513 const struct tgsi_full_instruction *inst, 2514 micro_binary_op op, 2515 enum tgsi_exec_datatype dst_datatype, 2516 enum tgsi_exec_datatype src_datatype) 2517{ 2518 unsigned int chan; 2519 struct tgsi_exec_vector dst; 2520 2521 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2522 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2523 union tgsi_exec_channel src[2]; 2524 2525 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2526 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2527 op(&dst.xyzw[chan], &src[0], &src[1]); 2528 } 2529 } 2530 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2531 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2532 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2533 } 2534 } 2535} 2536 2537typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 2538 const union tgsi_exec_channel *src0, 2539 const union tgsi_exec_channel *src1, 2540 const union tgsi_exec_channel *src2); 2541 2542static void 2543exec_vector_trinary(struct tgsi_exec_machine *mach, 2544 const struct tgsi_full_instruction *inst, 2545 micro_trinary_op op, 2546 enum tgsi_exec_datatype dst_datatype, 2547 enum tgsi_exec_datatype src_datatype) 2548{ 2549 unsigned int chan; 2550 struct tgsi_exec_vector dst; 2551 2552 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2553 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2554 union tgsi_exec_channel src[3]; 2555 2556 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2557 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2558 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 2559 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 2560 } 2561 } 2562 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2563 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2564 
store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2565 } 2566 } 2567} 2568 2569static void 2570exec_dp3(struct tgsi_exec_machine *mach, 2571 const struct tgsi_full_instruction *inst) 2572{ 2573 unsigned int chan; 2574 union tgsi_exec_channel arg[3]; 2575 2576 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2577 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2578 micro_mul(&arg[2], &arg[0], &arg[1]); 2579 2580 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { 2581 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2582 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 2583 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2584 } 2585 2586 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2587 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2588 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2589 } 2590 } 2591} 2592 2593static void 2594exec_dp4(struct tgsi_exec_machine *mach, 2595 const struct tgsi_full_instruction *inst) 2596{ 2597 unsigned int chan; 2598 union tgsi_exec_channel arg[3]; 2599 2600 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2601 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2602 micro_mul(&arg[2], &arg[0], &arg[1]); 2603 2604 for (chan = CHAN_Y; chan <= CHAN_W; chan++) { 2605 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2606 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 2607 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2608 } 2609 2610 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2611 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2612 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2613 } 2614 } 2615} 2616 2617static void 2618exec_dp2a(struct tgsi_exec_machine *mach, 2619 const struct tgsi_full_instruction *inst) 2620{ 2621 unsigned int chan; 2622 union 
tgsi_exec_channel arg[3]; 2623 2624 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2625 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2626 micro_mul(&arg[2], &arg[0], &arg[1]); 2627 2628 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2629 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2630 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 2631 2632 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2633 micro_add(&arg[0], &arg[0], &arg[1]); 2634 2635 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2636 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2637 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2638 } 2639 } 2640} 2641 2642static void 2643exec_dph(struct tgsi_exec_machine *mach, 2644 const struct tgsi_full_instruction *inst) 2645{ 2646 unsigned int chan; 2647 union tgsi_exec_channel arg[3]; 2648 2649 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2650 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2651 micro_mul(&arg[2], &arg[0], &arg[1]); 2652 2653 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2654 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2655 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2656 2657 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2658 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2659 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 2660 2661 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); 2662 micro_add(&arg[0], &arg[0], &arg[1]); 2663 2664 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2665 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2666 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2667 } 2668 } 2669} 2670 2671static void 2672exec_dp2(struct 
tgsi_exec_machine *mach, 2673 const struct tgsi_full_instruction *inst) 2674{ 2675 unsigned int chan; 2676 union tgsi_exec_channel arg[3]; 2677 2678 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2679 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2680 micro_mul(&arg[2], &arg[0], &arg[1]); 2681 2682 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2683 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2684 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2685 2686 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2687 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2688 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2689 } 2690 } 2691} 2692 2693static void 2694exec_nrm4(struct tgsi_exec_machine *mach, 2695 const struct tgsi_full_instruction *inst) 2696{ 2697 unsigned int chan; 2698 union tgsi_exec_channel arg[4]; 2699 union tgsi_exec_channel scale; 2700 2701 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2702 micro_mul(&scale, &arg[0], &arg[0]); 2703 2704 for (chan = CHAN_Y; chan <= CHAN_W; chan++) { 2705 union tgsi_exec_channel product; 2706 2707 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2708 micro_mul(&product, &arg[chan], &arg[chan]); 2709 micro_add(&scale, &scale, &product); 2710 } 2711 2712 micro_rsq(&scale, &scale); 2713 2714 for (chan = CHAN_X; chan <= CHAN_W; chan++) { 2715 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2716 micro_mul(&arg[chan], &arg[chan], &scale); 2717 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2718 } 2719 } 2720} 2721 2722static void 2723exec_nrm3(struct tgsi_exec_machine *mach, 2724 const struct tgsi_full_instruction *inst) 2725{ 2726 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 2727 unsigned int chan; 2728 union tgsi_exec_channel arg[3]; 2729 union tgsi_exec_channel scale; 2730 2731 
fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2732 micro_mul(&scale, &arg[0], &arg[0]); 2733 2734 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { 2735 union tgsi_exec_channel product; 2736 2737 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2738 micro_mul(&product, &arg[chan], &arg[chan]); 2739 micro_add(&scale, &scale, &product); 2740 } 2741 2742 micro_rsq(&scale, &scale); 2743 2744 for (chan = CHAN_X; chan <= CHAN_Z; chan++) { 2745 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2746 micro_mul(&arg[chan], &arg[chan], &scale); 2747 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2748 } 2749 } 2750 } 2751 2752 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2753 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2754 } 2755} 2756 2757static void 2758exec_scs(struct tgsi_exec_machine *mach, 2759 const struct tgsi_full_instruction *inst) 2760{ 2761 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 2762 union tgsi_exec_channel arg; 2763 union tgsi_exec_channel result; 2764 2765 fetch_source(mach, &arg, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2766 2767 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2768 micro_cos(&result, &arg); 2769 store_dest(mach, &result, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); 2770 } 2771 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2772 micro_sin(&result, &arg); 2773 store_dest(mach, &result, &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2774 } 2775 } 2776 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2777 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2778 } 2779 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2780 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2781 } 2782} 2783 2784static void 2785exec_x2d(struct tgsi_exec_machine *mach, 2786 const struct tgsi_full_instruction 
*inst) 2787{ 2788 union tgsi_exec_channel r[4]; 2789 union tgsi_exec_channel d[2]; 2790 2791 fetch_source(mach, &r[0], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2792 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2793 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) { 2794 fetch_source(mach, &r[2], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2795 micro_mul(&r[2], &r[2], &r[0]); 2796 fetch_source(mach, &r[3], &inst->Src[2], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2797 micro_mul(&r[3], &r[3], &r[1]); 2798 micro_add(&r[2], &r[2], &r[3]); 2799 fetch_source(mach, &r[3], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2800 micro_add(&d[0], &r[2], &r[3]); 2801 } 2802 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) { 2803 fetch_source(mach, &r[2], &inst->Src[2], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2804 micro_mul(&r[2], &r[2], &r[0]); 2805 fetch_source(mach, &r[3], &inst->Src[2], CHAN_W, TGSI_EXEC_DATA_FLOAT); 2806 micro_mul(&r[3], &r[3], &r[1]); 2807 micro_add(&r[2], &r[2], &r[3]); 2808 fetch_source(mach, &r[3], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2809 micro_add(&d[1], &r[2], &r[3]); 2810 } 2811 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2812 store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); 2813 } 2814 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2815 store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2816 } 2817 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2818 store_dest(mach, &d[0], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2819 } 2820 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2821 store_dest(mach, &d[1], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2822 } 2823} 2824 2825static void 2826exec_rfl(struct tgsi_exec_machine *mach, 2827 const struct tgsi_full_instruction *inst) 2828{ 2829 union tgsi_exec_channel r[9]; 2830 2831 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 2832 /* r0 = dp3(src0, 
src0) */ 2833 fetch_source(mach, &r[2], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2834 micro_mul(&r[0], &r[2], &r[2]); 2835 fetch_source(mach, &r[4], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2836 micro_mul(&r[8], &r[4], &r[4]); 2837 micro_add(&r[0], &r[0], &r[8]); 2838 fetch_source(mach, &r[6], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2839 micro_mul(&r[8], &r[6], &r[6]); 2840 micro_add(&r[0], &r[0], &r[8]); 2841 2842 /* r1 = dp3(src0, src1) */ 2843 fetch_source(mach, &r[3], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2844 micro_mul(&r[1], &r[2], &r[3]); 2845 fetch_source(mach, &r[5], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2846 micro_mul(&r[8], &r[4], &r[5]); 2847 micro_add(&r[1], &r[1], &r[8]); 2848 fetch_source(mach, &r[7], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2849 micro_mul(&r[8], &r[6], &r[7]); 2850 micro_add(&r[1], &r[1], &r[8]); 2851 2852 /* r1 = 2 * r1 / r0 */ 2853 micro_add(&r[1], &r[1], &r[1]); 2854 micro_div(&r[1], &r[1], &r[0]); 2855 2856 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2857 micro_mul(&r[2], &r[2], &r[1]); 2858 micro_sub(&r[2], &r[2], &r[3]); 2859 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); 2860 } 2861 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2862 micro_mul(&r[4], &r[4], &r[1]); 2863 micro_sub(&r[4], &r[4], &r[5]); 2864 store_dest(mach, &r[4], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2865 } 2866 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2867 micro_mul(&r[6], &r[6], &r[1]); 2868 micro_sub(&r[6], &r[6], &r[7]); 2869 store_dest(mach, &r[6], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2870 } 2871 } 2872 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2873 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2874 } 2875} 2876 2877static void 2878exec_xpd(struct tgsi_exec_machine *mach, 2879 const struct tgsi_full_instruction *inst) 2880{ 2881 union tgsi_exec_channel r[6]; 2882 union 
tgsi_exec_channel d[3]; 2883 2884 fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2885 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2886 2887 micro_mul(&r[2], &r[0], &r[1]); 2888 2889 fetch_source(mach, &r[3], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2890 fetch_source(mach, &r[4], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2891 2892 micro_mul(&r[5], &r[3], &r[4] ); 2893 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2894 2895 fetch_source(mach, &r[2], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2896 2897 micro_mul(&r[3], &r[3], &r[2]); 2898 2899 fetch_source(mach, &r[5], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2900 2901 micro_mul(&r[1], &r[1], &r[5]); 2902 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2903 2904 micro_mul(&r[5], &r[5], &r[4]); 2905 micro_mul(&r[0], &r[0], &r[2]); 2906 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2907 2908 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2909 store_dest(mach, &d[CHAN_X], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); 2910 } 2911 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2912 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2913 } 2914 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2915 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2916 } 2917 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2918 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2919 } 2920} 2921 2922static void 2923exec_dst(struct tgsi_exec_machine *mach, 2924 const struct tgsi_full_instruction *inst) 2925{ 2926 union tgsi_exec_channel r[2]; 2927 union tgsi_exec_channel d[4]; 2928 2929 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2930 fetch_source(mach, &r[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2931 fetch_source(mach, &r[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2932 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2933 } 2934 if 
(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2935 fetch_source(mach, &d[CHAN_Z], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2936 } 2937 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2938 fetch_source(mach, &d[CHAN_W], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); 2939 } 2940 2941 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2942 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); 2943 } 2944 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2945 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2946 } 2947 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2948 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2949 } 2950 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2951 store_dest(mach, &d[CHAN_W], &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2952 } 2953} 2954 2955static void 2956exec_log(struct tgsi_exec_machine *mach, 2957 const struct tgsi_full_instruction *inst) 2958{ 2959 union tgsi_exec_channel r[3]; 2960 2961 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2962 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 2963 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 2964 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 2965 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2966 store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); 2967 } 2968 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2969 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 2970 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 2971 store_dest(mach, &r[0], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2972 } 2973 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2974 store_dest(mach, &r[1], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2975 } 2976 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2977 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, 
TGSI_EXEC_DATA_FLOAT); 2978 } 2979} 2980 2981static void 2982exec_exp(struct tgsi_exec_machine *mach, 2983 const struct tgsi_full_instruction *inst) 2984{ 2985 union tgsi_exec_channel r[3]; 2986 2987 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2988 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 2989 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2990 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 2991 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); 2992 } 2993 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2994 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 2995 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2996 } 2997 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2998 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 2999 store_dest(mach, &r[2], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3000 } 3001 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3002 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 3003 } 3004} 3005 3006static void 3007exec_lit(struct tgsi_exec_machine *mach, 3008 const struct tgsi_full_instruction *inst) 3009{ 3010 union tgsi_exec_channel r[3]; 3011 union tgsi_exec_channel d[3]; 3012 3013 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3014 fetch_source(mach, &r[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 3015 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3016 fetch_source(mach, &r[1], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3017 micro_max(&r[1], &r[1], &ZeroVec); 3018 3019 fetch_source(mach, &r[2], &inst->Src[0], CHAN_W, TGSI_EXEC_DATA_FLOAT); 3020 micro_min(&r[2], &r[2], &P128Vec); 3021 micro_max(&r[2], &r[2], &M128Vec); 3022 micro_pow(&r[1], &r[1], &r[2]); 3023 micro_lt(&d[CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 3024 store_dest(mach, &d[CHAN_Z], &inst->Dst[0], inst, CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3025 } 3026 if (inst->Dst[0].Register.WriteMask & 
TGSI_WRITEMASK_Y) { 3027 micro_max(&d[CHAN_Y], &r[0], &ZeroVec); 3028 store_dest(mach, &d[CHAN_Y], &inst->Dst[0], inst, CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3029 } 3030 } 3031 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3032 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_X, TGSI_EXEC_DATA_FLOAT); 3033 } 3034 3035 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3036 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 3037 } 3038} 3039 3040static void 3041exec_break(struct tgsi_exec_machine *mach) 3042{ 3043 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3044 /* turn off loop channels for each enabled exec channel */ 3045 mach->LoopMask &= ~mach->ExecMask; 3046 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3047 UPDATE_EXEC_MASK(mach); 3048 } else { 3049 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 3050 3051 mach->Switch.mask = 0x0; 3052 3053 UPDATE_EXEC_MASK(mach); 3054 } 3055} 3056 3057static void 3058exec_switch(struct tgsi_exec_machine *mach, 3059 const struct tgsi_full_instruction *inst) 3060{ 3061 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3062 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3063 3064 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3065 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); 3066 mach->Switch.mask = 0x0; 3067 mach->Switch.defaultMask = 0x0; 3068 3069 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3070 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3071 3072 UPDATE_EXEC_MASK(mach); 3073} 3074 3075static void 3076exec_case(struct tgsi_exec_machine *mach, 3077 const struct tgsi_full_instruction *inst) 3078{ 3079 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3080 union tgsi_exec_channel src; 3081 uint mask = 0; 3082 3083 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); 3084 3085 if (mach->Switch.selector.u[0] == src.u[0]) { 3086 mask 
|= 0x1; 3087 } 3088 if (mach->Switch.selector.u[1] == src.u[1]) { 3089 mask |= 0x2; 3090 } 3091 if (mach->Switch.selector.u[2] == src.u[2]) { 3092 mask |= 0x4; 3093 } 3094 if (mach->Switch.selector.u[3] == src.u[3]) { 3095 mask |= 0x8; 3096 } 3097 3098 mach->Switch.defaultMask |= mask; 3099 3100 mach->Switch.mask |= mask & prevMask; 3101 3102 UPDATE_EXEC_MASK(mach); 3103} 3104 3105static void 3106exec_default(struct tgsi_exec_machine *mach) 3107{ 3108 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3109 3110 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 3111 3112 UPDATE_EXEC_MASK(mach); 3113} 3114 3115static void 3116exec_endswitch(struct tgsi_exec_machine *mach) 3117{ 3118 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 3119 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3120 3121 UPDATE_EXEC_MASK(mach); 3122} 3123 3124static void 3125micro_i2f(union tgsi_exec_channel *dst, 3126 const union tgsi_exec_channel *src) 3127{ 3128 dst->f[0] = (float)src->i[0]; 3129 dst->f[1] = (float)src->i[1]; 3130 dst->f[2] = (float)src->i[2]; 3131 dst->f[3] = (float)src->i[3]; 3132} 3133 3134static void 3135micro_not(union tgsi_exec_channel *dst, 3136 const union tgsi_exec_channel *src) 3137{ 3138 dst->u[0] = ~src->u[0]; 3139 dst->u[1] = ~src->u[1]; 3140 dst->u[2] = ~src->u[2]; 3141 dst->u[3] = ~src->u[3]; 3142} 3143 3144static void 3145micro_shl(union tgsi_exec_channel *dst, 3146 const union tgsi_exec_channel *src0, 3147 const union tgsi_exec_channel *src1) 3148{ 3149 dst->u[0] = src0->u[0] << src1->u[0]; 3150 dst->u[1] = src0->u[1] << src1->u[1]; 3151 dst->u[2] = src0->u[2] << src1->u[2]; 3152 dst->u[3] = src0->u[3] << src1->u[3]; 3153} 3154 3155static void 3156micro_and(union tgsi_exec_channel *dst, 3157 const union tgsi_exec_channel *src0, 3158 const union tgsi_exec_channel *src1) 3159{ 3160 dst->u[0] = src0->u[0] & src1->u[0]; 3161 dst->u[1] = src0->u[1] & src1->u[1]; 3162 dst->u[2] = src0->u[2] & src1->u[2]; 3163 dst->u[3] = 
src0->u[3] & src1->u[3]; 3164} 3165 3166static void 3167micro_or(union tgsi_exec_channel *dst, 3168 const union tgsi_exec_channel *src0, 3169 const union tgsi_exec_channel *src1) 3170{ 3171 dst->u[0] = src0->u[0] | src1->u[0]; 3172 dst->u[1] = src0->u[1] | src1->u[1]; 3173 dst->u[2] = src0->u[2] | src1->u[2]; 3174 dst->u[3] = src0->u[3] | src1->u[3]; 3175} 3176 3177static void 3178micro_xor(union tgsi_exec_channel *dst, 3179 const union tgsi_exec_channel *src0, 3180 const union tgsi_exec_channel *src1) 3181{ 3182 dst->u[0] = src0->u[0] ^ src1->u[0]; 3183 dst->u[1] = src0->u[1] ^ src1->u[1]; 3184 dst->u[2] = src0->u[2] ^ src1->u[2]; 3185 dst->u[3] = src0->u[3] ^ src1->u[3]; 3186} 3187 3188static void 3189micro_mod(union tgsi_exec_channel *dst, 3190 const union tgsi_exec_channel *src0, 3191 const union tgsi_exec_channel *src1) 3192{ 3193 dst->i[0] = src0->i[0] % src1->i[0]; 3194 dst->i[1] = src0->i[1] % src1->i[1]; 3195 dst->i[2] = src0->i[2] % src1->i[2]; 3196 dst->i[3] = src0->i[3] % src1->i[3]; 3197} 3198 3199static void 3200micro_f2i(union tgsi_exec_channel *dst, 3201 const union tgsi_exec_channel *src) 3202{ 3203 dst->i[0] = (int)src->f[0]; 3204 dst->i[1] = (int)src->f[1]; 3205 dst->i[2] = (int)src->f[2]; 3206 dst->i[3] = (int)src->f[3]; 3207} 3208 3209static void 3210micro_idiv(union tgsi_exec_channel *dst, 3211 const union tgsi_exec_channel *src0, 3212 const union tgsi_exec_channel *src1) 3213{ 3214 dst->i[0] = src0->i[0] / src1->i[0]; 3215 dst->i[1] = src0->i[1] / src1->i[1]; 3216 dst->i[2] = src0->i[2] / src1->i[2]; 3217 dst->i[3] = src0->i[3] / src1->i[3]; 3218} 3219 3220static void 3221micro_imax(union tgsi_exec_channel *dst, 3222 const union tgsi_exec_channel *src0, 3223 const union tgsi_exec_channel *src1) 3224{ 3225 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 3226 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 3227 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 3228 dst->i[3] = src0->i[3] > src1->i[3] ? 
src0->i[3] : src1->i[3]; 3229} 3230 3231static void 3232micro_imin(union tgsi_exec_channel *dst, 3233 const union tgsi_exec_channel *src0, 3234 const union tgsi_exec_channel *src1) 3235{ 3236 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 3237 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 3238 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 3239 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 3240} 3241 3242static void 3243micro_isge(union tgsi_exec_channel *dst, 3244 const union tgsi_exec_channel *src0, 3245 const union tgsi_exec_channel *src1) 3246{ 3247 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 3248 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 3249 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 3250 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 3251} 3252 3253static void 3254micro_ishr(union tgsi_exec_channel *dst, 3255 const union tgsi_exec_channel *src0, 3256 const union tgsi_exec_channel *src1) 3257{ 3258 dst->i[0] = src0->i[0] >> src1->i[0]; 3259 dst->i[1] = src0->i[1] >> src1->i[1]; 3260 dst->i[2] = src0->i[2] >> src1->i[2]; 3261 dst->i[3] = src0->i[3] >> src1->i[3]; 3262} 3263 3264static void 3265micro_islt(union tgsi_exec_channel *dst, 3266 const union tgsi_exec_channel *src0, 3267 const union tgsi_exec_channel *src1) 3268{ 3269 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 3270 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 3271 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 3272 dst->i[3] = src0->i[3] < src1->i[3] ? 
-1 : 0; 3273} 3274 3275static void 3276micro_f2u(union tgsi_exec_channel *dst, 3277 const union tgsi_exec_channel *src) 3278{ 3279 dst->u[0] = (uint)src->f[0]; 3280 dst->u[1] = (uint)src->f[1]; 3281 dst->u[2] = (uint)src->f[2]; 3282 dst->u[3] = (uint)src->f[3]; 3283} 3284 3285static void 3286micro_u2f(union tgsi_exec_channel *dst, 3287 const union tgsi_exec_channel *src) 3288{ 3289 dst->f[0] = (float)src->u[0]; 3290 dst->f[1] = (float)src->u[1]; 3291 dst->f[2] = (float)src->u[2]; 3292 dst->f[3] = (float)src->u[3]; 3293} 3294 3295static void 3296micro_uadd(union tgsi_exec_channel *dst, 3297 const union tgsi_exec_channel *src0, 3298 const union tgsi_exec_channel *src1) 3299{ 3300 dst->u[0] = src0->u[0] + src1->u[0]; 3301 dst->u[1] = src0->u[1] + src1->u[1]; 3302 dst->u[2] = src0->u[2] + src1->u[2]; 3303 dst->u[3] = src0->u[3] + src1->u[3]; 3304} 3305 3306static void 3307micro_udiv(union tgsi_exec_channel *dst, 3308 const union tgsi_exec_channel *src0, 3309 const union tgsi_exec_channel *src1) 3310{ 3311 dst->u[0] = src0->u[0] / src1->u[0]; 3312 dst->u[1] = src0->u[1] / src1->u[1]; 3313 dst->u[2] = src0->u[2] / src1->u[2]; 3314 dst->u[3] = src0->u[3] / src1->u[3]; 3315} 3316 3317static void 3318micro_umad(union tgsi_exec_channel *dst, 3319 const union tgsi_exec_channel *src0, 3320 const union tgsi_exec_channel *src1, 3321 const union tgsi_exec_channel *src2) 3322{ 3323 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 3324 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 3325 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 3326 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 3327} 3328 3329static void 3330micro_umax(union tgsi_exec_channel *dst, 3331 const union tgsi_exec_channel *src0, 3332 const union tgsi_exec_channel *src1) 3333{ 3334 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 3335 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 3336 dst->u[2] = src0->u[2] > src1->u[2] ? 
src0->u[2] : src1->u[2]; 3337 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 3338} 3339 3340static void 3341micro_umin(union tgsi_exec_channel *dst, 3342 const union tgsi_exec_channel *src0, 3343 const union tgsi_exec_channel *src1) 3344{ 3345 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 3346 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 3347 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 3348 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 3349} 3350 3351static void 3352micro_umod(union tgsi_exec_channel *dst, 3353 const union tgsi_exec_channel *src0, 3354 const union tgsi_exec_channel *src1) 3355{ 3356 dst->u[0] = src0->u[0] % src1->u[0]; 3357 dst->u[1] = src0->u[1] % src1->u[1]; 3358 dst->u[2] = src0->u[2] % src1->u[2]; 3359 dst->u[3] = src0->u[3] % src1->u[3]; 3360} 3361 3362static void 3363micro_umul(union tgsi_exec_channel *dst, 3364 const union tgsi_exec_channel *src0, 3365 const union tgsi_exec_channel *src1) 3366{ 3367 dst->u[0] = src0->u[0] * src1->u[0]; 3368 dst->u[1] = src0->u[1] * src1->u[1]; 3369 dst->u[2] = src0->u[2] * src1->u[2]; 3370 dst->u[3] = src0->u[3] * src1->u[3]; 3371} 3372 3373static void 3374micro_useq(union tgsi_exec_channel *dst, 3375 const union tgsi_exec_channel *src0, 3376 const union tgsi_exec_channel *src1) 3377{ 3378 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 3379 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 3380 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 3381 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 3382} 3383 3384static void 3385micro_usge(union tgsi_exec_channel *dst, 3386 const union tgsi_exec_channel *src0, 3387 const union tgsi_exec_channel *src1) 3388{ 3389 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 3390 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 3391 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 3392 dst->u[3] = src0->u[3] >= src1->u[3] ? 
~0 : 0; 3393} 3394 3395static void 3396micro_ushr(union tgsi_exec_channel *dst, 3397 const union tgsi_exec_channel *src0, 3398 const union tgsi_exec_channel *src1) 3399{ 3400 dst->u[0] = src0->u[0] >> src1->u[0]; 3401 dst->u[1] = src0->u[1] >> src1->u[1]; 3402 dst->u[2] = src0->u[2] >> src1->u[2]; 3403 dst->u[3] = src0->u[3] >> src1->u[3]; 3404} 3405 3406static void 3407micro_uslt(union tgsi_exec_channel *dst, 3408 const union tgsi_exec_channel *src0, 3409 const union tgsi_exec_channel *src1) 3410{ 3411 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 3412 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 3413 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 3414 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 3415} 3416 3417static void 3418micro_usne(union tgsi_exec_channel *dst, 3419 const union tgsi_exec_channel *src0, 3420 const union tgsi_exec_channel *src1) 3421{ 3422 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 3423 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 3424 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 3425 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 3426} 3427 3428static void 3429micro_uarl(union tgsi_exec_channel *dst, 3430 const union tgsi_exec_channel *src) 3431{ 3432 dst->i[0] = src->u[0]; 3433 dst->i[1] = src->u[1]; 3434 dst->i[2] = src->u[2]; 3435 dst->i[3] = src->u[3]; 3436} 3437 3438static void 3439micro_ucmp(union tgsi_exec_channel *dst, 3440 const union tgsi_exec_channel *src0, 3441 const union tgsi_exec_channel *src1, 3442 const union tgsi_exec_channel *src2) 3443{ 3444 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 3445 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 3446 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 3447 dst->u[3] = src0->u[3] ? 
src1->u[3] : src2->u[3]; 3448} 3449 3450static void 3451exec_instruction( 3452 struct tgsi_exec_machine *mach, 3453 const struct tgsi_full_instruction *inst, 3454 int *pc ) 3455{ 3456 union tgsi_exec_channel r[10]; 3457 3458 (*pc)++; 3459 3460 switch (inst->Instruction.Opcode) { 3461 case TGSI_OPCODE_ARL: 3462 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 3463 break; 3464 3465 case TGSI_OPCODE_MOV: 3466 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 3467 break; 3468 3469 case TGSI_OPCODE_LIT: 3470 exec_lit(mach, inst); 3471 break; 3472 3473 case TGSI_OPCODE_RCP: 3474 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3475 break; 3476 3477 case TGSI_OPCODE_RSQ: 3478 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3479 break; 3480 3481 case TGSI_OPCODE_EXP: 3482 exec_exp(mach, inst); 3483 break; 3484 3485 case TGSI_OPCODE_LOG: 3486 exec_log(mach, inst); 3487 break; 3488 3489 case TGSI_OPCODE_MUL: 3490 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3491 break; 3492 3493 case TGSI_OPCODE_ADD: 3494 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3495 break; 3496 3497 case TGSI_OPCODE_DP3: 3498 exec_dp3(mach, inst); 3499 break; 3500 3501 case TGSI_OPCODE_DP4: 3502 exec_dp4(mach, inst); 3503 break; 3504 3505 case TGSI_OPCODE_DST: 3506 exec_dst(mach, inst); 3507 break; 3508 3509 case TGSI_OPCODE_MIN: 3510 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3511 break; 3512 3513 case TGSI_OPCODE_MAX: 3514 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3515 break; 3516 3517 case TGSI_OPCODE_SLT: 3518 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3519 break; 3520 3521 case TGSI_OPCODE_SGE: 3522 exec_vector_binary(mach, inst, micro_sge, 
TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3523 break; 3524 3525 case TGSI_OPCODE_MAD: 3526 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3527 break; 3528 3529 case TGSI_OPCODE_SUB: 3530 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3531 break; 3532 3533 case TGSI_OPCODE_LRP: 3534 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3535 break; 3536 3537 case TGSI_OPCODE_CND: 3538 exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3539 break; 3540 3541 case TGSI_OPCODE_DP2A: 3542 exec_dp2a(mach, inst); 3543 break; 3544 3545 case TGSI_OPCODE_FRC: 3546 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3547 break; 3548 3549 case TGSI_OPCODE_CLAMP: 3550 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3551 break; 3552 3553 case TGSI_OPCODE_FLR: 3554 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3555 break; 3556 3557 case TGSI_OPCODE_ROUND: 3558 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3559 break; 3560 3561 case TGSI_OPCODE_EX2: 3562 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3563 break; 3564 3565 case TGSI_OPCODE_LG2: 3566 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3567 break; 3568 3569 case TGSI_OPCODE_POW: 3570 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3571 break; 3572 3573 case TGSI_OPCODE_XPD: 3574 exec_xpd(mach, inst); 3575 break; 3576 3577 case TGSI_OPCODE_ABS: 3578 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3579 break; 3580 3581 case TGSI_OPCODE_RCC: 3582 exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3583 break; 3584 3585 case 
TGSI_OPCODE_DPH: 3586 exec_dph(mach, inst); 3587 break; 3588 3589 case TGSI_OPCODE_COS: 3590 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3591 break; 3592 3593 case TGSI_OPCODE_DDX: 3594 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3595 break; 3596 3597 case TGSI_OPCODE_DDY: 3598 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3599 break; 3600 3601 case TGSI_OPCODE_KILP: 3602 exec_kilp (mach, inst); 3603 break; 3604 3605 case TGSI_OPCODE_KIL: 3606 exec_kil (mach, inst); 3607 break; 3608 3609 case TGSI_OPCODE_PK2H: 3610 assert (0); 3611 break; 3612 3613 case TGSI_OPCODE_PK2US: 3614 assert (0); 3615 break; 3616 3617 case TGSI_OPCODE_PK4B: 3618 assert (0); 3619 break; 3620 3621 case TGSI_OPCODE_PK4UB: 3622 assert (0); 3623 break; 3624 3625 case TGSI_OPCODE_RFL: 3626 exec_rfl(mach, inst); 3627 break; 3628 3629 case TGSI_OPCODE_SEQ: 3630 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3631 break; 3632 3633 case TGSI_OPCODE_SFL: 3634 exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT); 3635 break; 3636 3637 case TGSI_OPCODE_SGT: 3638 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3639 break; 3640 3641 case TGSI_OPCODE_SIN: 3642 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3643 break; 3644 3645 case TGSI_OPCODE_SLE: 3646 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3647 break; 3648 3649 case TGSI_OPCODE_SNE: 3650 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3651 break; 3652 3653 case TGSI_OPCODE_STR: 3654 exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT); 3655 break; 3656 3657 case TGSI_OPCODE_TEX: 3658 /* simple texture lookup */ 3659 /* src[0] = texcoord */ 3660 /* src[1] = sampler unit */ 3661 exec_tex(mach, inst, TEX_MODIFIER_NONE); 3662 
break; 3663 3664 case TGSI_OPCODE_TXB: 3665 /* Texture lookup with lod bias */ 3666 /* src[0] = texcoord (src[0].w = LOD bias) */ 3667 /* src[1] = sampler unit */ 3668 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); 3669 break; 3670 3671 case TGSI_OPCODE_TXD: 3672 /* Texture lookup with explict partial derivatives */ 3673 /* src[0] = texcoord */ 3674 /* src[1] = d[strq]/dx */ 3675 /* src[2] = d[strq]/dy */ 3676 /* src[3] = sampler unit */ 3677 exec_txd(mach, inst); 3678 break; 3679 3680 case TGSI_OPCODE_TXL: 3681 /* Texture lookup with explit LOD */ 3682 /* src[0] = texcoord (src[0].w = LOD) */ 3683 /* src[1] = sampler unit */ 3684 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); 3685 break; 3686 3687 case TGSI_OPCODE_TXP: 3688 /* Texture lookup with projection */ 3689 /* src[0] = texcoord (src[0].w = projection) */ 3690 /* src[1] = sampler unit */ 3691 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); 3692 break; 3693 3694 case TGSI_OPCODE_UP2H: 3695 assert (0); 3696 break; 3697 3698 case TGSI_OPCODE_UP2US: 3699 assert (0); 3700 break; 3701 3702 case TGSI_OPCODE_UP4B: 3703 assert (0); 3704 break; 3705 3706 case TGSI_OPCODE_UP4UB: 3707 assert (0); 3708 break; 3709 3710 case TGSI_OPCODE_X2D: 3711 exec_x2d(mach, inst); 3712 break; 3713 3714 case TGSI_OPCODE_ARA: 3715 assert (0); 3716 break; 3717 3718 case TGSI_OPCODE_ARR: 3719 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 3720 break; 3721 3722 case TGSI_OPCODE_BRA: 3723 assert (0); 3724 break; 3725 3726 case TGSI_OPCODE_CAL: 3727 /* skip the call if no execution channels are enabled */ 3728 if (mach->ExecMask) { 3729 /* do the call */ 3730 3731 /* First, record the depths of the execution stacks. 3732 * This is important for deeply nested/looped return statements. 3733 * We have to unwind the stacks by the correct amount. 
For a 3734 * real code generator, we could determine the number of entries 3735 * to pop off each stack with simple static analysis and avoid 3736 * implementing this data structure at run time. 3737 */ 3738 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 3739 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 3740 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 3741 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 3742 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 3743 /* note that PC was already incremented above */ 3744 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 3745 3746 mach->CallStackTop++; 3747 3748 /* Second, push the Cond, Loop, Cont, Func stacks */ 3749 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 3750 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3751 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3752 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3753 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3754 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 3755 3756 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 3757 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3758 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3759 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3760 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3761 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 3762 3763 /* Finally, jump to the subroutine */ 3764 *pc = inst->Label.Label; 3765 } 3766 break; 3767 3768 case TGSI_OPCODE_RET: 3769 mach->FuncMask &= ~mach->ExecMask; 3770 UPDATE_EXEC_MASK(mach); 3771 3772 if (mach->FuncMask == 0x0) { 3773 /* really return now (otherwise, keep executing */ 3774 3775 if (mach->CallStackTop == 0) { 3776 /* returning from main() */ 3777 mach->CondStackTop = 0; 3778 mach->LoopStackTop = 0; 3779 *pc = -1; 3780 
return; 3781 } 3782 3783 assert(mach->CallStackTop > 0); 3784 mach->CallStackTop--; 3785 3786 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3787 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3788 3789 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3790 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3791 3792 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3793 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3794 3795 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 3796 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 3797 3798 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 3799 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 3800 3801 assert(mach->FuncStackTop > 0); 3802 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3803 3804 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3805 3806 UPDATE_EXEC_MASK(mach); 3807 } 3808 break; 3809 3810 case TGSI_OPCODE_SSG: 3811 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3812 break; 3813 3814 case TGSI_OPCODE_CMP: 3815 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3816 break; 3817 3818 case TGSI_OPCODE_SCS: 3819 exec_scs(mach, inst); 3820 break; 3821 3822 case TGSI_OPCODE_NRM: 3823 exec_nrm3(mach, inst); 3824 break; 3825 3826 case TGSI_OPCODE_NRM4: 3827 exec_nrm4(mach, inst); 3828 break; 3829 3830 case TGSI_OPCODE_DIV: 3831 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3832 break; 3833 3834 case TGSI_OPCODE_DP2: 3835 exec_dp2(mach, inst); 3836 break; 3837 3838 case TGSI_OPCODE_IF: 3839 /* push CondMask */ 3840 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 3841 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 3842 FETCH( &r[0], 0, CHAN_X ); 3843 /* update CondMask */ 3844 if( ! 
r[0].u[0] ) { 3845 mach->CondMask &= ~0x1; 3846 } 3847 if( ! r[0].u[1] ) { 3848 mach->CondMask &= ~0x2; 3849 } 3850 if( ! r[0].u[2] ) { 3851 mach->CondMask &= ~0x4; 3852 } 3853 if( ! r[0].u[3] ) { 3854 mach->CondMask &= ~0x8; 3855 } 3856 UPDATE_EXEC_MASK(mach); 3857 /* Todo: If CondMask==0, jump to ELSE */ 3858 break; 3859 3860 case TGSI_OPCODE_ELSE: 3861 /* invert CondMask wrt previous mask */ 3862 { 3863 uint prevMask; 3864 assert(mach->CondStackTop > 0); 3865 prevMask = mach->CondStack[mach->CondStackTop - 1]; 3866 mach->CondMask = ~mach->CondMask & prevMask; 3867 UPDATE_EXEC_MASK(mach); 3868 /* Todo: If CondMask==0, jump to ENDIF */ 3869 } 3870 break; 3871 3872 case TGSI_OPCODE_ENDIF: 3873 /* pop CondMask */ 3874 assert(mach->CondStackTop > 0); 3875 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 3876 UPDATE_EXEC_MASK(mach); 3877 break; 3878 3879 case TGSI_OPCODE_END: 3880 /* make sure we end primitives which haven't 3881 * been explicitly emitted */ 3882 conditional_emit_primitive(mach); 3883 /* halt execution */ 3884 *pc = -1; 3885 break; 3886 3887 case TGSI_OPCODE_PUSHA: 3888 assert (0); 3889 break; 3890 3891 case TGSI_OPCODE_POPA: 3892 assert (0); 3893 break; 3894 3895 case TGSI_OPCODE_CEIL: 3896 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3897 break; 3898 3899 case TGSI_OPCODE_I2F: 3900 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 3901 break; 3902 3903 case TGSI_OPCODE_NOT: 3904 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3905 break; 3906 3907 case TGSI_OPCODE_TRUNC: 3908 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3909 break; 3910 3911 case TGSI_OPCODE_SHL: 3912 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3913 break; 3914 3915 case TGSI_OPCODE_AND: 3916 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3917 
break; 3918 3919 case TGSI_OPCODE_OR: 3920 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3921 break; 3922 3923 case TGSI_OPCODE_MOD: 3924 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3925 break; 3926 3927 case TGSI_OPCODE_XOR: 3928 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3929 break; 3930 3931 case TGSI_OPCODE_SAD: 3932 assert (0); 3933 break; 3934 3935 case TGSI_OPCODE_TXF: 3936 exec_txf(mach, inst); 3937 break; 3938 3939 case TGSI_OPCODE_TXQ: 3940 exec_txq(mach, inst); 3941 break; 3942 3943 case TGSI_OPCODE_EMIT: 3944 emit_vertex(mach); 3945 break; 3946 3947 case TGSI_OPCODE_ENDPRIM: 3948 emit_primitive(mach); 3949 break; 3950 3951 case TGSI_OPCODE_BGNLOOP: 3952 /* push LoopMask and ContMasks */ 3953 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3954 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3955 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3956 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3957 3958 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3959 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3960 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3961 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3962 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 3963 break; 3964 3965 case TGSI_OPCODE_ENDLOOP: 3966 /* Restore ContMask, but don't pop */ 3967 assert(mach->ContStackTop > 0); 3968 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3969 UPDATE_EXEC_MASK(mach); 3970 if (mach->ExecMask) { 3971 /* repeat loop: jump to instruction just past BGNLOOP */ 3972 assert(mach->LoopLabelStackTop > 0); 3973 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3974 } 3975 else { 3976 /* exit loop: pop LoopMask */ 3977 assert(mach->LoopStackTop > 0); 3978 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3979 /* pop ContMask */ 3980 
assert(mach->ContStackTop > 0); 3981 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3982 assert(mach->LoopLabelStackTop > 0); 3983 --mach->LoopLabelStackTop; 3984 3985 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3986 } 3987 UPDATE_EXEC_MASK(mach); 3988 break; 3989 3990 case TGSI_OPCODE_BRK: 3991 exec_break(mach); 3992 break; 3993 3994 case TGSI_OPCODE_CONT: 3995 /* turn off cont channels for each enabled exec channel */ 3996 mach->ContMask &= ~mach->ExecMask; 3997 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3998 UPDATE_EXEC_MASK(mach); 3999 break; 4000 4001 case TGSI_OPCODE_BGNSUB: 4002 /* no-op */ 4003 break; 4004 4005 case TGSI_OPCODE_ENDSUB: 4006 /* 4007 * XXX: This really should be a no-op. We should never reach this opcode. 4008 */ 4009 4010 assert(mach->CallStackTop > 0); 4011 mach->CallStackTop--; 4012 4013 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 4014 mach->CondMask = mach->CondStack[mach->CondStackTop]; 4015 4016 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 4017 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 4018 4019 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 4020 mach->ContMask = mach->ContStack[mach->ContStackTop]; 4021 4022 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 4023 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 4024 4025 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 4026 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 4027 4028 assert(mach->FuncStackTop > 0); 4029 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 4030 4031 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 4032 4033 UPDATE_EXEC_MASK(mach); 4034 break; 4035 4036 case TGSI_OPCODE_NOP: 4037 break; 4038 4039 case TGSI_OPCODE_BREAKC: 4040 FETCH(&r[0], 0, CHAN_X); 4041 /* update CondMask */ 4042 if (r[0].u[0] && (mach->ExecMask & 0x1)) { 4043 mach->LoopMask &= ~0x1; 4044 } 
4045 if (r[0].u[1] && (mach->ExecMask & 0x2)) { 4046 mach->LoopMask &= ~0x2; 4047 } 4048 if (r[0].u[2] && (mach->ExecMask & 0x4)) { 4049 mach->LoopMask &= ~0x4; 4050 } 4051 if (r[0].u[3] && (mach->ExecMask & 0x8)) { 4052 mach->LoopMask &= ~0x8; 4053 } 4054 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 4055 UPDATE_EXEC_MASK(mach); 4056 break; 4057 4058 case TGSI_OPCODE_F2I: 4059 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 4060 break; 4061 4062 case TGSI_OPCODE_IDIV: 4063 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4064 break; 4065 4066 case TGSI_OPCODE_IMAX: 4067 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4068 break; 4069 4070 case TGSI_OPCODE_IMIN: 4071 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4072 break; 4073 4074 case TGSI_OPCODE_INEG: 4075 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4076 break; 4077 4078 case TGSI_OPCODE_ISGE: 4079 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4080 break; 4081 4082 case TGSI_OPCODE_ISHR: 4083 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4084 break; 4085 4086 case TGSI_OPCODE_ISLT: 4087 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4088 break; 4089 4090 case TGSI_OPCODE_F2U: 4091 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 4092 break; 4093 4094 case TGSI_OPCODE_U2F: 4095 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 4096 break; 4097 4098 case TGSI_OPCODE_UADD: 4099 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4100 break; 4101 4102 case TGSI_OPCODE_UDIV: 4103 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4104 break; 4105 4106 case TGSI_OPCODE_UMAD: 
4107 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4108 break; 4109 4110 case TGSI_OPCODE_UMAX: 4111 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4112 break; 4113 4114 case TGSI_OPCODE_UMIN: 4115 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4116 break; 4117 4118 case TGSI_OPCODE_UMOD: 4119 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4120 break; 4121 4122 case TGSI_OPCODE_UMUL: 4123 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4124 break; 4125 4126 case TGSI_OPCODE_USEQ: 4127 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4128 break; 4129 4130 case TGSI_OPCODE_USGE: 4131 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4132 break; 4133 4134 case TGSI_OPCODE_USHR: 4135 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4136 break; 4137 4138 case TGSI_OPCODE_USLT: 4139 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4140 break; 4141 4142 case TGSI_OPCODE_USNE: 4143 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4144 break; 4145 4146 case TGSI_OPCODE_SWITCH: 4147 exec_switch(mach, inst); 4148 break; 4149 4150 case TGSI_OPCODE_CASE: 4151 exec_case(mach, inst); 4152 break; 4153 4154 case TGSI_OPCODE_DEFAULT: 4155 exec_default(mach); 4156 break; 4157 4158 case TGSI_OPCODE_ENDSWITCH: 4159 exec_endswitch(mach); 4160 break; 4161 4162 case TGSI_OPCODE_LOAD: 4163 assert(0); 4164 break; 4165 4166 case TGSI_OPCODE_LOAD_MS: 4167 assert(0); 4168 break; 4169 4170 case TGSI_OPCODE_SAMPLE: 4171 exec_sample(mach, inst, TEX_MODIFIER_NONE); 4172 break; 4173 4174 case TGSI_OPCODE_SAMPLE_B: 4175 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS); 4176 break; 4177 4178 case TGSI_OPCODE_SAMPLE_C: 4179 
exec_sample(mach, inst, TEX_MODIFIER_NONE); 4180 break; 4181 4182 case TGSI_OPCODE_SAMPLE_C_LZ: 4183 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS); 4184 break; 4185 4186 case TGSI_OPCODE_SAMPLE_D: 4187 exec_sample_d(mach, inst); 4188 break; 4189 4190 case TGSI_OPCODE_SAMPLE_L: 4191 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); 4192 break; 4193 4194 case TGSI_OPCODE_GATHER4: 4195 assert(0); 4196 break; 4197 4198 case TGSI_OPCODE_RESINFO: 4199 assert(0); 4200 break; 4201 4202 case TGSI_OPCODE_SAMPLE_POS: 4203 assert(0); 4204 break; 4205 4206 case TGSI_OPCODE_SAMPLE_INFO: 4207 assert(0); 4208 break; 4209 4210 case TGSI_OPCODE_UARL: 4211 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 4212 break; 4213 4214 case TGSI_OPCODE_UCMP: 4215 exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4216 break; 4217 4218 case TGSI_OPCODE_IABS: 4219 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4220 break; 4221 4222 case TGSI_OPCODE_ISSG: 4223 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4224 break; 4225 4226 default: 4227 assert( 0 ); 4228 } 4229} 4230 4231 4232#define DEBUG_EXECUTION 0 4233 4234 4235/** 4236 * Run TGSI interpreter. 
4237 * \return bitmask of "alive" quad components 4238 */ 4239uint 4240tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 4241{ 4242 uint i; 4243 int pc = 0; 4244 4245 mach->CondMask = 0xf; 4246 mach->LoopMask = 0xf; 4247 mach->ContMask = 0xf; 4248 mach->FuncMask = 0xf; 4249 mach->ExecMask = 0xf; 4250 4251 mach->Switch.mask = 0xf; 4252 4253 assert(mach->CondStackTop == 0); 4254 assert(mach->LoopStackTop == 0); 4255 assert(mach->ContStackTop == 0); 4256 assert(mach->SwitchStackTop == 0); 4257 assert(mach->BreakStackTop == 0); 4258 assert(mach->CallStackTop == 0); 4259 4260 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 4261 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 4262 4263 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 4264 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 4265 mach->Primitives[0] = 0; 4266 } 4267 4268 /* execute declarations (interpolants) */ 4269 for (i = 0; i < mach->NumDeclarations; i++) { 4270 exec_declaration( mach, mach->Declarations+i ); 4271 } 4272 4273 { 4274#if DEBUG_EXECUTION 4275 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 4276 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 4277 uint inst = 1; 4278 4279 memcpy(temps, mach->Temps, sizeof(temps)); 4280 memcpy(outputs, mach->Outputs, sizeof(outputs)); 4281#endif 4282 4283 /* execute instructions, until pc is set to -1 */ 4284 while (pc != -1) { 4285 4286#if DEBUG_EXECUTION 4287 uint i; 4288 4289 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 4290#endif 4291 4292 assert(pc < (int) mach->NumInstructions); 4293 exec_instruction(mach, mach->Instructions + pc, &pc); 4294 4295#if DEBUG_EXECUTION 4296 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 4297 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 4298 uint j; 4299 4300 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 4301 debug_printf("TEMP[%2u] = ", i); 4302 for (j = 0; j < 4; j++) { 4303 if (j > 0) { 
4304 debug_printf(" "); 4305 } 4306 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 4307 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 4308 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 4309 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 4310 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 4311 } 4312 } 4313 } 4314 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 4315 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 4316 uint j; 4317 4318 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 4319 debug_printf("OUT[%2u] = ", i); 4320 for (j = 0; j < 4; j++) { 4321 if (j > 0) { 4322 debug_printf(" "); 4323 } 4324 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 4325 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 4326 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 4327 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 4328 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 4329 } 4330 } 4331 } 4332#endif 4333 } 4334 } 4335 4336#if 0 4337 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 4338 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 4339 /* 4340 * Scale back depth component. 4341 */ 4342 for (i = 0; i < 4; i++) 4343 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 4344 } 4345#endif 4346 4347 /* Strictly speaking, these assertions aren't really needed but they 4348 * can potentially catch some bugs in the control flow code. 4349 */ 4350 assert(mach->CondStackTop == 0); 4351 assert(mach->LoopStackTop == 0); 4352 assert(mach->ContStackTop == 0); 4353 assert(mach->SwitchStackTop == 0); 4354 assert(mach->BreakStackTop == 0); 4355 assert(mach->CallStackTop == 0); 4356 4357 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4358} 4359