tgsi_exec.c revision 101f792a2af9c9a19a050afba8b60caa689466a5
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/u_memory.h" 62#include "util/u_math.h" 63 64 65#define FAST_MATH 1 66 67#define TILE_TOP_LEFT 0 68#define TILE_TOP_RIGHT 1 69#define TILE_BOTTOM_LEFT 2 70#define TILE_BOTTOM_RIGHT 3 71 72static void 73micro_abs(union tgsi_exec_channel *dst, 74 const union tgsi_exec_channel *src) 75{ 76 dst->f[0] = fabsf(src->f[0]); 77 dst->f[1] = fabsf(src->f[1]); 78 dst->f[2] = fabsf(src->f[2]); 79 dst->f[3] = fabsf(src->f[3]); 80} 81 82static void 83micro_arl(union tgsi_exec_channel *dst, 84 const union tgsi_exec_channel *src) 85{ 86 dst->i[0] = (int)floorf(src->f[0]); 87 dst->i[1] = (int)floorf(src->f[1]); 88 dst->i[2] = (int)floorf(src->f[2]); 89 dst->i[3] = (int)floorf(src->f[3]); 90} 91 92static void 93micro_arr(union tgsi_exec_channel *dst, 94 const union tgsi_exec_channel *src) 95{ 96 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 97 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 98 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 99 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 100} 101 102static void 103micro_ceil(union tgsi_exec_channel *dst, 104 const union tgsi_exec_channel *src) 105{ 106 dst->f[0] = ceilf(src->f[0]); 107 dst->f[1] = ceilf(src->f[1]); 108 dst->f[2] = ceilf(src->f[2]); 109 dst->f[3] = ceilf(src->f[3]); 110} 111 112static void 113micro_clamp(union tgsi_exec_channel *dst, 114 const union tgsi_exec_channel *src0, 115 const union tgsi_exec_channel *src1, 116 const union tgsi_exec_channel *src2) 117{ 118 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; 119 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; 120 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; 121 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; 122} 123 124static void 125micro_cmp(union tgsi_exec_channel *dst, 126 const union tgsi_exec_channel *src0, 127 const union tgsi_exec_channel *src1, 128 const union tgsi_exec_channel *src2) 129{ 130 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 131 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 132 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 133 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 134} 135 136static void 137micro_cnd(union tgsi_exec_channel *dst, 138 const union tgsi_exec_channel *src0, 139 const union tgsi_exec_channel *src1, 140 const union tgsi_exec_channel *src2) 141{ 142 dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; 143 dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; 144 dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; 145 dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3]; 146} 147 148static void 149micro_cos(union tgsi_exec_channel *dst, 150 const union tgsi_exec_channel *src) 151{ 152 dst->f[0] = cosf(src->f[0]); 153 dst->f[1] = cosf(src->f[1]); 154 dst->f[2] = cosf(src->f[2]); 155 dst->f[3] = cosf(src->f[3]); 156} 157 158static void 159micro_ddx(union tgsi_exec_channel *dst, 160 const union tgsi_exec_channel *src) 161{ 162 dst->f[0] = 163 dst->f[1] = 164 dst->f[2] = 165 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 166} 167 168static void 169micro_ddy(union tgsi_exec_channel *dst, 170 const union tgsi_exec_channel *src) 171{ 172 dst->f[0] = 173 dst->f[1] = 174 dst->f[2] = 175 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 176} 177 178static void 179micro_exp2(union tgsi_exec_channel *dst, 180 const union tgsi_exec_channel *src) 181{ 182#if FAST_MATH 183 dst->f[0] = util_fast_exp2(src->f[0]); 184 dst->f[1] = util_fast_exp2(src->f[1]); 185 dst->f[2] = util_fast_exp2(src->f[2]); 186 dst->f[3] = util_fast_exp2(src->f[3]); 187#else 188#if DEBUG 189 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 190 uint i; 191 union tgsi_exec_channel clamped; 192 193 for (i = 0; i < 4; i++) { 194 if (src->f[i] > 127.99999f) { 195 clamped.f[i] = 127.99999f; 196 } else if (src->f[i] < -126.99999f) { 197 clamped.f[i] = -126.99999f; 198 } else { 199 clamped.f[i] = src->f[i]; 200 } 201 } 202 src = &clamped; 203#endif /* DEBUG */ 204 205 dst->f[0] = powf(2.0f, src->f[0]); 206 dst->f[1] = powf(2.0f, src->f[1]); 207 dst->f[2] = powf(2.0f, src->f[2]); 208 dst->f[3] = powf(2.0f, src->f[3]); 209#endif /* FAST_MATH */ 210} 211 212static void 213micro_flr(union tgsi_exec_channel *dst, 214 const union tgsi_exec_channel *src) 215{ 216 dst->f[0] = floorf(src->f[0]); 217 dst->f[1] = floorf(src->f[1]); 218 dst->f[2] = floorf(src->f[2]); 219 dst->f[3] = floorf(src->f[3]); 220} 221 222static void 223micro_frc(union tgsi_exec_channel *dst, 224 const union tgsi_exec_channel *src) 225{ 226 dst->f[0] = src->f[0] - floorf(src->f[0]); 227 dst->f[1] = src->f[1] - floorf(src->f[1]); 228 dst->f[2] = src->f[2] - floorf(src->f[2]); 229 dst->f[3] = src->f[3] - floorf(src->f[3]); 230} 231 232static void 233micro_iabs(union tgsi_exec_channel *dst, 234 const union tgsi_exec_channel *src) 235{ 236 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 237 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 238 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 239 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 240} 241 242static void 243micro_ineg(union tgsi_exec_channel *dst, 244 const union tgsi_exec_channel *src) 245{ 246 dst->i[0] = -src->i[0]; 247 dst->i[1] = -src->i[1]; 248 dst->i[2] = -src->i[2]; 249 dst->i[3] = -src->i[3]; 250} 251 252static void 253micro_lg2(union tgsi_exec_channel *dst, 254 const union tgsi_exec_channel *src) 255{ 256#if FAST_MATH 257 dst->f[0] = util_fast_log2(src->f[0]); 258 dst->f[1] = util_fast_log2(src->f[1]); 259 dst->f[2] = util_fast_log2(src->f[2]); 260 dst->f[3] = util_fast_log2(src->f[3]); 261#else 262 dst->f[0] = logf(src->f[0]) * 1.442695f; 263 dst->f[1] = logf(src->f[1]) * 1.442695f; 264 dst->f[2] = logf(src->f[2]) * 1.442695f; 265 dst->f[3] = logf(src->f[3]) * 1.442695f; 266#endif 267} 268 269static void 270micro_lrp(union tgsi_exec_channel *dst, 271 const union tgsi_exec_channel *src0, 272 const union tgsi_exec_channel *src1, 273 const union tgsi_exec_channel *src2) 274{ 275 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 276 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 277 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 278 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 279} 280 281static void 282micro_mad(union tgsi_exec_channel *dst, 283 const union tgsi_exec_channel *src0, 284 const union tgsi_exec_channel *src1, 285 const union tgsi_exec_channel *src2) 286{ 287 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 288 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 289 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 290 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 291} 292 293static void 294micro_mov(union tgsi_exec_channel *dst, 295 const union tgsi_exec_channel *src) 296{ 297 dst->u[0] = src->u[0]; 298 dst->u[1] = src->u[1]; 299 dst->u[2] = src->u[2]; 300 dst->u[3] = src->u[3]; 301} 302 303static void 304micro_rcp(union tgsi_exec_channel *dst, 305 const union tgsi_exec_channel *src) 306{ 307#if 0 /* for debugging */ 308 assert(src->f[0] != 0.0f); 309 assert(src->f[1] != 0.0f); 310 assert(src->f[2] != 0.0f); 311 assert(src->f[3] != 0.0f); 312#endif 313 dst->f[0] = 1.0f / src->f[0]; 314 dst->f[1] = 1.0f / src->f[1]; 315 dst->f[2] = 1.0f / src->f[2]; 316 dst->f[3] = 1.0f / src->f[3]; 317} 318 319static void 320micro_rnd(union tgsi_exec_channel *dst, 321 const union tgsi_exec_channel *src) 322{ 323 dst->f[0] = floorf(src->f[0] + 0.5f); 324 dst->f[1] = floorf(src->f[1] + 0.5f); 325 dst->f[2] = floorf(src->f[2] + 0.5f); 326 dst->f[3] = floorf(src->f[3] + 0.5f); 327} 328 329static void 330micro_rsq(union tgsi_exec_channel *dst, 331 const union tgsi_exec_channel *src) 332{ 333#if 0 /* for debugging */ 334 assert(src->f[0] != 0.0f); 335 assert(src->f[1] != 0.0f); 336 assert(src->f[2] != 0.0f); 337 assert(src->f[3] != 0.0f); 338#endif 339 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); 340 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); 341 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); 342 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); 343} 344 345static void 346micro_seq(union tgsi_exec_channel *dst, 347 const union tgsi_exec_channel *src0, 348 const union tgsi_exec_channel *src1) 349{ 350 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 351 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 352 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 353 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 354} 355 356static void 357micro_sge(union tgsi_exec_channel *dst, 358 const union tgsi_exec_channel *src0, 359 const union tgsi_exec_channel *src1) 360{ 361 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 362 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 363 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 364 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 365} 366 367static void 368micro_sgn(union tgsi_exec_channel *dst, 369 const union tgsi_exec_channel *src) 370{ 371 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 372 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 373 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 374 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 375} 376 377static void 378micro_sgt(union tgsi_exec_channel *dst, 379 const union tgsi_exec_channel *src0, 380 const union tgsi_exec_channel *src1) 381{ 382 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 383 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 384 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 385 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 386} 387 388static void 389micro_sin(union tgsi_exec_channel *dst, 390 const union tgsi_exec_channel *src) 391{ 392 dst->f[0] = sinf(src->f[0]); 393 dst->f[1] = sinf(src->f[1]); 394 dst->f[2] = sinf(src->f[2]); 395 dst->f[3] = sinf(src->f[3]); 396} 397 398static void 399micro_sle(union tgsi_exec_channel *dst, 400 const union tgsi_exec_channel *src0, 401 const union tgsi_exec_channel *src1) 402{ 403 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 404 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 405 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 406 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 407} 408 409static void 410micro_slt(union tgsi_exec_channel *dst, 411 const union tgsi_exec_channel *src0, 412 const union tgsi_exec_channel *src1) 413{ 414 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 415 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 416 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 417 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 418} 419 420static void 421micro_sne(union tgsi_exec_channel *dst, 422 const union tgsi_exec_channel *src0, 423 const union tgsi_exec_channel *src1) 424{ 425 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 426 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 427 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 428 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 429} 430 431static void 432micro_trunc(union tgsi_exec_channel *dst, 433 const union tgsi_exec_channel *src) 434{ 435 dst->f[0] = (float)(int)src->f[0]; 436 dst->f[1] = (float)(int)src->f[1]; 437 dst->f[2] = (float)(int)src->f[2]; 438 dst->f[3] = (float)(int)src->f[3]; 439} 440 441 442#define CHAN_X 0 443#define CHAN_Y 1 444#define CHAN_Z 2 445#define CHAN_W 3 446 447enum tgsi_exec_datatype { 448 TGSI_EXEC_DATA_FLOAT, 449 TGSI_EXEC_DATA_INT, 450 TGSI_EXEC_DATA_UINT 451}; 452 453/* 454 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 455 */ 456#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 457#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 458#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 459#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 460#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 461#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 462#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 463#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 464#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 465#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 466#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 467#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 468#define TEMP_128_I TGSI_EXEC_TEMP_128_I 469#define TEMP_128_C TGSI_EXEC_TEMP_128_C 470#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 471#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 472#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 473#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 474#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 475#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 476#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 477#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 478#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 479#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 480#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 481#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 482#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 483#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 484#define TEMP_R0 TGSI_EXEC_TEMP_R0 485#define TEMP_P0 TGSI_EXEC_TEMP_P0 486 487#define IS_CHANNEL_ENABLED(INST, CHAN)\ 488 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) 489 490#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 491 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) 492 493#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 494 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 495 if (IS_CHANNEL_ENABLED( INST, CHAN )) 496 497#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 498 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 499 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 500 501 502/** The execution mask depends on the conditional mask and the loop mask */ 503#define UPDATE_EXEC_MASK(MACH) \ 504 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 505 506 507static const union tgsi_exec_channel ZeroVec = 508 { { 0.0, 0.0, 0.0, 0.0 } }; 509 510static const union tgsi_exec_channel OneVec = { 511 {1.0f, 1.0f, 1.0f, 1.0f} 512}; 513 514 515/** 516 * Assert that none of the float values in 'chan' are infinite or NaN. 517 * NaN and Inf may occur normally during program execution and should 518 * not lead to crashes, etc. But when debugging, it's helpful to catch 519 * them. 520 */ 521static INLINE void 522check_inf_or_nan(const union tgsi_exec_channel *chan) 523{ 524 assert(!util_is_inf_or_nan((chan)->f[0])); 525 assert(!util_is_inf_or_nan((chan)->f[1])); 526 assert(!util_is_inf_or_nan((chan)->f[2])); 527 assert(!util_is_inf_or_nan((chan)->f[3])); 528} 529 530 531#ifdef DEBUG 532static void 533print_chan(const char *msg, const union tgsi_exec_channel *chan) 534{ 535 debug_printf("%s = {%f, %f, %f, %f}\n", 536 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 537} 538#endif 539 540 541#ifdef DEBUG 542static void 543print_temp(const struct tgsi_exec_machine *mach, uint index) 544{ 545 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 546 int i; 547 debug_printf("Temp[%u] =\n", index); 548 for (i = 0; i < 4; i++) { 549 debug_printf(" %c: { %f, %f, %f, %f }\n", 550 "XYZW"[i], 551 tmp->xyzw[i].f[0], 552 tmp->xyzw[i].f[1], 553 tmp->xyzw[i].f[2], 554 tmp->xyzw[i].f[3]); 555 } 556} 557#endif 558 559 560/** 561 * Check if there's a potential src/dst register data dependency when 562 * using SOA execution. 563 * Example: 564 * MOV T, T.yxwz; 565 * This would expand into: 566 * MOV t0, t1; 567 * MOV t1, t0; 568 * MOV t2, t3; 569 * MOV t3, t2; 570 * The second instruction will have the wrong value for t0 if executed as-is. 571 */ 572boolean 573tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 574{ 575 uint i, chan; 576 577 uint writemask = inst->Dst[0].Register.WriteMask; 578 if (writemask == TGSI_WRITEMASK_X || 579 writemask == TGSI_WRITEMASK_Y || 580 writemask == TGSI_WRITEMASK_Z || 581 writemask == TGSI_WRITEMASK_W || 582 writemask == TGSI_WRITEMASK_NONE) { 583 /* no chance of data dependency */ 584 return FALSE; 585 } 586 587 /* loop over src regs */ 588 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 589 if ((inst->Src[i].Register.File == 590 inst->Dst[0].Register.File) && 591 (inst->Src[i].Register.Index == 592 inst->Dst[0].Register.Index)) { 593 /* loop over dest channels */ 594 uint channelsWritten = 0x0; 595 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 596 /* check if we're reading a channel that's been written */ 597 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 598 if (channelsWritten & (1 << swizzle)) { 599 return TRUE; 600 } 601 602 channelsWritten |= (1 << chan); 603 } 604 } 605 } 606 return FALSE; 607} 608 609 610/** 611 * Initialize machine state by expanding tokens to full instructions, 612 * allocating temporary storage, setting up constants, etc. 613 * After this, we can call tgsi_exec_machine_run() many times. 614 */ 615void 616tgsi_exec_machine_bind_shader( 617 struct tgsi_exec_machine *mach, 618 const struct tgsi_token *tokens, 619 uint numSamplers, 620 struct tgsi_sampler **samplers) 621{ 622 uint k; 623 struct tgsi_parse_context parse; 624 struct tgsi_full_instruction *instructions; 625 struct tgsi_full_declaration *declarations; 626 uint maxInstructions = 10, numInstructions = 0; 627 uint maxDeclarations = 10, numDeclarations = 0; 628 629#if 0 630 tgsi_dump(tokens, 0); 631#endif 632 633 util_init_math(); 634 635 mach->Tokens = tokens; 636 mach->Samplers = samplers; 637 638 if (!tokens) { 639 /* unbind and free all */ 640 if (mach->Declarations) { 641 FREE( mach->Declarations ); 642 } 643 mach->Declarations = NULL; 644 mach->NumDeclarations = 0; 645 646 if (mach->Instructions) { 647 FREE( mach->Instructions ); 648 } 649 mach->Instructions = NULL; 650 mach->NumInstructions = 0; 651 652 return; 653 } 654 655 k = tgsi_parse_init (&parse, mach->Tokens); 656 if (k != TGSI_PARSE_OK) { 657 debug_printf( "Problem parsing!\n" ); 658 return; 659 } 660 661 mach->Processor = parse.FullHeader.Processor.Processor; 662 mach->ImmLimit = 0; 663 664 declarations = (struct tgsi_full_declaration *) 665 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 666 667 if (!declarations) { 668 return; 669 } 670 671 instructions = (struct tgsi_full_instruction *) 672 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 673 674 if (!instructions) { 675 FREE( declarations ); 676 return; 677 } 678 679 while( !tgsi_parse_end_of_tokens( &parse ) ) { 680 uint i; 681 682 tgsi_parse_token( &parse ); 683 switch( parse.FullToken.Token.Type ) { 684 case TGSI_TOKEN_TYPE_DECLARATION: 685 /* save expanded declaration */ 686 if (numDeclarations == maxDeclarations) { 687 declarations = REALLOC(declarations, 688 maxDeclarations 689 * sizeof(struct tgsi_full_declaration), 690 (maxDeclarations + 10) 691 * sizeof(struct tgsi_full_declaration)); 692 maxDeclarations += 10; 693 } 694 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 695 unsigned reg; 696 for (reg = parse.FullToken.FullDeclaration.Range.First; 697 reg <= parse.FullToken.FullDeclaration.Range.Last; 698 ++reg) { 699 ++mach->NumOutputs; 700 } 701 } 702 if (parse.FullToken.FullDeclaration.Declaration.File == 703 TGSI_FILE_IMMEDIATE_ARRAY) { 704 unsigned reg; 705 struct tgsi_full_declaration *decl = 706 &parse.FullToken.FullDeclaration; 707 debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES); 708 for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) { 709 for( i = 0; i < 4; i++ ) { 710 int idx = reg * 4 + i; 711 mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float; 712 } 713 } 714 } 715 memcpy(declarations + numDeclarations, 716 &parse.FullToken.FullDeclaration, 717 sizeof(declarations[0])); 718 numDeclarations++; 719 break; 720 721 case TGSI_TOKEN_TYPE_IMMEDIATE: 722 { 723 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 724 assert( size <= 4 ); 725 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 726 727 for( i = 0; i < size; i++ ) { 728 mach->Imms[mach->ImmLimit][i] = 729 parse.FullToken.FullImmediate.u[i].Float; 730 } 731 mach->ImmLimit += 1; 732 } 733 break; 734 735 case TGSI_TOKEN_TYPE_INSTRUCTION: 736 737 /* save expanded instruction */ 738 if (numInstructions == maxInstructions) { 739 instructions = REALLOC(instructions, 740 maxInstructions 741 * sizeof(struct tgsi_full_instruction), 742 (maxInstructions + 10) 743 * sizeof(struct tgsi_full_instruction)); 744 maxInstructions += 10; 745 } 746 747 memcpy(instructions + numInstructions, 748 &parse.FullToken.FullInstruction, 749 sizeof(instructions[0])); 750 751 numInstructions++; 752 break; 753 754 case TGSI_TOKEN_TYPE_PROPERTY: 755 break; 756 757 default: 758 assert( 0 ); 759 } 760 } 761 tgsi_parse_free (&parse); 762 763 if (mach->Declarations) { 764 FREE( mach->Declarations ); 765 } 766 mach->Declarations = declarations; 767 mach->NumDeclarations = numDeclarations; 768 769 if (mach->Instructions) { 770 FREE( mach->Instructions ); 771 } 772 mach->Instructions = instructions; 773 mach->NumInstructions = numInstructions; 774} 775 776 777struct tgsi_exec_machine * 778tgsi_exec_machine_create( void ) 779{ 780 struct tgsi_exec_machine *mach; 781 uint i; 782 783 mach = align_malloc( sizeof *mach, 16 ); 784 if (!mach) 785 goto fail; 786 787 memset(mach, 0, sizeof(*mach)); 788 789 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 790 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 791 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 792 793 /* Setup constants. */ 794 for( i = 0; i < 4; i++ ) { 795 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 796 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 797 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 798 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 799 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 800 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 801 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 802 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 803 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 804 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 805 } 806 807#ifdef DEBUG 808 /* silence warnings */ 809 (void) print_chan; 810 (void) print_temp; 811#endif 812 813 return mach; 814 815fail: 816 align_free(mach); 817 return NULL; 818} 819 820 821void 822tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 823{ 824 if (mach) { 825 if (mach->Instructions) 826 FREE(mach->Instructions); 827 if (mach->Declarations) 828 FREE(mach->Declarations); 829 } 830 831 align_free(mach); 832} 833 834static void 835micro_add(union tgsi_exec_channel *dst, 836 const union tgsi_exec_channel *src0, 837 const union tgsi_exec_channel *src1) 838{ 839 dst->f[0] = src0->f[0] + src1->f[0]; 840 dst->f[1] = src0->f[1] + src1->f[1]; 841 dst->f[2] = src0->f[2] + src1->f[2]; 842 dst->f[3] = src0->f[3] + src1->f[3]; 843} 844 845static void 846micro_div( 847 union tgsi_exec_channel *dst, 848 const union tgsi_exec_channel *src0, 849 const union tgsi_exec_channel *src1 ) 850{ 851 if (src1->f[0] != 0) { 852 dst->f[0] = src0->f[0] / src1->f[0]; 853 } 854 if (src1->f[1] != 0) { 855 dst->f[1] = src0->f[1] / src1->f[1]; 856 } 857 if (src1->f[2] != 0) { 858 dst->f[2] = src0->f[2] / src1->f[2]; 859 } 860 if (src1->f[3] != 0) { 861 dst->f[3] = src0->f[3] / src1->f[3]; 862 } 863} 864 865static void 866micro_float_clamp(union tgsi_exec_channel *dst, 867 const union tgsi_exec_channel *src) 868{ 869 uint i; 870 871 for (i = 0; i < 4; i++) { 872 if (src->f[i] > 0.0f) { 873 if (src->f[i] > 1.884467e+019f) 874 dst->f[i] = 1.884467e+019f; 875 else if (src->f[i] < 5.42101e-020f) 876 dst->f[i] = 5.42101e-020f; 877 else 878 dst->f[i] = src->f[i]; 879 } 880 else { 881 if (src->f[i] < -1.884467e+019f) 882 dst->f[i] = -1.884467e+019f; 883 else if (src->f[i] > -5.42101e-020f) 884 dst->f[i] = -5.42101e-020f; 885 else 886 dst->f[i] = src->f[i]; 887 } 888 } 889} 890 891static void 892micro_lt( 893 union tgsi_exec_channel *dst, 894 const union tgsi_exec_channel *src0, 895 const union tgsi_exec_channel *src1, 896 const union tgsi_exec_channel *src2, 897 const union tgsi_exec_channel *src3 ) 898{ 899 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 900 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 901 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 902 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 903} 904 905static void 906micro_max(union tgsi_exec_channel *dst, 907 const union tgsi_exec_channel *src0, 908 const union tgsi_exec_channel *src1) 909{ 910 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 911 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 912 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 913 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 914} 915 916static void 917micro_min(union tgsi_exec_channel *dst, 918 const union tgsi_exec_channel *src0, 919 const union tgsi_exec_channel *src1) 920{ 921 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 922 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 923 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 924 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 925} 926 927static void 928micro_mul(union tgsi_exec_channel *dst, 929 const union tgsi_exec_channel *src0, 930 const union tgsi_exec_channel *src1) 931{ 932 dst->f[0] = src0->f[0] * src1->f[0]; 933 dst->f[1] = src0->f[1] * src1->f[1]; 934 dst->f[2] = src0->f[2] * src1->f[2]; 935 dst->f[3] = src0->f[3] * src1->f[3]; 936} 937 938#if 0 939static void 940micro_imul64( 941 union tgsi_exec_channel *dst0, 942 union tgsi_exec_channel *dst1, 943 const union tgsi_exec_channel *src0, 944 const union tgsi_exec_channel *src1 ) 945{ 946 dst1->i[0] = src0->i[0] * src1->i[0]; 947 dst1->i[1] = src0->i[1] * src1->i[1]; 948 dst1->i[2] = src0->i[2] * src1->i[2]; 949 dst1->i[3] = src0->i[3] * src1->i[3]; 950 dst0->i[0] = 0; 951 dst0->i[1] = 0; 952 dst0->i[2] = 0; 953 dst0->i[3] = 0; 954} 955#endif 956 957#if 0 958static void 959micro_umul64( 960 union tgsi_exec_channel *dst0, 961 union tgsi_exec_channel *dst1, 962 const union tgsi_exec_channel *src0, 963 const union tgsi_exec_channel *src1 ) 964{ 965 dst1->u[0] = src0->u[0] * src1->u[0]; 966 dst1->u[1] = src0->u[1] * src1->u[1]; 967 dst1->u[2] = src0->u[2] * src1->u[2]; 968 dst1->u[3] = src0->u[3] * src1->u[3]; 969 dst0->u[0] = 0; 970 dst0->u[1] = 0; 971 dst0->u[2] = 0; 972 dst0->u[3] = 0; 973} 974#endif 975 976 977#if 0 978static void 979micro_movc( 980 union tgsi_exec_channel *dst, 981 const union tgsi_exec_channel *src0, 982 const union tgsi_exec_channel *src1, 983 const union tgsi_exec_channel *src2 ) 984{ 985 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 986 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 987 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 988 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 989} 990#endif 991 992static void 993micro_neg( 994 union tgsi_exec_channel *dst, 995 const union tgsi_exec_channel *src ) 996{ 997 dst->f[0] = -src->f[0]; 998 dst->f[1] = -src->f[1]; 999 dst->f[2] = -src->f[2]; 1000 dst->f[3] = -src->f[3]; 1001} 1002 1003static void 1004micro_pow( 1005 union tgsi_exec_channel *dst, 1006 const union tgsi_exec_channel *src0, 1007 const union tgsi_exec_channel *src1 ) 1008{ 1009#if FAST_MATH 1010 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 1011 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 1012 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 1013 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 1014#else 1015 dst->f[0] = powf( src0->f[0], src1->f[0] ); 1016 dst->f[1] = powf( src0->f[1], src1->f[1] ); 1017 dst->f[2] = powf( src0->f[2], src1->f[2] ); 1018 dst->f[3] = powf( src0->f[3], src1->f[3] ); 1019#endif 1020} 1021 1022static void 1023micro_sub(union tgsi_exec_channel *dst, 1024 const union tgsi_exec_channel *src0, 1025 const union tgsi_exec_channel *src1) 1026{ 1027 dst->f[0] = src0->f[0] - src1->f[0]; 1028 dst->f[1] = src0->f[1] - src1->f[1]; 1029 dst->f[2] = src0->f[2] - src1->f[2]; 1030 dst->f[3] = src0->f[3] - src1->f[3]; 1031} 1032 1033static void 1034fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1035 const uint file, 1036 const uint swizzle, 1037 const union tgsi_exec_channel *index, 1038 const union tgsi_exec_channel *index2D, 1039 union tgsi_exec_channel *chan) 1040{ 1041 uint i; 1042 1043 switch (file) { 1044 case TGSI_FILE_CONSTANT: 1045 for (i = 0; i < QUAD_SIZE; i++) { 1046 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); 1047 assert(mach->Consts[index2D->i[i]]); 1048 1049 if (index->i[i] < 0) { 1050 chan->u[i] = 0; 1051 } else { 1052 const uint *p = (const uint *)mach->Consts[index2D->i[i]]; 1053 1054 chan->u[i] = p[index->i[i] * 4 + swizzle]; 1055 } 1056 } 1057 break; 1058 1059 case TGSI_FILE_INPUT: 1060 case TGSI_FILE_SYSTEM_VALUE: 1061 for (i = 0; i < QUAD_SIZE; i++) { 1062 /* 1063 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1064 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 1065 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 1066 index2D->i[i], index->i[i]); 1067 }*/ 1068 chan->u[i] = mach->Inputs[index2D->i[i] * 1069 TGSI_EXEC_MAX_INPUT_ATTRIBS + 1070 index->i[i]].xyzw[swizzle].u[i]; 1071 } 1072 break; 1073 1074 case TGSI_FILE_TEMPORARY: 1075 for (i = 0; i < QUAD_SIZE; i++) { 1076 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1077 assert(index2D->i[i] == 0); 1078 1079 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1080 } 1081 break; 1082 1083 case TGSI_FILE_TEMPORARY_ARRAY: 1084 for (i = 0; i < QUAD_SIZE; i++) { 1085 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1086 assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS); 1087 1088 chan->u[i] = 1089 mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i]; 1090 } 1091 break; 1092 1093 case TGSI_FILE_IMMEDIATE: 1094 for (i = 0; i < QUAD_SIZE; i++) { 1095 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1096 assert(index2D->i[i] == 0); 1097 1098 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1099 } 1100 break; 1101 1102 case TGSI_FILE_IMMEDIATE_ARRAY: 1103 for (i = 0; i < QUAD_SIZE; i++) { 1104 assert(index2D->i[i] == 0); 1105 1106 chan->f[i] = mach->ImmArray[index->i[i]][swizzle]; 1107 } 1108 break; 1109 1110 case TGSI_FILE_ADDRESS: 1111 for (i = 0; i < QUAD_SIZE; i++) { 1112 assert(index->i[i] >= 0); 1113 assert(index2D->i[i] == 0); 1114 1115 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1116 } 1117 break; 1118 1119 case TGSI_FILE_PREDICATE: 1120 for (i = 0; i < QUAD_SIZE; i++) { 1121 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); 1122 assert(index2D->i[i] == 0); 1123 1124 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; 1125 } 1126 break; 1127 1128 case TGSI_FILE_OUTPUT: 1129 /* vertex/fragment output vars can be read too */ 1130 for (i = 0; i < QUAD_SIZE; i++) { 1131 assert(index->i[i] >= 0); 1132 assert(index2D->i[i] == 0); 1133 1134 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1135 } 1136 break; 1137 1138 default: 1139 assert(0); 1140 for (i = 0; i < QUAD_SIZE; i++) { 1141 chan->u[i] = 0; 1142 } 1143 } 1144} 1145 1146static void 1147fetch_source(const struct tgsi_exec_machine *mach, 1148 union tgsi_exec_channel *chan, 1149 const struct tgsi_full_src_register *reg, 1150 const uint chan_index, 1151 enum tgsi_exec_datatype src_datatype) 1152{ 1153 union tgsi_exec_channel index; 1154 union tgsi_exec_channel index2D; 1155 uint swizzle; 1156 1157 /* We start with a direct index into a register file. 1158 * 1159 * file[1], 1160 * where: 1161 * file = Register.File 1162 * [1] = Register.Index 1163 */ 1164 index.i[0] = 1165 index.i[1] = 1166 index.i[2] = 1167 index.i[3] = reg->Register.Index; 1168 1169 /* There is an extra source register that indirectly subscripts 1170 * a register file. The direct index now becomes an offset 1171 * that is being added to the indirect register. 1172 * 1173 * file[ind[2].x+1], 1174 * where: 1175 * ind = Indirect.File 1176 * [2] = Indirect.Index 1177 * .x = Indirect.SwizzleX 1178 */ 1179 if (reg->Register.Indirect) { 1180 union tgsi_exec_channel index2; 1181 union tgsi_exec_channel indir_index; 1182 const uint execmask = mach->ExecMask; 1183 uint i; 1184 1185 /* which address register (always zero now) */ 1186 index2.i[0] = 1187 index2.i[1] = 1188 index2.i[2] = 1189 index2.i[3] = reg->Indirect.Index; 1190 1191 /* get current value of address register[swizzle] */ 1192 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1193 fetch_src_file_channel(mach, 1194 reg->Indirect.File, 1195 swizzle, 1196 &index2, 1197 &ZeroVec, 1198 &indir_index); 1199 1200 /* add value of address register to the offset */ 1201 index.i[0] += indir_index.i[0]; 1202 index.i[1] += indir_index.i[1]; 1203 index.i[2] += indir_index.i[2]; 1204 index.i[3] += indir_index.i[3]; 1205 1206 /* for disabled execution channels, zero-out the index to 1207 * avoid using a potential garbage value. 1208 */ 1209 for (i = 0; i < QUAD_SIZE; i++) { 1210 if ((execmask & (1 << i)) == 0) 1211 index.i[i] = 0; 1212 } 1213 } 1214 1215 /* There is an extra source register that is a second 1216 * subscript to a register file. Effectively it means that 1217 * the register file is actually a 2D array of registers. 1218 * 1219 * file[3][1], 1220 * where: 1221 * [3] = Dimension.Index 1222 */ 1223 if (reg->Register.Dimension) { 1224 index2D.i[0] = 1225 index2D.i[1] = 1226 index2D.i[2] = 1227 index2D.i[3] = reg->Dimension.Index; 1228 1229 /* Again, the second subscript index can be addressed indirectly 1230 * identically to the first one. 1231 * Nothing stops us from indirectly addressing the indirect register, 1232 * but there is no need for that, so we won't exercise it. 1233 * 1234 * file[ind[4].y+3][1], 1235 * where: 1236 * ind = DimIndirect.File 1237 * [4] = DimIndirect.Index 1238 * .y = DimIndirect.SwizzleX 1239 */ 1240 if (reg->Dimension.Indirect) { 1241 union tgsi_exec_channel index2; 1242 union tgsi_exec_channel indir_index; 1243 const uint execmask = mach->ExecMask; 1244 uint i; 1245 1246 index2.i[0] = 1247 index2.i[1] = 1248 index2.i[2] = 1249 index2.i[3] = reg->DimIndirect.Index; 1250 1251 swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); 1252 fetch_src_file_channel(mach, 1253 reg->DimIndirect.File, 1254 swizzle, 1255 &index2, 1256 &ZeroVec, 1257 &indir_index); 1258 1259 index2D.i[0] += indir_index.i[0]; 1260 index2D.i[1] += indir_index.i[1]; 1261 index2D.i[2] += indir_index.i[2]; 1262 index2D.i[3] += indir_index.i[3]; 1263 1264 /* for disabled execution channels, zero-out the index to 1265 * avoid using a potential garbage value. 1266 */ 1267 for (i = 0; i < QUAD_SIZE; i++) { 1268 if ((execmask & (1 << i)) == 0) { 1269 index2D.i[i] = 0; 1270 } 1271 } 1272 } 1273 1274 /* If by any chance there was a need for a 3D array of register 1275 * files, we would have to check whether Dimension is followed 1276 * by a dimension register and continue the saga. 1277 */ 1278 } else { 1279 index2D.i[0] = 1280 index2D.i[1] = 1281 index2D.i[2] = 1282 index2D.i[3] = 0; 1283 } 1284 1285 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1286 fetch_src_file_channel(mach, 1287 reg->Register.File, 1288 swizzle, 1289 &index, 1290 &index2D, 1291 chan); 1292 1293 if (reg->Register.Absolute) { 1294 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1295 micro_abs(chan, chan); 1296 } else { 1297 micro_iabs(chan, chan); 1298 } 1299 } 1300 1301 if (reg->Register.Negate) { 1302 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1303 micro_neg(chan, chan); 1304 } else { 1305 micro_ineg(chan, chan); 1306 } 1307 } 1308} 1309 1310static void 1311store_dest(struct tgsi_exec_machine *mach, 1312 const union tgsi_exec_channel *chan, 1313 const struct tgsi_full_dst_register *reg, 1314 const struct tgsi_full_instruction *inst, 1315 uint chan_index, 1316 enum tgsi_exec_datatype dst_datatype) 1317{ 1318 uint i; 1319 union tgsi_exec_channel null; 1320 union tgsi_exec_channel *dst; 1321 union tgsi_exec_channel index2D; 1322 uint execmask = mach->ExecMask; 1323 int offset = 0; /* indirection offset */ 1324 int index; 1325 1326 /* for debugging */ 1327 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { 1328 check_inf_or_nan(chan); 1329 } 1330 1331 /* There is an extra source register that indirectly subscripts 1332 * a register file. The direct index now becomes an offset 1333 * that is being added to the indirect register. 1334 * 1335 * file[ind[2].x+1], 1336 * where: 1337 * ind = Indirect.File 1338 * [2] = Indirect.Index 1339 * .x = Indirect.SwizzleX 1340 */ 1341 if (reg->Register.Indirect) { 1342 union tgsi_exec_channel index; 1343 union tgsi_exec_channel indir_index; 1344 uint swizzle; 1345 1346 /* which address register (always zero for now) */ 1347 index.i[0] = 1348 index.i[1] = 1349 index.i[2] = 1350 index.i[3] = reg->Indirect.Index; 1351 1352 /* get current value of address register[swizzle] */ 1353 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1354 1355 /* fetch values from the address/indirection register */ 1356 fetch_src_file_channel(mach, 1357 reg->Indirect.File, 1358 swizzle, 1359 &index, 1360 &ZeroVec, 1361 &indir_index); 1362 1363 /* save indirection offset */ 1364 offset = indir_index.i[0]; 1365 } 1366 1367 /* There is an extra source register that is a second 1368 * subscript to a register file. Effectively it means that 1369 * the register file is actually a 2D array of registers. 1370 * 1371 * file[3][1], 1372 * where: 1373 * [3] = Dimension.Index 1374 */ 1375 if (reg->Register.Dimension) { 1376 index2D.i[0] = 1377 index2D.i[1] = 1378 index2D.i[2] = 1379 index2D.i[3] = reg->Dimension.Index; 1380 1381 /* Again, the second subscript index can be addressed indirectly 1382 * identically to the first one. 1383 * Nothing stops us from indirectly addressing the indirect register, 1384 * but there is no need for that, so we won't exercise it. 1385 * 1386 * file[ind[4].y+3][1], 1387 * where: 1388 * ind = DimIndirect.File 1389 * [4] = DimIndirect.Index 1390 * .y = DimIndirect.SwizzleX 1391 */ 1392 if (reg->Dimension.Indirect) { 1393 union tgsi_exec_channel index2; 1394 union tgsi_exec_channel indir_index; 1395 const uint execmask = mach->ExecMask; 1396 unsigned swizzle; 1397 uint i; 1398 1399 index2.i[0] = 1400 index2.i[1] = 1401 index2.i[2] = 1402 index2.i[3] = reg->DimIndirect.Index; 1403 1404 swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); 1405 fetch_src_file_channel(mach, 1406 reg->DimIndirect.File, 1407 swizzle, 1408 &index2, 1409 &ZeroVec, 1410 &indir_index); 1411 1412 index2D.i[0] += indir_index.i[0]; 1413 index2D.i[1] += indir_index.i[1]; 1414 index2D.i[2] += indir_index.i[2]; 1415 index2D.i[3] += indir_index.i[3]; 1416 1417 /* for disabled execution channels, zero-out the index to 1418 * avoid using a potential garbage value. 1419 */ 1420 for (i = 0; i < QUAD_SIZE; i++) { 1421 if ((execmask & (1 << i)) == 0) { 1422 index2D.i[i] = 0; 1423 } 1424 } 1425 } 1426 1427 /* If by any chance there was a need for a 3D array of register 1428 * files, we would have to check whether Dimension is followed 1429 * by a dimension register and continue the saga. 1430 */ 1431 } else { 1432 index2D.i[0] = 1433 index2D.i[1] = 1434 index2D.i[2] = 1435 index2D.i[3] = 0; 1436 } 1437 1438 switch (reg->Register.File) { 1439 case TGSI_FILE_NULL: 1440 dst = &null; 1441 break; 1442 1443 case TGSI_FILE_OUTPUT: 1444 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1445 + reg->Register.Index; 1446 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1447#if 0 1448 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1449 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1450 for (i = 0; i < QUAD_SIZE; i++) 1451 if (execmask & (1 << i)) 1452 fprintf(stderr, "%f, ", chan->f[i]); 1453 fprintf(stderr, ")\n"); 1454 } 1455#endif 1456 break; 1457 1458 case TGSI_FILE_TEMPORARY: 1459 index = reg->Register.Index; 1460 assert( index < TGSI_EXEC_NUM_TEMPS ); 1461 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1462 break; 1463 1464 case TGSI_FILE_TEMPORARY_ARRAY: 1465 index = reg->Register.Index; 1466 assert( index < TGSI_EXEC_NUM_TEMPS ); 1467 assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS ); 1468 /* XXX we use index2D.i[0] here but somehow we might 1469 * end up with someone trying to store indirectly in 1470 * different buffers */ 1471 dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index]; 1472 break; 1473 1474 case TGSI_FILE_ADDRESS: 1475 index = reg->Register.Index; 1476 dst = &mach->Addrs[index].xyzw[chan_index]; 1477 break; 1478 1479 case TGSI_FILE_PREDICATE: 1480 index = reg->Register.Index; 1481 assert(index < TGSI_EXEC_NUM_PREDS); 1482 dst = &mach->Predicates[index].xyzw[chan_index]; 1483 break; 1484 1485 default: 1486 assert( 0 ); 1487 return; 1488 } 1489 1490 if (inst->Instruction.Predicate) { 1491 uint swizzle; 1492 union tgsi_exec_channel *pred; 1493 1494 switch (chan_index) { 1495 case CHAN_X: 1496 swizzle = inst->Predicate.SwizzleX; 1497 break; 1498 case CHAN_Y: 1499 swizzle = inst->Predicate.SwizzleY; 1500 break; 1501 case CHAN_Z: 1502 swizzle = inst->Predicate.SwizzleZ; 1503 break; 1504 case CHAN_W: 1505 swizzle = inst->Predicate.SwizzleW; 1506 break; 1507 default: 1508 assert(0); 1509 return; 1510 } 1511 1512 assert(inst->Predicate.Index == 0); 1513 1514 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; 1515 1516 if (inst->Predicate.Negate) { 1517 for (i = 0; i < QUAD_SIZE; i++) { 1518 if (pred->u[i]) { 1519 execmask &= ~(1 << i); 1520 } 1521 } 1522 } else { 1523 for (i = 0; i < QUAD_SIZE; i++) { 1524 if (!pred->u[i]) { 1525 execmask &= ~(1 << i); 1526 } 1527 } 1528 } 1529 } 1530 1531 switch (inst->Instruction.Saturate) { 1532 case TGSI_SAT_NONE: 1533 for (i = 0; i < QUAD_SIZE; i++) 1534 if (execmask & (1 << i)) 1535 dst->i[i] = chan->i[i]; 1536 break; 1537 1538 case TGSI_SAT_ZERO_ONE: 1539 for (i = 0; i < QUAD_SIZE; i++) 1540 if (execmask & (1 << i)) { 1541 if (chan->f[i] < 0.0f) 1542 dst->f[i] = 0.0f; 1543 else if (chan->f[i] > 1.0f) 1544 dst->f[i] = 1.0f; 1545 else 1546 dst->i[i] = chan->i[i]; 1547 } 1548 break; 1549 1550 case TGSI_SAT_MINUS_PLUS_ONE: 1551 for (i = 0; i < QUAD_SIZE; i++) 1552 if (execmask & (1 << i)) { 1553 if (chan->f[i] < -1.0f) 1554 dst->f[i] = -1.0f; 1555 else if (chan->f[i] > 1.0f) 1556 dst->f[i] = 1.0f; 1557 else 1558 dst->i[i] = chan->i[i]; 1559 } 1560 break; 1561 1562 default: 1563 assert( 0 ); 1564 } 1565} 1566 1567#define FETCH(VAL,INDEX,CHAN)\ 1568 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 1569 1570#define STORE(VAL,INDEX,CHAN)\ 1571 store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) 1572 1573 1574/** 1575 * Execute ARB-style KIL which is predicated by a src register. 1576 * Kill fragment if any of the four values is less than zero. 1577 */ 1578static void 1579exec_kil(struct tgsi_exec_machine *mach, 1580 const struct tgsi_full_instruction *inst) 1581{ 1582 uint uniquemask; 1583 uint chan_index; 1584 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1585 union tgsi_exec_channel r[1]; 1586 1587 /* This mask stores component bits that were already tested. */ 1588 uniquemask = 0; 1589 1590 for (chan_index = 0; chan_index < 4; chan_index++) 1591 { 1592 uint swizzle; 1593 uint i; 1594 1595 /* unswizzle channel */ 1596 swizzle = tgsi_util_get_full_src_register_swizzle ( 1597 &inst->Src[0], 1598 chan_index); 1599 1600 /* check if the component has not been already tested */ 1601 if (uniquemask & (1 << swizzle)) 1602 continue; 1603 uniquemask |= 1 << swizzle; 1604 1605 FETCH(&r[0], 0, chan_index); 1606 for (i = 0; i < 4; i++) 1607 if (r[0].f[i] < 0.0f) 1608 kilmask |= 1 << i; 1609 } 1610 1611 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1612} 1613 1614/** 1615 * Execute NVIDIA-style KIL which is predicated by a condition code. 1616 * Kill fragment if the condition code is TRUE. 1617 */ 1618static void 1619exec_kilp(struct tgsi_exec_machine *mach, 1620 const struct tgsi_full_instruction *inst) 1621{ 1622 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1623 1624 /* "unconditional" kil */ 1625 kilmask = mach->ExecMask; 1626 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1627} 1628 1629static void 1630emit_vertex(struct tgsi_exec_machine *mach) 1631{ 1632 /* FIXME: check for exec mask correctly 1633 unsigned i; 1634 for (i = 0; i < QUAD_SIZE; ++i) { 1635 if ((mach->ExecMask & (1 << i))) 1636 */ 1637 if (mach->ExecMask) { 1638 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 1639 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 1640 } 1641} 1642 1643static void 1644emit_primitive(struct tgsi_exec_machine *mach) 1645{ 1646 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; 1647 /* FIXME: check for exec mask correctly 1648 unsigned i; 1649 for (i = 0; i < QUAD_SIZE; ++i) { 1650 if ((mach->ExecMask & (1 << i))) 1651 */ 1652 if (mach->ExecMask) { 1653 ++(*prim_count); 1654 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 1655 mach->Primitives[*prim_count] = 0; 1656 } 1657} 1658 1659static void 1660conditional_emit_primitive(struct tgsi_exec_machine *mach) 1661{ 1662 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1663 int emitted_verts = 1664 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; 1665 if (emitted_verts) { 1666 emit_primitive(mach); 1667 } 1668 } 1669} 1670 1671 1672/* 1673 * Fetch four texture samples using STR texture coordinates. 1674 */ 1675static void 1676fetch_texel( struct tgsi_sampler *sampler, 1677 const union tgsi_exec_channel *s, 1678 const union tgsi_exec_channel *t, 1679 const union tgsi_exec_channel *p, 1680 const union tgsi_exec_channel *c0, 1681 enum tgsi_sampler_control control, 1682 union tgsi_exec_channel *r, 1683 union tgsi_exec_channel *g, 1684 union tgsi_exec_channel *b, 1685 union tgsi_exec_channel *a ) 1686{ 1687 uint j; 1688 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1689 1690 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); 1691 1692 for (j = 0; j < 4; j++) { 1693 r->f[j] = rgba[0][j]; 1694 g->f[j] = rgba[1][j]; 1695 b->f[j] = rgba[2][j]; 1696 a->f[j] = rgba[3][j]; 1697 } 1698} 1699 1700 1701#define TEX_MODIFIER_NONE 0 1702#define TEX_MODIFIER_PROJECTED 1 1703#define TEX_MODIFIER_LOD_BIAS 2 1704#define TEX_MODIFIER_EXPLICIT_LOD 3 1705 1706 1707static void 1708exec_tex(struct tgsi_exec_machine *mach, 1709 const struct tgsi_full_instruction *inst, 1710 uint modifier) 1711{ 1712 const uint unit = inst->Src[1].Register.Index; 1713 union tgsi_exec_channel r[4]; 1714 const union tgsi_exec_channel *lod = &ZeroVec; 1715 enum tgsi_sampler_control control; 1716 uint chan_index; 1717 1718 if (modifier != TEX_MODIFIER_NONE) { 1719 FETCH(&r[3], 0, CHAN_W); 1720 if (modifier != TEX_MODIFIER_PROJECTED) { 1721 lod = &r[3]; 1722 } 1723 } 1724 1725 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 1726 control = tgsi_sampler_lod_explicit; 1727 } else { 1728 control = tgsi_sampler_lod_bias; 1729 } 1730 1731 switch (inst->Texture.Texture) { 1732 case TGSI_TEXTURE_1D: 1733 case TGSI_TEXTURE_SHADOW1D: 1734 FETCH(&r[0], 0, CHAN_X); 1735 1736 if (modifier == TEX_MODIFIER_PROJECTED) { 1737 micro_div(&r[0], &r[0], &r[3]); 1738 } 1739 1740 fetch_texel(mach->Samplers[unit], 1741 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ 1742 control, 1743 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1744 break; 1745 1746 case TGSI_TEXTURE_2D: 1747 case TGSI_TEXTURE_RECT: 1748 case TGSI_TEXTURE_SHADOW2D: 1749 case TGSI_TEXTURE_SHADOWRECT: 1750 FETCH(&r[0], 0, CHAN_X); 1751 FETCH(&r[1], 0, CHAN_Y); 1752 FETCH(&r[2], 0, CHAN_Z); 1753 1754 if (modifier == TEX_MODIFIER_PROJECTED) { 1755 micro_div(&r[0], &r[0], &r[3]); 1756 micro_div(&r[1], &r[1], &r[3]); 1757 micro_div(&r[2], &r[2], &r[3]); 1758 } 1759 1760 fetch_texel(mach->Samplers[unit], 1761 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ 1762 control, 1763 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1764 break; 1765 1766 case TGSI_TEXTURE_3D: 1767 case TGSI_TEXTURE_CUBE: 1768 FETCH(&r[0], 0, CHAN_X); 1769 FETCH(&r[1], 0, CHAN_Y); 1770 FETCH(&r[2], 0, CHAN_Z); 1771 1772 if (modifier == TEX_MODIFIER_PROJECTED) { 1773 micro_div(&r[0], &r[0], &r[3]); 1774 micro_div(&r[1], &r[1], &r[3]); 1775 micro_div(&r[2], &r[2], &r[3]); 1776 } 1777 1778 fetch_texel(mach->Samplers[unit], 1779 &r[0], &r[1], &r[2], lod, 1780 control, 1781 &r[0], &r[1], &r[2], &r[3]); 1782 break; 1783 1784 default: 1785 assert(0); 1786 } 1787 1788 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1789 STORE(&r[chan_index], 0, chan_index); 1790 } 1791} 1792 1793static void 1794exec_txd(struct tgsi_exec_machine *mach, 1795 const struct tgsi_full_instruction *inst) 1796{ 1797 const uint unit = inst->Src[3].Register.Index; 1798 union tgsi_exec_channel r[4]; 1799 uint chan_index; 1800 1801 /* 1802 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 1803 */ 1804 1805 switch (inst->Texture.Texture) { 1806 case TGSI_TEXTURE_1D: 1807 case TGSI_TEXTURE_SHADOW1D: 1808 1809 FETCH(&r[0], 0, CHAN_X); 1810 1811 fetch_texel(mach->Samplers[unit], 1812 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ 1813 tgsi_sampler_lod_bias, 1814 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1815 break; 1816 1817 case TGSI_TEXTURE_2D: 1818 case TGSI_TEXTURE_RECT: 1819 case TGSI_TEXTURE_SHADOW2D: 1820 case TGSI_TEXTURE_SHADOWRECT: 1821 1822 FETCH(&r[0], 0, CHAN_X); 1823 FETCH(&r[1], 0, CHAN_Y); 1824 FETCH(&r[2], 0, CHAN_Z); 1825 1826 fetch_texel(mach->Samplers[unit], 1827 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ 1828 tgsi_sampler_lod_bias, 1829 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1830 break; 1831 1832 case TGSI_TEXTURE_3D: 1833 case TGSI_TEXTURE_CUBE: 1834 1835 FETCH(&r[0], 0, CHAN_X); 1836 FETCH(&r[1], 0, CHAN_Y); 1837 FETCH(&r[2], 0, CHAN_Z); 1838 1839 fetch_texel(mach->Samplers[unit], 1840 &r[0], &r[1], &r[2], &ZeroVec, 1841 tgsi_sampler_lod_bias, 1842 &r[0], &r[1], &r[2], &r[3]); 1843 break; 1844 1845 default: 1846 assert(0); 1847 } 1848 1849 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1850 STORE(&r[chan_index], 0, chan_index); 1851 } 1852} 1853 1854 1855/** 1856 * Evaluate a constant-valued coefficient at the position of the 1857 * current quad. 1858 */ 1859static void 1860eval_constant_coef( 1861 struct tgsi_exec_machine *mach, 1862 unsigned attrib, 1863 unsigned chan ) 1864{ 1865 unsigned i; 1866 1867 for( i = 0; i < QUAD_SIZE; i++ ) { 1868 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1869 } 1870} 1871 1872/** 1873 * Evaluate a linear-valued coefficient at the position of the 1874 * current quad. 1875 */ 1876static void 1877eval_linear_coef( 1878 struct tgsi_exec_machine *mach, 1879 unsigned attrib, 1880 unsigned chan ) 1881{ 1882 const float x = mach->QuadPos.xyzw[0].f[0]; 1883 const float y = mach->QuadPos.xyzw[1].f[0]; 1884 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1885 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1886 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1887 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1888 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1889 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1890 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1891} 1892 1893/** 1894 * Evaluate a perspective-valued coefficient at the position of the 1895 * current quad. 1896 */ 1897static void 1898eval_perspective_coef( 1899 struct tgsi_exec_machine *mach, 1900 unsigned attrib, 1901 unsigned chan ) 1902{ 1903 const float x = mach->QuadPos.xyzw[0].f[0]; 1904 const float y = mach->QuadPos.xyzw[1].f[0]; 1905 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1906 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1907 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1908 const float *w = mach->QuadPos.xyzw[3].f; 1909 /* divide by W here */ 1910 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1911 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1912 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1913 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1914} 1915 1916 1917typedef void (* eval_coef_func)( 1918 struct tgsi_exec_machine *mach, 1919 unsigned attrib, 1920 unsigned chan ); 1921 1922static void 1923exec_declaration(struct tgsi_exec_machine *mach, 1924 const struct tgsi_full_declaration *decl) 1925{ 1926 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 1927 if (decl->Declaration.File == TGSI_FILE_INPUT || 1928 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1929 uint first, last, mask; 1930 1931 first = decl->Range.First; 1932 last = decl->Range.Last; 1933 mask = decl->Declaration.UsageMask; 1934 1935 /* XXX we could remove this special-case code since 1936 * mach->InterpCoefs[first].a0 should already have the 1937 * front/back-face value. But we should first update the 1938 * ureg code to emit the right UsageMask value (WRITEMASK_X). 1939 * Then, we could remove the tgsi_exec_machine::Face field. 1940 */ 1941 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 1942 uint i; 1943 1944 assert(decl->Semantic.Index == 0); 1945 assert(first == last); 1946 1947 for (i = 0; i < QUAD_SIZE; i++) { 1948 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 1949 } 1950 } else { 1951 eval_coef_func eval; 1952 uint i, j; 1953 1954 switch (decl->Declaration.Interpolate) { 1955 case TGSI_INTERPOLATE_CONSTANT: 1956 eval = eval_constant_coef; 1957 break; 1958 1959 case TGSI_INTERPOLATE_LINEAR: 1960 eval = eval_linear_coef; 1961 break; 1962 1963 case TGSI_INTERPOLATE_PERSPECTIVE: 1964 eval = eval_perspective_coef; 1965 break; 1966 1967 default: 1968 assert(0); 1969 return; 1970 } 1971 1972 for (j = 0; j < NUM_CHANNELS; j++) { 1973 if (mask & (1 << j)) { 1974 for (i = first; i <= last; i++) { 1975 eval(mach, i, j); 1976 } 1977 } 1978 } 1979 } 1980 } 1981 } 1982} 1983 1984typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 1985 const union tgsi_exec_channel *src); 1986 1987static void 1988exec_scalar_unary(struct tgsi_exec_machine *mach, 1989 const struct tgsi_full_instruction *inst, 1990 micro_unary_op op, 1991 enum tgsi_exec_datatype dst_datatype, 1992 enum tgsi_exec_datatype src_datatype) 1993{ 1994 unsigned int chan; 1995 union tgsi_exec_channel src; 1996 union tgsi_exec_channel dst; 1997 1998 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); 1999 op(&dst, &src); 2000 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2001 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2002 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 2003 } 2004 } 2005} 2006 2007static void 2008exec_vector_unary(struct tgsi_exec_machine *mach, 2009 const struct tgsi_full_instruction *inst, 2010 micro_unary_op op, 2011 enum tgsi_exec_datatype dst_datatype, 2012 enum tgsi_exec_datatype src_datatype) 2013{ 2014 unsigned int chan; 2015 struct tgsi_exec_vector dst; 2016 2017 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2018 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2019 union tgsi_exec_channel src; 2020 2021 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 2022 op(&dst.xyzw[chan], &src); 2023 } 2024 } 2025 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2026 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2027 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2028 } 2029 } 2030} 2031 2032typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 2033 const union tgsi_exec_channel *src0, 2034 const union tgsi_exec_channel *src1); 2035 2036static void 2037exec_vector_binary(struct tgsi_exec_machine *mach, 2038 const struct tgsi_full_instruction *inst, 2039 micro_binary_op op, 2040 enum tgsi_exec_datatype dst_datatype, 2041 enum tgsi_exec_datatype src_datatype) 2042{ 2043 unsigned int chan; 2044 struct tgsi_exec_vector dst; 2045 2046 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2047 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2048 union tgsi_exec_channel src[2]; 2049 2050 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2051 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2052 op(&dst.xyzw[chan], &src[0], &src[1]); 2053 } 2054 } 2055 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2056 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2057 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2058 } 2059 } 2060} 2061 2062typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 2063 const union tgsi_exec_channel *src0, 2064 const union tgsi_exec_channel *src1, 2065 const union tgsi_exec_channel *src2); 2066 2067static void 2068exec_vector_trinary(struct tgsi_exec_machine *mach, 2069 const struct tgsi_full_instruction *inst, 2070 micro_trinary_op op, 2071 enum tgsi_exec_datatype dst_datatype, 2072 enum tgsi_exec_datatype src_datatype) 2073{ 2074 unsigned int chan; 2075 struct tgsi_exec_vector dst; 2076 2077 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2078 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2079 union tgsi_exec_channel src[3]; 2080 2081 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2082 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2083 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 2084 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 2085 } 2086 } 2087 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2088 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2089 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2090 } 2091 } 2092} 2093 2094static void 2095exec_dp3(struct tgsi_exec_machine *mach, 2096 const struct tgsi_full_instruction *inst) 2097{ 2098 unsigned int chan; 2099 union tgsi_exec_channel arg[3]; 2100 2101 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2102 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2103 micro_mul(&arg[2], &arg[0], &arg[1]); 2104 2105 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { 2106 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2107 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 2108 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2109 } 2110 2111 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2112 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2113 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2114 } 2115 } 2116} 2117 2118static void 2119exec_dp4(struct tgsi_exec_machine *mach, 2120 const struct tgsi_full_instruction *inst) 2121{ 2122 unsigned int chan; 2123 union tgsi_exec_channel arg[3]; 2124 2125 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2126 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2127 micro_mul(&arg[2], &arg[0], &arg[1]); 2128 2129 for (chan = CHAN_Y; chan <= CHAN_W; chan++) { 2130 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2131 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 2132 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2133 } 2134 2135 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2136 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2137 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2138 } 2139 } 2140} 2141 2142static void 2143exec_dp2a(struct tgsi_exec_machine *mach, 2144 const struct tgsi_full_instruction *inst) 2145{ 2146 unsigned int chan; 2147 union tgsi_exec_channel arg[3]; 2148 2149 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2150 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2151 micro_mul(&arg[2], &arg[0], &arg[1]); 2152 2153 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2154 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2155 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 2156 2157 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2158 micro_add(&arg[0], &arg[0], &arg[1]); 2159 2160 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2161 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2162 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2163 } 2164 } 2165} 2166 2167static void 2168exec_dph(struct tgsi_exec_machine *mach, 2169 const struct tgsi_full_instruction *inst) 2170{ 2171 unsigned int chan; 2172 union tgsi_exec_channel arg[3]; 2173 2174 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2175 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2176 micro_mul(&arg[2], &arg[0], &arg[1]); 2177 2178 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2179 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2180 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2181 2182 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2183 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2184 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 2185 2186 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); 2187 micro_add(&arg[0], &arg[0], &arg[1]); 2188 2189 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2190 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2191 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2192 } 2193 } 2194} 2195 2196static void 2197exec_dp2(struct tgsi_exec_machine *mach, 2198 const struct tgsi_full_instruction *inst) 2199{ 2200 unsigned int chan; 2201 union tgsi_exec_channel arg[3]; 2202 2203 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2204 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2205 micro_mul(&arg[2], &arg[0], &arg[1]); 2206 2207 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2208 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2209 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2210 2211 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2212 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2213 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2214 } 2215 } 2216} 2217 2218static void 2219exec_nrm4(struct tgsi_exec_machine *mach, 2220 const struct tgsi_full_instruction *inst) 2221{ 2222 unsigned int chan; 2223 union tgsi_exec_channel arg[4]; 2224 union tgsi_exec_channel scale; 2225 2226 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2227 micro_mul(&scale, &arg[0], &arg[0]); 2228 2229 for (chan = CHAN_Y; chan <= CHAN_W; chan++) { 2230 union tgsi_exec_channel product; 2231 2232 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2233 micro_mul(&product, &arg[chan], &arg[chan]); 2234 micro_add(&scale, &scale, &product); 2235 } 2236 2237 micro_rsq(&scale, &scale); 2238 2239 for (chan = CHAN_X; chan <= CHAN_W; chan++) { 2240 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2241 micro_mul(&arg[chan], &arg[chan], &scale); 2242 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2243 } 2244 } 2245} 2246 2247static void 2248exec_nrm3(struct tgsi_exec_machine *mach, 2249 const struct tgsi_full_instruction *inst) 2250{ 2251 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 2252 unsigned int chan; 2253 union tgsi_exec_channel arg[3]; 2254 union tgsi_exec_channel scale; 2255 2256 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2257 micro_mul(&scale, &arg[0], &arg[0]); 2258 2259 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { 2260 union tgsi_exec_channel product; 2261 2262 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2263 micro_mul(&product, &arg[chan], &arg[chan]); 2264 micro_add(&scale, &scale, &product); 2265 } 2266 2267 micro_rsq(&scale, &scale); 2268 2269 for (chan = CHAN_X; chan <= CHAN_Z; chan++) { 2270 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2271 micro_mul(&arg[chan], &arg[chan], &scale); 2272 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2273 } 2274 } 2275 } 2276 2277 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2278 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2279 } 2280} 2281 2282static void 2283exec_break(struct tgsi_exec_machine *mach) 2284{ 2285 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 2286 /* turn off loop channels for each enabled exec channel */ 2287 mach->LoopMask &= ~mach->ExecMask; 2288 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2289 UPDATE_EXEC_MASK(mach); 2290 } else { 2291 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 2292 2293 mach->Switch.mask = 0x0; 2294 2295 UPDATE_EXEC_MASK(mach); 2296 } 2297} 2298 2299static void 2300exec_switch(struct tgsi_exec_machine *mach, 2301 const struct tgsi_full_instruction *inst) 2302{ 2303 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 2304 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 2305 2306 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 2307 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); 2308 mach->Switch.mask = 0x0; 2309 mach->Switch.defaultMask = 0x0; 2310 2311 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 2312 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 2313 2314 UPDATE_EXEC_MASK(mach); 2315} 2316 2317static void 2318exec_case(struct tgsi_exec_machine *mach, 2319 const struct tgsi_full_instruction *inst) 2320{ 2321 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 2322 union tgsi_exec_channel src; 2323 uint mask = 0; 2324 2325 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); 2326 2327 if (mach->Switch.selector.u[0] == src.u[0]) { 2328 mask |= 0x1; 2329 } 2330 if (mach->Switch.selector.u[1] == src.u[1]) { 2331 mask |= 0x2; 2332 } 2333 if (mach->Switch.selector.u[2] == src.u[2]) { 2334 mask |= 0x4; 2335 } 2336 if (mach->Switch.selector.u[3] == src.u[3]) { 2337 mask |= 0x8; 2338 } 2339 2340 mach->Switch.defaultMask |= mask; 2341 2342 mach->Switch.mask |= mask & prevMask; 2343 2344 UPDATE_EXEC_MASK(mach); 2345} 2346 2347static void 2348exec_default(struct tgsi_exec_machine *mach) 2349{ 2350 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 2351 2352 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 2353 2354 UPDATE_EXEC_MASK(mach); 2355} 2356 2357static void 2358exec_endswitch(struct tgsi_exec_machine *mach) 2359{ 2360 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 2361 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 2362 2363 UPDATE_EXEC_MASK(mach); 2364} 2365 2366static void 2367micro_i2f(union tgsi_exec_channel *dst, 2368 const union tgsi_exec_channel *src) 2369{ 2370 dst->f[0] = (float)src->i[0]; 2371 dst->f[1] = (float)src->i[1]; 2372 dst->f[2] = (float)src->i[2]; 2373 dst->f[3] = (float)src->i[3]; 2374} 2375 2376static void 2377micro_not(union tgsi_exec_channel *dst, 2378 const union tgsi_exec_channel *src) 2379{ 2380 dst->u[0] = ~src->u[0]; 2381 dst->u[1] = ~src->u[1]; 2382 dst->u[2] = ~src->u[2]; 2383 dst->u[3] = ~src->u[3]; 2384} 2385 2386static void 2387micro_shl(union tgsi_exec_channel *dst, 2388 const union tgsi_exec_channel *src0, 2389 const union tgsi_exec_channel *src1) 2390{ 2391 dst->u[0] = src0->u[0] << src1->u[0]; 2392 dst->u[1] = src0->u[1] << src1->u[1]; 2393 dst->u[2] = src0->u[2] << src1->u[2]; 2394 dst->u[3] = src0->u[3] << src1->u[3]; 2395} 2396 2397static void 2398micro_and(union tgsi_exec_channel *dst, 2399 const union tgsi_exec_channel *src0, 2400 const union tgsi_exec_channel *src1) 2401{ 2402 dst->u[0] = src0->u[0] & src1->u[0]; 2403 dst->u[1] = src0->u[1] & src1->u[1]; 2404 dst->u[2] = src0->u[2] & src1->u[2]; 2405 dst->u[3] = src0->u[3] & src1->u[3]; 2406} 2407 2408static void 2409micro_or(union tgsi_exec_channel *dst, 2410 const union tgsi_exec_channel *src0, 2411 const union tgsi_exec_channel *src1) 2412{ 2413 dst->u[0] = src0->u[0] | src1->u[0]; 2414 dst->u[1] = src0->u[1] | src1->u[1]; 2415 dst->u[2] = src0->u[2] | src1->u[2]; 2416 dst->u[3] = src0->u[3] | src1->u[3]; 2417} 2418 2419static void 2420micro_xor(union tgsi_exec_channel *dst, 2421 const union tgsi_exec_channel *src0, 2422 const union tgsi_exec_channel *src1) 2423{ 2424 dst->u[0] = src0->u[0] ^ src1->u[0]; 2425 dst->u[1] = src0->u[1] ^ src1->u[1]; 2426 dst->u[2] = src0->u[2] ^ src1->u[2]; 2427 dst->u[3] = src0->u[3] ^ src1->u[3]; 2428} 2429 2430static void 2431micro_f2i(union tgsi_exec_channel *dst, 2432 const union tgsi_exec_channel *src) 2433{ 2434 dst->i[0] = (int)src->f[0]; 2435 dst->i[1] = (int)src->f[1]; 2436 dst->i[2] = (int)src->f[2]; 2437 dst->i[3] = (int)src->f[3]; 2438} 2439 2440static void 2441micro_idiv(union tgsi_exec_channel *dst, 2442 const union tgsi_exec_channel *src0, 2443 const union tgsi_exec_channel *src1) 2444{ 2445 dst->i[0] = src0->i[0] / src1->i[0]; 2446 dst->i[1] = src0->i[1] / src1->i[1]; 2447 dst->i[2] = src0->i[2] / src1->i[2]; 2448 dst->i[3] = src0->i[3] / src1->i[3]; 2449} 2450 2451static void 2452micro_imax(union tgsi_exec_channel *dst, 2453 const union tgsi_exec_channel *src0, 2454 const union tgsi_exec_channel *src1) 2455{ 2456 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 2457 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 2458 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 2459 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 2460} 2461 2462static void 2463micro_imin(union tgsi_exec_channel *dst, 2464 const union tgsi_exec_channel *src0, 2465 const union tgsi_exec_channel *src1) 2466{ 2467 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 2468 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 2469 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 2470 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 2471} 2472 2473static void 2474micro_isge(union tgsi_exec_channel *dst, 2475 const union tgsi_exec_channel *src0, 2476 const union tgsi_exec_channel *src1) 2477{ 2478 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 2479 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 2480 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 2481 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 2482} 2483 2484static void 2485micro_ishr(union tgsi_exec_channel *dst, 2486 const union tgsi_exec_channel *src0, 2487 const union tgsi_exec_channel *src1) 2488{ 2489 dst->i[0] = src0->i[0] >> src1->i[0]; 2490 dst->i[1] = src0->i[1] >> src1->i[1]; 2491 dst->i[2] = src0->i[2] >> src1->i[2]; 2492 dst->i[3] = src0->i[3] >> src1->i[3]; 2493} 2494 2495static void 2496micro_islt(union tgsi_exec_channel *dst, 2497 const union tgsi_exec_channel *src0, 2498 const union tgsi_exec_channel *src1) 2499{ 2500 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 2501 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 2502 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 2503 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; 2504} 2505 2506static void 2507micro_f2u(union tgsi_exec_channel *dst, 2508 const union tgsi_exec_channel *src) 2509{ 2510 dst->u[0] = (uint)src->f[0]; 2511 dst->u[1] = (uint)src->f[1]; 2512 dst->u[2] = (uint)src->f[2]; 2513 dst->u[3] = (uint)src->f[3]; 2514} 2515 2516static void 2517micro_u2f(union tgsi_exec_channel *dst, 2518 const union tgsi_exec_channel *src) 2519{ 2520 dst->f[0] = (float)src->u[0]; 2521 dst->f[1] = (float)src->u[1]; 2522 dst->f[2] = (float)src->u[2]; 2523 dst->f[3] = (float)src->u[3]; 2524} 2525 2526static void 2527micro_uadd(union tgsi_exec_channel *dst, 2528 const union tgsi_exec_channel *src0, 2529 const union tgsi_exec_channel *src1) 2530{ 2531 dst->u[0] = src0->u[0] + src1->u[0]; 2532 dst->u[1] = src0->u[1] + src1->u[1]; 2533 dst->u[2] = src0->u[2] + src1->u[2]; 2534 dst->u[3] = src0->u[3] + src1->u[3]; 2535} 2536 2537static void 2538micro_udiv(union tgsi_exec_channel *dst, 2539 const union tgsi_exec_channel *src0, 2540 const union tgsi_exec_channel *src1) 2541{ 2542 dst->u[0] = src0->u[0] / src1->u[0]; 2543 dst->u[1] = src0->u[1] / src1->u[1]; 2544 dst->u[2] = src0->u[2] / src1->u[2]; 2545 dst->u[3] = src0->u[3] / src1->u[3]; 2546} 2547 2548static void 2549micro_umad(union tgsi_exec_channel *dst, 2550 const union tgsi_exec_channel *src0, 2551 const union tgsi_exec_channel *src1, 2552 const union tgsi_exec_channel *src2) 2553{ 2554 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 2555 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 2556 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 2557 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 2558} 2559 2560static void 2561micro_umax(union tgsi_exec_channel *dst, 2562 const union tgsi_exec_channel *src0, 2563 const union tgsi_exec_channel *src1) 2564{ 2565 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 2566 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 2567 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 2568 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 2569} 2570 2571static void 2572micro_umin(union tgsi_exec_channel *dst, 2573 const union tgsi_exec_channel *src0, 2574 const union tgsi_exec_channel *src1) 2575{ 2576 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 2577 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 2578 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 2579 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 2580} 2581 2582static void 2583micro_umod(union tgsi_exec_channel *dst, 2584 const union tgsi_exec_channel *src0, 2585 const union tgsi_exec_channel *src1) 2586{ 2587 dst->u[0] = src0->u[0] % src1->u[0]; 2588 dst->u[1] = src0->u[1] % src1->u[1]; 2589 dst->u[2] = src0->u[2] % src1->u[2]; 2590 dst->u[3] = src0->u[3] % src1->u[3]; 2591} 2592 2593static void 2594micro_umul(union tgsi_exec_channel *dst, 2595 const union tgsi_exec_channel *src0, 2596 const union tgsi_exec_channel *src1) 2597{ 2598 dst->u[0] = src0->u[0] * src1->u[0]; 2599 dst->u[1] = src0->u[1] * src1->u[1]; 2600 dst->u[2] = src0->u[2] * src1->u[2]; 2601 dst->u[3] = src0->u[3] * src1->u[3]; 2602} 2603 2604static void 2605micro_useq(union tgsi_exec_channel *dst, 2606 const union tgsi_exec_channel *src0, 2607 const union tgsi_exec_channel *src1) 2608{ 2609 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 2610 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 2611 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 2612 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 2613} 2614 2615static void 2616micro_usge(union tgsi_exec_channel *dst, 2617 const union tgsi_exec_channel *src0, 2618 const union tgsi_exec_channel *src1) 2619{ 2620 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 2621 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 2622 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 2623 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 2624} 2625 2626static void 2627micro_ushr(union tgsi_exec_channel *dst, 2628 const union tgsi_exec_channel *src0, 2629 const union tgsi_exec_channel *src1) 2630{ 2631 dst->u[0] = src0->u[0] >> src1->u[0]; 2632 dst->u[1] = src0->u[1] >> src1->u[1]; 2633 dst->u[2] = src0->u[2] >> src1->u[2]; 2634 dst->u[3] = src0->u[3] >> src1->u[3]; 2635} 2636 2637static void 2638micro_uslt(union tgsi_exec_channel *dst, 2639 const union tgsi_exec_channel *src0, 2640 const union tgsi_exec_channel *src1) 2641{ 2642 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 2643 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 2644 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 2645 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 2646} 2647 2648static void 2649micro_usne(union tgsi_exec_channel *dst, 2650 const union tgsi_exec_channel *src0, 2651 const union tgsi_exec_channel *src1) 2652{ 2653 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 2654 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 2655 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 2656 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 2657} 2658 2659static void 2660exec_instruction( 2661 struct tgsi_exec_machine *mach, 2662 const struct tgsi_full_instruction *inst, 2663 int *pc ) 2664{ 2665 uint chan_index; 2666 union tgsi_exec_channel r[10]; 2667 union tgsi_exec_channel d[8]; 2668 2669 (*pc)++; 2670 2671 switch (inst->Instruction.Opcode) { 2672 case TGSI_OPCODE_ARL: 2673 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 2674 break; 2675 2676 case TGSI_OPCODE_MOV: 2677 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 2678 break; 2679 2680 case TGSI_OPCODE_LIT: 2681 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2682 FETCH( &r[0], 0, CHAN_X ); 2683 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2684 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2685 } 2686 2687 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2688 FETCH( &r[1], 0, CHAN_Y ); 2689 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2690 2691 FETCH( &r[2], 0, CHAN_W ); 2692 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2693 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2694 micro_pow( &r[1], &r[1], &r[2] ); 2695 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2696 } 2697 2698 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2699 STORE(&d[CHAN_Y], 0, CHAN_Y); 2700 } 2701 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2702 STORE(&d[CHAN_Z], 0, CHAN_Z); 2703 } 2704 } 2705 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2706 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2707 } 2708 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2709 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2710 } 2711 break; 2712 2713 case TGSI_OPCODE_RCP: 2714 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2715 break; 2716 2717 case TGSI_OPCODE_RSQ: 2718 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2719 break; 2720 2721 case TGSI_OPCODE_EXP: 2722 FETCH( &r[0], 0, CHAN_X ); 2723 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2724 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2725 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2726 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2727 } 2728 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2729 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2730 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2731 } 2732 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2733 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2734 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2735 } 2736 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2737 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2738 } 2739 break; 2740 2741 case TGSI_OPCODE_LOG: 2742 FETCH( &r[0], 0, CHAN_X ); 2743 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2744 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2745 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2746 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2747 STORE( &r[0], 0, CHAN_X ); 2748 } 2749 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2750 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2751 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2752 STORE( &r[0], 0, CHAN_Y ); 2753 } 2754 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2755 STORE( &r[1], 0, CHAN_Z ); 2756 } 2757 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2758 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2759 } 2760 break; 2761 2762 case TGSI_OPCODE_MUL: 2763 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2764 break; 2765 2766 case TGSI_OPCODE_ADD: 2767 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2768 break; 2769 2770 case TGSI_OPCODE_DP3: 2771 exec_dp3(mach, inst); 2772 break; 2773 2774 case TGSI_OPCODE_DP4: 2775 exec_dp4(mach, inst); 2776 break; 2777 2778 case TGSI_OPCODE_DST: 2779 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2780 FETCH( &r[0], 0, CHAN_Y ); 2781 FETCH( &r[1], 1, CHAN_Y); 2782 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2783 } 2784 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2785 FETCH(&d[CHAN_Z], 0, CHAN_Z); 2786 } 2787 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2788 FETCH(&d[CHAN_W], 1, CHAN_W); 2789 } 2790 2791 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2792 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); 2793 } 2794 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2795 STORE(&d[CHAN_Y], 0, CHAN_Y); 2796 } 2797 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2798 STORE(&d[CHAN_Z], 0, CHAN_Z); 2799 } 2800 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2801 STORE(&d[CHAN_W], 0, CHAN_W); 2802 } 2803 break; 2804 2805 case TGSI_OPCODE_MIN: 2806 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2807 break; 2808 2809 case TGSI_OPCODE_MAX: 2810 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2811 break; 2812 2813 case TGSI_OPCODE_SLT: 2814 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2815 break; 2816 2817 case TGSI_OPCODE_SGE: 2818 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2819 break; 2820 2821 case TGSI_OPCODE_MAD: 2822 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2823 break; 2824 2825 case TGSI_OPCODE_SUB: 2826 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2827 break; 2828 2829 case TGSI_OPCODE_LRP: 2830 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2831 break; 2832 2833 case TGSI_OPCODE_CND: 2834 exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2835 break; 2836 2837 case TGSI_OPCODE_DP2A: 2838 exec_dp2a(mach, inst); 2839 break; 2840 2841 case TGSI_OPCODE_FRC: 2842 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2843 break; 2844 2845 case TGSI_OPCODE_CLAMP: 2846 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2847 break; 2848 2849 case TGSI_OPCODE_FLR: 2850 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2851 break; 2852 2853 case TGSI_OPCODE_ROUND: 2854 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2855 break; 2856 2857 case TGSI_OPCODE_EX2: 2858 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2859 break; 2860 2861 case TGSI_OPCODE_LG2: 2862 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2863 break; 2864 2865 case TGSI_OPCODE_POW: 2866 FETCH(&r[0], 0, CHAN_X); 2867 FETCH(&r[1], 1, CHAN_X); 2868 2869 micro_pow( &r[0], &r[0], &r[1] ); 2870 2871 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2872 STORE( &r[0], 0, chan_index ); 2873 } 2874 break; 2875 2876 case TGSI_OPCODE_XPD: 2877 FETCH(&r[0], 0, CHAN_Y); 2878 FETCH(&r[1], 1, CHAN_Z); 2879 2880 micro_mul( &r[2], &r[0], &r[1] ); 2881 2882 FETCH(&r[3], 0, CHAN_Z); 2883 FETCH(&r[4], 1, CHAN_Y); 2884 2885 micro_mul( &r[5], &r[3], &r[4] ); 2886 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2887 2888 FETCH(&r[2], 1, CHAN_X); 2889 2890 micro_mul( &r[3], &r[3], &r[2] ); 2891 2892 FETCH(&r[5], 0, CHAN_X); 2893 2894 micro_mul( &r[1], &r[1], &r[5] ); 2895 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2896 2897 micro_mul( &r[5], &r[5], &r[4] ); 2898 micro_mul( &r[0], &r[0], &r[2] ); 2899 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2900 2901 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2902 STORE(&d[CHAN_X], 0, CHAN_X); 2903 } 2904 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2905 STORE(&d[CHAN_Y], 0, CHAN_Y); 2906 } 2907 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2908 STORE(&d[CHAN_Z], 0, CHAN_Z); 2909 } 2910 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2911 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2912 } 2913 break; 2914 2915 case TGSI_OPCODE_ABS: 2916 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2917 break; 2918 2919 case TGSI_OPCODE_RCC: 2920 FETCH(&r[0], 0, CHAN_X); 2921 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2922 micro_float_clamp(&r[0], &r[0]); 2923 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2924 STORE(&r[0], 0, chan_index); 2925 } 2926 break; 2927 2928 case TGSI_OPCODE_DPH: 2929 exec_dph(mach, inst); 2930 break; 2931 2932 case TGSI_OPCODE_COS: 2933 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2934 break; 2935 2936 case TGSI_OPCODE_DDX: 2937 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2938 break; 2939 2940 case TGSI_OPCODE_DDY: 2941 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2942 break; 2943 2944 case TGSI_OPCODE_KILP: 2945 exec_kilp (mach, inst); 2946 break; 2947 2948 case TGSI_OPCODE_KIL: 2949 exec_kil (mach, inst); 2950 break; 2951 2952 case TGSI_OPCODE_PK2H: 2953 assert (0); 2954 break; 2955 2956 case TGSI_OPCODE_PK2US: 2957 assert (0); 2958 break; 2959 2960 case TGSI_OPCODE_PK4B: 2961 assert (0); 2962 break; 2963 2964 case TGSI_OPCODE_PK4UB: 2965 assert (0); 2966 break; 2967 2968 case TGSI_OPCODE_RFL: 2969 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2970 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2971 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2972 /* r0 = dp3(src0, src0) */ 2973 FETCH(&r[2], 0, CHAN_X); 2974 micro_mul(&r[0], &r[2], &r[2]); 2975 FETCH(&r[4], 0, CHAN_Y); 2976 micro_mul(&r[8], &r[4], &r[4]); 2977 micro_add(&r[0], &r[0], &r[8]); 2978 FETCH(&r[6], 0, CHAN_Z); 2979 micro_mul(&r[8], &r[6], &r[6]); 2980 micro_add(&r[0], &r[0], &r[8]); 2981 2982 /* r1 = dp3(src0, src1) */ 2983 FETCH(&r[3], 1, CHAN_X); 2984 micro_mul(&r[1], &r[2], &r[3]); 2985 FETCH(&r[5], 1, CHAN_Y); 2986 micro_mul(&r[8], &r[4], &r[5]); 2987 micro_add(&r[1], &r[1], &r[8]); 2988 FETCH(&r[7], 1, CHAN_Z); 2989 micro_mul(&r[8], &r[6], &r[7]); 2990 micro_add(&r[1], &r[1], &r[8]); 2991 2992 /* r1 = 2 * r1 / r0 */ 2993 micro_add(&r[1], &r[1], &r[1]); 2994 micro_div(&r[1], &r[1], &r[0]); 2995 2996 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2997 micro_mul(&r[2], &r[2], &r[1]); 2998 micro_sub(&r[2], &r[2], &r[3]); 2999 STORE(&r[2], 0, CHAN_X); 3000 } 3001 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 3002 micro_mul(&r[4], &r[4], &r[1]); 3003 micro_sub(&r[4], &r[4], &r[5]); 3004 STORE(&r[4], 0, CHAN_Y); 3005 } 3006 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 3007 micro_mul(&r[6], &r[6], &r[1]); 3008 micro_sub(&r[6], &r[6], &r[7]); 3009 STORE(&r[6], 0, CHAN_Z); 3010 } 3011 } 3012 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 3013 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 3014 } 3015 break; 3016 3017 case TGSI_OPCODE_SEQ: 3018 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3019 break; 3020 3021 case TGSI_OPCODE_SFL: 3022 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3023 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 3024 } 3025 break; 3026 3027 case TGSI_OPCODE_SGT: 3028 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3029 break; 3030 3031 case TGSI_OPCODE_SIN: 3032 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3033 break; 3034 3035 case TGSI_OPCODE_SLE: 3036 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3037 break; 3038 3039 case TGSI_OPCODE_SNE: 3040 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3041 break; 3042 3043 case TGSI_OPCODE_STR: 3044 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 3045 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 3046 } 3047 break; 3048 3049 case TGSI_OPCODE_TEX: 3050 /* simple texture lookup */ 3051 /* src[0] = texcoord */ 3052 /* src[1] = sampler unit */ 3053 exec_tex(mach, inst, TEX_MODIFIER_NONE); 3054 break; 3055 3056 case TGSI_OPCODE_TXB: 3057 /* Texture lookup with lod bias */ 3058 /* src[0] = texcoord (src[0].w = LOD bias) */ 3059 /* src[1] = sampler unit */ 3060 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); 3061 break; 3062 3063 case TGSI_OPCODE_TXD: 3064 /* Texture lookup with explict partial derivatives */ 3065 /* src[0] = texcoord */ 3066 /* src[1] = d[strq]/dx */ 3067 /* src[2] = d[strq]/dy */ 3068 /* src[3] = sampler unit */ 3069 exec_txd(mach, inst); 3070 break; 3071 3072 case TGSI_OPCODE_TXL: 3073 /* Texture lookup with explit LOD */ 3074 /* src[0] = texcoord (src[0].w = LOD) */ 3075 /* src[1] = sampler unit */ 3076 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); 3077 break; 3078 3079 case TGSI_OPCODE_TXP: 3080 /* Texture lookup with projection */ 3081 /* src[0] = texcoord (src[0].w = projection) */ 3082 /* src[1] = sampler unit */ 3083 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); 3084 break; 3085 3086 case TGSI_OPCODE_UP2H: 3087 assert (0); 3088 break; 3089 3090 case TGSI_OPCODE_UP2US: 3091 assert (0); 3092 break; 3093 3094 case TGSI_OPCODE_UP4B: 3095 assert (0); 3096 break; 3097 3098 case TGSI_OPCODE_UP4UB: 3099 assert (0); 3100 break; 3101 3102 case TGSI_OPCODE_X2D: 3103 FETCH(&r[0], 1, CHAN_X); 3104 FETCH(&r[1], 1, CHAN_Y); 3105 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 3106 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 3107 FETCH(&r[2], 2, CHAN_X); 3108 micro_mul(&r[2], &r[2], &r[0]); 3109 FETCH(&r[3], 2, CHAN_Y); 3110 micro_mul(&r[3], &r[3], &r[1]); 3111 micro_add(&r[2], &r[2], &r[3]); 3112 FETCH(&r[3], 0, CHAN_X); 3113 micro_add(&d[CHAN_X], &r[2], &r[3]); 3114 3115 } 3116 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 3117 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 3118 FETCH(&r[2], 2, CHAN_Z); 3119 micro_mul(&r[2], &r[2], &r[0]); 3120 FETCH(&r[3], 2, CHAN_W); 3121 micro_mul(&r[3], &r[3], &r[1]); 3122 micro_add(&r[2], &r[2], &r[3]); 3123 FETCH(&r[3], 0, CHAN_Y); 3124 micro_add(&d[CHAN_Y], &r[2], &r[3]); 3125 3126 } 3127 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 3128 STORE(&d[CHAN_X], 0, CHAN_X); 3129 } 3130 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 3131 STORE(&d[CHAN_Y], 0, CHAN_Y); 3132 } 3133 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 3134 STORE(&d[CHAN_X], 0, CHAN_Z); 3135 } 3136 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 3137 STORE(&d[CHAN_Y], 0, CHAN_W); 3138 } 3139 break; 3140 3141 case TGSI_OPCODE_ARA: 3142 assert (0); 3143 break; 3144 3145 case TGSI_OPCODE_ARR: 3146 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 3147 break; 3148 3149 case TGSI_OPCODE_BRA: 3150 assert (0); 3151 break; 3152 3153 case TGSI_OPCODE_CAL: 3154 /* skip the call if no execution channels are enabled */ 3155 if (mach->ExecMask) { 3156 /* do the call */ 3157 3158 /* First, record the depths of the execution stacks. 3159 * This is important for deeply nested/looped return statements. 3160 * We have to unwind the stacks by the correct amount. For a 3161 * real code generator, we could determine the number of entries 3162 * to pop off each stack with simple static analysis and avoid 3163 * implementing this data structure at run time. 3164 */ 3165 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 3166 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 3167 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 3168 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 3169 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 3170 /* note that PC was already incremented above */ 3171 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 3172 3173 mach->CallStackTop++; 3174 3175 /* Second, push the Cond, Loop, Cont, Func stacks */ 3176 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 3177 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3178 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3179 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3180 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3181 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 3182 3183 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 3184 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3185 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3186 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3187 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3188 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 3189 3190 /* Finally, jump to the subroutine */ 3191 *pc = inst->Label.Label; 3192 } 3193 break; 3194 3195 case TGSI_OPCODE_RET: 3196 mach->FuncMask &= ~mach->ExecMask; 3197 UPDATE_EXEC_MASK(mach); 3198 3199 if (mach->FuncMask == 0x0) { 3200 /* really return now (otherwise, keep executing */ 3201 3202 if (mach->CallStackTop == 0) { 3203 /* returning from main() */ 3204 *pc = -1; 3205 return; 3206 } 3207 3208 assert(mach->CallStackTop > 0); 3209 mach->CallStackTop--; 3210 3211 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3212 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3213 3214 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3215 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3216 3217 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3218 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3219 3220 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 3221 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 3222 3223 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 3224 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 3225 3226 assert(mach->FuncStackTop > 0); 3227 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3228 3229 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3230 3231 UPDATE_EXEC_MASK(mach); 3232 } 3233 break; 3234 3235 case TGSI_OPCODE_SSG: 3236 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3237 break; 3238 3239 case TGSI_OPCODE_CMP: 3240 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3241 break; 3242 3243 case TGSI_OPCODE_SCS: 3244 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 3245 FETCH( &r[0], 0, CHAN_X ); 3246 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 3247 micro_cos(&r[1], &r[0]); 3248 STORE(&r[1], 0, CHAN_X); 3249 } 3250 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 3251 micro_sin(&r[1], &r[0]); 3252 STORE(&r[1], 0, CHAN_Y); 3253 } 3254 } 3255 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 3256 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 3257 } 3258 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 3259 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 3260 } 3261 break; 3262 3263 case TGSI_OPCODE_NRM: 3264 exec_nrm3(mach, inst); 3265 break; 3266 3267 case TGSI_OPCODE_NRM4: 3268 exec_nrm4(mach, inst); 3269 break; 3270 3271 case TGSI_OPCODE_DIV: 3272 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3273 break; 3274 3275 case TGSI_OPCODE_DP2: 3276 exec_dp2(mach, inst); 3277 break; 3278 3279 case TGSI_OPCODE_IF: 3280 /* push CondMask */ 3281 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 3282 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 3283 FETCH( &r[0], 0, CHAN_X ); 3284 /* update CondMask */ 3285 if( ! r[0].u[0] ) { 3286 mach->CondMask &= ~0x1; 3287 } 3288 if( ! r[0].u[1] ) { 3289 mach->CondMask &= ~0x2; 3290 } 3291 if( ! r[0].u[2] ) { 3292 mach->CondMask &= ~0x4; 3293 } 3294 if( ! r[0].u[3] ) { 3295 mach->CondMask &= ~0x8; 3296 } 3297 UPDATE_EXEC_MASK(mach); 3298 /* Todo: If CondMask==0, jump to ELSE */ 3299 break; 3300 3301 case TGSI_OPCODE_ELSE: 3302 /* invert CondMask wrt previous mask */ 3303 { 3304 uint prevMask; 3305 assert(mach->CondStackTop > 0); 3306 prevMask = mach->CondStack[mach->CondStackTop - 1]; 3307 mach->CondMask = ~mach->CondMask & prevMask; 3308 UPDATE_EXEC_MASK(mach); 3309 /* Todo: If CondMask==0, jump to ENDIF */ 3310 } 3311 break; 3312 3313 case TGSI_OPCODE_ENDIF: 3314 /* pop CondMask */ 3315 assert(mach->CondStackTop > 0); 3316 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 3317 UPDATE_EXEC_MASK(mach); 3318 break; 3319 3320 case TGSI_OPCODE_END: 3321 /* make sure we end primitives which haven't 3322 * been explicitly emitted */ 3323 conditional_emit_primitive(mach); 3324 /* halt execution */ 3325 *pc = -1; 3326 break; 3327 3328 case TGSI_OPCODE_PUSHA: 3329 assert (0); 3330 break; 3331 3332 case TGSI_OPCODE_POPA: 3333 assert (0); 3334 break; 3335 3336 case TGSI_OPCODE_CEIL: 3337 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3338 break; 3339 3340 case TGSI_OPCODE_I2F: 3341 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 3342 break; 3343 3344 case TGSI_OPCODE_NOT: 3345 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3346 break; 3347 3348 case TGSI_OPCODE_TRUNC: 3349 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3350 break; 3351 3352 case TGSI_OPCODE_SHL: 3353 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3354 break; 3355 3356 case TGSI_OPCODE_AND: 3357 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3358 break; 3359 3360 case TGSI_OPCODE_OR: 3361 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3362 break; 3363 3364 case TGSI_OPCODE_MOD: 3365 assert (0); 3366 break; 3367 3368 case TGSI_OPCODE_XOR: 3369 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3370 break; 3371 3372 case TGSI_OPCODE_SAD: 3373 assert (0); 3374 break; 3375 3376 case TGSI_OPCODE_TXF: 3377 assert (0); 3378 break; 3379 3380 case TGSI_OPCODE_TXQ: 3381 assert (0); 3382 break; 3383 3384 case TGSI_OPCODE_EMIT: 3385 emit_vertex(mach); 3386 break; 3387 3388 case TGSI_OPCODE_ENDPRIM: 3389 emit_primitive(mach); 3390 break; 3391 3392 case TGSI_OPCODE_BGNLOOP: 3393 /* push LoopMask and ContMasks */ 3394 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3395 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3396 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3397 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3398 3399 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3400 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3401 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3402 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3403 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 3404 break; 3405 3406 case TGSI_OPCODE_ENDLOOP: 3407 /* Restore ContMask, but don't pop */ 3408 assert(mach->ContStackTop > 0); 3409 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3410 UPDATE_EXEC_MASK(mach); 3411 if (mach->ExecMask) { 3412 /* repeat loop: jump to instruction just past BGNLOOP */ 3413 assert(mach->LoopLabelStackTop > 0); 3414 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3415 } 3416 else { 3417 /* exit loop: pop LoopMask */ 3418 assert(mach->LoopStackTop > 0); 3419 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3420 /* pop ContMask */ 3421 assert(mach->ContStackTop > 0); 3422 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3423 assert(mach->LoopLabelStackTop > 0); 3424 --mach->LoopLabelStackTop; 3425 3426 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3427 } 3428 UPDATE_EXEC_MASK(mach); 3429 break; 3430 3431 case TGSI_OPCODE_BRK: 3432 exec_break(mach); 3433 break; 3434 3435 case TGSI_OPCODE_CONT: 3436 /* turn off cont channels for each enabled exec channel */ 3437 mach->ContMask &= ~mach->ExecMask; 3438 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3439 UPDATE_EXEC_MASK(mach); 3440 break; 3441 3442 case TGSI_OPCODE_BGNSUB: 3443 /* no-op */ 3444 break; 3445 3446 case TGSI_OPCODE_ENDSUB: 3447 /* 3448 * XXX: This really should be a no-op. We should never reach this opcode. 3449 */ 3450 3451 assert(mach->CallStackTop > 0); 3452 mach->CallStackTop--; 3453 3454 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3455 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3456 3457 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3458 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3459 3460 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3461 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3462 3463 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 3464 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 3465 3466 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 3467 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 3468 3469 assert(mach->FuncStackTop > 0); 3470 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3471 3472 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3473 3474 UPDATE_EXEC_MASK(mach); 3475 break; 3476 3477 case TGSI_OPCODE_NOP: 3478 break; 3479 3480 case TGSI_OPCODE_BREAKC: 3481 FETCH(&r[0], 0, CHAN_X); 3482 /* update CondMask */ 3483 if (r[0].u[0] && (mach->ExecMask & 0x1)) { 3484 mach->LoopMask &= ~0x1; 3485 } 3486 if (r[0].u[1] && (mach->ExecMask & 0x2)) { 3487 mach->LoopMask &= ~0x2; 3488 } 3489 if (r[0].u[2] && (mach->ExecMask & 0x4)) { 3490 mach->LoopMask &= ~0x4; 3491 } 3492 if (r[0].u[3] && (mach->ExecMask & 0x8)) { 3493 mach->LoopMask &= ~0x8; 3494 } 3495 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3496 UPDATE_EXEC_MASK(mach); 3497 break; 3498 3499 case TGSI_OPCODE_F2I: 3500 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 3501 break; 3502 3503 case TGSI_OPCODE_IDIV: 3504 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3505 break; 3506 3507 case TGSI_OPCODE_IMAX: 3508 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3509 break; 3510 3511 case TGSI_OPCODE_IMIN: 3512 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3513 break; 3514 3515 case TGSI_OPCODE_INEG: 3516 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3517 break; 3518 3519 case TGSI_OPCODE_ISGE: 3520 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3521 break; 3522 3523 case TGSI_OPCODE_ISHR: 3524 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3525 break; 3526 3527 case TGSI_OPCODE_ISLT: 3528 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3529 break; 3530 3531 case TGSI_OPCODE_F2U: 3532 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 3533 break; 3534 3535 case TGSI_OPCODE_U2F: 3536 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 3537 break; 3538 3539 case TGSI_OPCODE_UADD: 3540 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3541 break; 3542 3543 case TGSI_OPCODE_UDIV: 3544 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3545 break; 3546 3547 case TGSI_OPCODE_UMAD: 3548 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3549 break; 3550 3551 case TGSI_OPCODE_UMAX: 3552 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3553 break; 3554 3555 case TGSI_OPCODE_UMIN: 3556 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3557 break; 3558 3559 case TGSI_OPCODE_UMOD: 3560 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3561 break; 3562 3563 case TGSI_OPCODE_UMUL: 3564 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3565 break; 3566 3567 case TGSI_OPCODE_USEQ: 3568 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3569 break; 3570 3571 case TGSI_OPCODE_USGE: 3572 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3573 break; 3574 3575 case TGSI_OPCODE_USHR: 3576 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3577 break; 3578 3579 case TGSI_OPCODE_USLT: 3580 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3581 break; 3582 3583 case TGSI_OPCODE_USNE: 3584 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3585 break; 3586 3587 case TGSI_OPCODE_SWITCH: 3588 exec_switch(mach, inst); 3589 break; 3590 3591 case TGSI_OPCODE_CASE: 3592 exec_case(mach, inst); 3593 break; 3594 3595 case TGSI_OPCODE_DEFAULT: 3596 exec_default(mach); 3597 break; 3598 3599 case TGSI_OPCODE_ENDSWITCH: 3600 exec_endswitch(mach); 3601 break; 3602 3603 default: 3604 assert( 0 ); 3605 } 3606} 3607 3608 3609#define DEBUG_EXECUTION 0 3610 3611 3612/** 3613 * Run TGSI interpreter. 3614 * \return bitmask of "alive" quad components 3615 */ 3616uint 3617tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3618{ 3619 uint i; 3620 int pc = 0; 3621 3622 mach->CondMask = 0xf; 3623 mach->LoopMask = 0xf; 3624 mach->ContMask = 0xf; 3625 mach->FuncMask = 0xf; 3626 mach->ExecMask = 0xf; 3627 3628 mach->Switch.mask = 0xf; 3629 3630 assert(mach->CondStackTop == 0); 3631 assert(mach->LoopStackTop == 0); 3632 assert(mach->ContStackTop == 0); 3633 assert(mach->SwitchStackTop == 0); 3634 assert(mach->BreakStackTop == 0); 3635 assert(mach->CallStackTop == 0); 3636 3637 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3638 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3639 3640 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3641 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3642 mach->Primitives[0] = 0; 3643 } 3644 3645 for (i = 0; i < QUAD_SIZE; i++) { 3646 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3647 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3648 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3649 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3650 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3651 } 3652 3653 /* execute declarations (interpolants) */ 3654 for (i = 0; i < mach->NumDeclarations; i++) { 3655 exec_declaration( mach, mach->Declarations+i ); 3656 } 3657 3658 { 3659#if DEBUG_EXECUTION 3660 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 3661 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 3662 uint inst = 1; 3663 3664 memcpy(temps, mach->Temps, sizeof(temps)); 3665 memcpy(outputs, mach->Outputs, sizeof(outputs)); 3666#endif 3667 3668 /* execute instructions, until pc is set to -1 */ 3669 while (pc != -1) { 3670 3671#if DEBUG_EXECUTION 3672 uint i; 3673 3674 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 3675#endif 3676 3677 assert(pc < (int) mach->NumInstructions); 3678 exec_instruction(mach, mach->Instructions + pc, &pc); 3679 3680#if DEBUG_EXECUTION 3681 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 3682 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 3683 uint j; 3684 3685 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 3686 debug_printf("TEMP[%2u] = ", i); 3687 for (j = 0; j < 4; j++) { 3688 if (j > 0) { 3689 debug_printf(" "); 3690 } 3691 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3692 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 3693 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 3694 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 3695 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 3696 } 3697 } 3698 } 3699 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 3700 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 3701 uint j; 3702 3703 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 3704 debug_printf("OUT[%2u] = ", i); 3705 for (j = 0; j < 4; j++) { 3706 if (j > 0) { 3707 debug_printf(" "); 3708 } 3709 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3710 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 3711 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 3712 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 3713 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 3714 } 3715 } 3716 } 3717#endif 3718 } 3719 } 3720 3721#if 0 3722 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3723 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3724 /* 3725 * Scale back depth component. 3726 */ 3727 for (i = 0; i < 4; i++) 3728 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3729 } 3730#endif 3731 3732 assert(mach->CondStackTop == 0); 3733 assert(mach->LoopStackTop == 0); 3734 assert(mach->ContStackTop == 0); 3735 assert(mach->SwitchStackTop == 0); 3736 assert(mach->BreakStackTop == 0); 3737 assert(mach->CallStackTop == 0); 3738 3739 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3740} 3741