1/* 2 * Mesa 3-D graphics library 3 * Version: 7.3 4 * 5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included 15 * in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25/** 26 * \file prog_execute.c 27 * Software interpreter for vertex/fragment programs. 28 * \author Brian Paul 29 */ 30 31/* 32 * NOTE: we do everything in single-precision floating point; we don't 33 * currently observe the single/half/fixed-precision qualifiers. 34 * 35 */ 36 37 38#include "main/glheader.h" 39#include "main/colormac.h" 40#include "main/macros.h" 41#include "prog_execute.h" 42#include "prog_instruction.h" 43#include "prog_parameter.h" 44#include "prog_print.h" 45#include "prog_noise.h" 46 47 48/* debug predicate */ 49#define DEBUG_PROG 0 50 51 52/** 53 * Set x to positive or negative infinity. 54 */ 55#if defined(USE_IEEE) || defined(_WIN32) 56#define SET_POS_INFINITY(x) \ 57 do { \ 58 fi_type fi; \ 59 fi.i = 0x7F800000; \ 60 x = fi.f; \ 61 } while (0) 62#define SET_NEG_INFINITY(x) \ 63 do { \ 64 fi_type fi; \ 65 fi.i = 0xFF800000; \ 66 x = fi.f; \ 67 } while (0) 68#elif defined(VMS) 69#define SET_POS_INFINITY(x) x = __MAXFLOAT 70#define SET_NEG_INFINITY(x) x = -__MAXFLOAT 71#else 72#define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL 73#define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL 74#endif 75 76#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits 77 78 79static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; 80 81 82 83/** 84 * Return TRUE for +0 and other positive values, FALSE otherwise. 85 * Used for RCC opcode. 86 */ 87static inline GLboolean 88positive(float x) 89{ 90 fi_type fi; 91 fi.f = x; 92 if (fi.i & 0x80000000) 93 return GL_FALSE; 94 return GL_TRUE; 95} 96 97 98 99/** 100 * Return a pointer to the 4-element float vector specified by the given 101 * source register. 102 */ 103static inline const GLfloat * 104get_src_register_pointer(const struct prog_src_register *source, 105 const struct gl_program_machine *machine) 106{ 107 const struct gl_program *prog = machine->CurProgram; 108 GLint reg = source->Index; 109 110 if (source->RelAddr) { 111 /* add address register value to src index/offset */ 112 reg += machine->AddressReg[0][0]; 113 if (reg < 0) { 114 return ZeroVec; 115 } 116 } 117 118 switch (source->File) { 119 case PROGRAM_TEMPORARY: 120 if (reg >= MAX_PROGRAM_TEMPS) 121 return ZeroVec; 122 return machine->Temporaries[reg]; 123 124 case PROGRAM_INPUT: 125 if (prog->Target == GL_VERTEX_PROGRAM_ARB) { 126 if (reg >= VERT_ATTRIB_MAX) 127 return ZeroVec; 128 return machine->VertAttribs[reg]; 129 } 130 else { 131 if (reg >= FRAG_ATTRIB_MAX) 132 return ZeroVec; 133 return machine->Attribs[reg][machine->CurElement]; 134 } 135 136 case PROGRAM_OUTPUT: 137 if (reg >= MAX_PROGRAM_OUTPUTS) 138 return ZeroVec; 139 return machine->Outputs[reg]; 140 141 case PROGRAM_LOCAL_PARAM: 142 if (reg >= MAX_PROGRAM_LOCAL_PARAMS) 143 return ZeroVec; 144 return machine->CurProgram->LocalParams[reg]; 145 146 case PROGRAM_ENV_PARAM: 147 if (reg >= MAX_PROGRAM_ENV_PARAMS) 148 return ZeroVec; 149 return machine->EnvParams[reg]; 150 151 case PROGRAM_STATE_VAR: 152 /* Fallthrough */ 153 case PROGRAM_CONSTANT: 154 /* Fallthrough */ 155 case PROGRAM_UNIFORM: 156 /* Fallthrough */ 157 case PROGRAM_NAMED_PARAM: 158 if (reg >= (GLint) prog->Parameters->NumParameters) 159 return ZeroVec; 160 return (GLfloat *) prog->Parameters->ParameterValues[reg]; 161 162 case PROGRAM_SYSTEM_VALUE: 163 assert(reg < Elements(machine->SystemValues)); 164 return machine->SystemValues[reg]; 165 166 default: 167 _mesa_problem(NULL, 168 "Invalid src register file %d in get_src_register_pointer()", 169 source->File); 170 return NULL; 171 } 172} 173 174 175/** 176 * Return a pointer to the 4-element float vector specified by the given 177 * destination register. 178 */ 179static inline GLfloat * 180get_dst_register_pointer(const struct prog_dst_register *dest, 181 struct gl_program_machine *machine) 182{ 183 static GLfloat dummyReg[4]; 184 GLint reg = dest->Index; 185 186 if (dest->RelAddr) { 187 /* add address register value to src index/offset */ 188 reg += machine->AddressReg[0][0]; 189 if (reg < 0) { 190 return dummyReg; 191 } 192 } 193 194 switch (dest->File) { 195 case PROGRAM_TEMPORARY: 196 if (reg >= MAX_PROGRAM_TEMPS) 197 return dummyReg; 198 return machine->Temporaries[reg]; 199 200 case PROGRAM_OUTPUT: 201 if (reg >= MAX_PROGRAM_OUTPUTS) 202 return dummyReg; 203 return machine->Outputs[reg]; 204 205 case PROGRAM_WRITE_ONLY: 206 return dummyReg; 207 208 default: 209 _mesa_problem(NULL, 210 "Invalid dest register file %d in get_dst_register_pointer()", 211 dest->File); 212 return NULL; 213 } 214} 215 216 217 218/** 219 * Fetch a 4-element float vector from the given source register. 220 * Apply swizzling and negating as needed. 221 */ 222static void 223fetch_vector4(const struct prog_src_register *source, 224 const struct gl_program_machine *machine, GLfloat result[4]) 225{ 226 const GLfloat *src = get_src_register_pointer(source, machine); 227 ASSERT(src); 228 229 if (source->Swizzle == SWIZZLE_NOOP) { 230 /* no swizzling */ 231 COPY_4V(result, src); 232 } 233 else { 234 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3); 235 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3); 236 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3); 237 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3); 238 result[0] = src[GET_SWZ(source->Swizzle, 0)]; 239 result[1] = src[GET_SWZ(source->Swizzle, 1)]; 240 result[2] = src[GET_SWZ(source->Swizzle, 2)]; 241 result[3] = src[GET_SWZ(source->Swizzle, 3)]; 242 } 243 244 if (source->Abs) { 245 result[0] = FABSF(result[0]); 246 result[1] = FABSF(result[1]); 247 result[2] = FABSF(result[2]); 248 result[3] = FABSF(result[3]); 249 } 250 if (source->Negate) { 251 ASSERT(source->Negate == NEGATE_XYZW); 252 result[0] = -result[0]; 253 result[1] = -result[1]; 254 result[2] = -result[2]; 255 result[3] = -result[3]; 256 } 257 258#ifdef NAN_CHECK 259 assert(!IS_INF_OR_NAN(result[0])); 260 assert(!IS_INF_OR_NAN(result[0])); 261 assert(!IS_INF_OR_NAN(result[0])); 262 assert(!IS_INF_OR_NAN(result[0])); 263#endif 264} 265 266 267/** 268 * Fetch a 4-element uint vector from the given source register. 269 * Apply swizzling but not negation/abs. 270 */ 271static void 272fetch_vector4ui(const struct prog_src_register *source, 273 const struct gl_program_machine *machine, GLuint result[4]) 274{ 275 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine); 276 ASSERT(src); 277 278 if (source->Swizzle == SWIZZLE_NOOP) { 279 /* no swizzling */ 280 COPY_4V(result, src); 281 } 282 else { 283 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3); 284 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3); 285 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3); 286 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3); 287 result[0] = src[GET_SWZ(source->Swizzle, 0)]; 288 result[1] = src[GET_SWZ(source->Swizzle, 1)]; 289 result[2] = src[GET_SWZ(source->Swizzle, 2)]; 290 result[3] = src[GET_SWZ(source->Swizzle, 3)]; 291 } 292 293 /* Note: no Negate or Abs here */ 294} 295 296 297 298/** 299 * Fetch the derivative with respect to X or Y for the given register. 300 * XXX this currently only works for fragment program input attribs. 301 */ 302static void 303fetch_vector4_deriv(struct gl_context * ctx, 304 const struct prog_src_register *source, 305 const struct gl_program_machine *machine, 306 char xOrY, GLfloat result[4]) 307{ 308 if (source->File == PROGRAM_INPUT && 309 source->Index < (GLint) machine->NumDeriv) { 310 const GLint col = machine->CurElement; 311 const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3]; 312 const GLfloat invQ = 1.0f / w; 313 GLfloat deriv[4]; 314 315 if (xOrY == 'X') { 316 deriv[0] = machine->DerivX[source->Index][0] * invQ; 317 deriv[1] = machine->DerivX[source->Index][1] * invQ; 318 deriv[2] = machine->DerivX[source->Index][2] * invQ; 319 deriv[3] = machine->DerivX[source->Index][3] * invQ; 320 } 321 else { 322 deriv[0] = machine->DerivY[source->Index][0] * invQ; 323 deriv[1] = machine->DerivY[source->Index][1] * invQ; 324 deriv[2] = machine->DerivY[source->Index][2] * invQ; 325 deriv[3] = machine->DerivY[source->Index][3] * invQ; 326 } 327 328 result[0] = deriv[GET_SWZ(source->Swizzle, 0)]; 329 result[1] = deriv[GET_SWZ(source->Swizzle, 1)]; 330 result[2] = deriv[GET_SWZ(source->Swizzle, 2)]; 331 result[3] = deriv[GET_SWZ(source->Swizzle, 3)]; 332 333 if (source->Abs) { 334 result[0] = FABSF(result[0]); 335 result[1] = FABSF(result[1]); 336 result[2] = FABSF(result[2]); 337 result[3] = FABSF(result[3]); 338 } 339 if (source->Negate) { 340 ASSERT(source->Negate == NEGATE_XYZW); 341 result[0] = -result[0]; 342 result[1] = -result[1]; 343 result[2] = -result[2]; 344 result[3] = -result[3]; 345 } 346 } 347 else { 348 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0); 349 } 350} 351 352 353/** 354 * As above, but only return result[0] element. 355 */ 356static void 357fetch_vector1(const struct prog_src_register *source, 358 const struct gl_program_machine *machine, GLfloat result[4]) 359{ 360 const GLfloat *src = get_src_register_pointer(source, machine); 361 ASSERT(src); 362 363 result[0] = src[GET_SWZ(source->Swizzle, 0)]; 364 365 if (source->Abs) { 366 result[0] = FABSF(result[0]); 367 } 368 if (source->Negate) { 369 result[0] = -result[0]; 370 } 371} 372 373 374static GLuint 375fetch_vector1ui(const struct prog_src_register *source, 376 const struct gl_program_machine *machine) 377{ 378 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine); 379 return src[GET_SWZ(source->Swizzle, 0)]; 380} 381 382 383/** 384 * Fetch texel from texture. Use partial derivatives when possible. 385 */ 386static inline void 387fetch_texel(struct gl_context *ctx, 388 const struct gl_program_machine *machine, 389 const struct prog_instruction *inst, 390 const GLfloat texcoord[4], GLfloat lodBias, 391 GLfloat color[4]) 392{ 393 const GLuint unit = machine->Samplers[inst->TexSrcUnit]; 394 395 /* Note: we only have the right derivatives for fragment input attribs. 396 */ 397 if (machine->NumDeriv > 0 && 398 inst->SrcReg[0].File == PROGRAM_INPUT && 399 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) { 400 /* simple texture fetch for which we should have derivatives */ 401 GLuint attr = inst->SrcReg[0].Index; 402 machine->FetchTexelDeriv(ctx, texcoord, 403 machine->DerivX[attr], 404 machine->DerivY[attr], 405 lodBias, unit, color); 406 } 407 else { 408 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color); 409 } 410} 411 412 413/** 414 * Test value against zero and return GT, LT, EQ or UN if NaN. 415 */ 416static inline GLuint 417generate_cc(float value) 418{ 419 if (value != value) 420 return COND_UN; /* NaN */ 421 if (value > 0.0F) 422 return COND_GT; 423 if (value < 0.0F) 424 return COND_LT; 425 return COND_EQ; 426} 427 428 429/** 430 * Test if the ccMaskRule is satisfied by the given condition code. 431 * Used to mask destination writes according to the current condition code. 432 */ 433static inline GLboolean 434test_cc(GLuint condCode, GLuint ccMaskRule) 435{ 436 switch (ccMaskRule) { 437 case COND_EQ: return (condCode == COND_EQ); 438 case COND_NE: return (condCode != COND_EQ); 439 case COND_LT: return (condCode == COND_LT); 440 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ); 441 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ); 442 case COND_GT: return (condCode == COND_GT); 443 case COND_TR: return GL_TRUE; 444 case COND_FL: return GL_FALSE; 445 default: return GL_TRUE; 446 } 447} 448 449 450/** 451 * Evaluate the 4 condition codes against a predicate and return GL_TRUE 452 * or GL_FALSE to indicate result. 453 */ 454static inline GLboolean 455eval_condition(const struct gl_program_machine *machine, 456 const struct prog_instruction *inst) 457{ 458 const GLuint swizzle = inst->DstReg.CondSwizzle; 459 const GLuint condMask = inst->DstReg.CondMask; 460 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) || 461 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) || 462 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) || 463 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) { 464 return GL_TRUE; 465 } 466 else { 467 return GL_FALSE; 468 } 469} 470 471 472 473/** 474 * Store 4 floats into a register. Observe the instructions saturate and 475 * set-condition-code flags. 476 */ 477static void 478store_vector4(const struct prog_instruction *inst, 479 struct gl_program_machine *machine, const GLfloat value[4]) 480{ 481 const struct prog_dst_register *dstReg = &(inst->DstReg); 482 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE; 483 GLuint writeMask = dstReg->WriteMask; 484 GLfloat clampedValue[4]; 485 GLfloat *dst = get_dst_register_pointer(dstReg, machine); 486 487#if 0 488 if (value[0] > 1.0e10 || 489 IS_INF_OR_NAN(value[0]) || 490 IS_INF_OR_NAN(value[1]) || 491 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3])) 492 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]); 493#endif 494 495 if (clamp) { 496 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F); 497 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F); 498 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F); 499 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F); 500 value = clampedValue; 501 } 502 503 if (dstReg->CondMask != COND_TR) { 504 /* condition codes may turn off some writes */ 505 if (writeMask & WRITEMASK_X) { 506 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)], 507 dstReg->CondMask)) 508 writeMask &= ~WRITEMASK_X; 509 } 510 if (writeMask & WRITEMASK_Y) { 511 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)], 512 dstReg->CondMask)) 513 writeMask &= ~WRITEMASK_Y; 514 } 515 if (writeMask & WRITEMASK_Z) { 516 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)], 517 dstReg->CondMask)) 518 writeMask &= ~WRITEMASK_Z; 519 } 520 if (writeMask & WRITEMASK_W) { 521 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)], 522 dstReg->CondMask)) 523 writeMask &= ~WRITEMASK_W; 524 } 525 } 526 527#ifdef NAN_CHECK 528 assert(!IS_INF_OR_NAN(value[0])); 529 assert(!IS_INF_OR_NAN(value[0])); 530 assert(!IS_INF_OR_NAN(value[0])); 531 assert(!IS_INF_OR_NAN(value[0])); 532#endif 533 534 if (writeMask & WRITEMASK_X) 535 dst[0] = value[0]; 536 if (writeMask & WRITEMASK_Y) 537 dst[1] = value[1]; 538 if (writeMask & WRITEMASK_Z) 539 dst[2] = value[2]; 540 if (writeMask & WRITEMASK_W) 541 dst[3] = value[3]; 542 543 if (inst->CondUpdate) { 544 if (writeMask & WRITEMASK_X) 545 machine->CondCodes[0] = generate_cc(value[0]); 546 if (writeMask & WRITEMASK_Y) 547 machine->CondCodes[1] = generate_cc(value[1]); 548 if (writeMask & WRITEMASK_Z) 549 machine->CondCodes[2] = generate_cc(value[2]); 550 if (writeMask & WRITEMASK_W) 551 machine->CondCodes[3] = generate_cc(value[3]); 552#if DEBUG_PROG 553 printf("CondCodes=(%s,%s,%s,%s) for:\n", 554 _mesa_condcode_string(machine->CondCodes[0]), 555 _mesa_condcode_string(machine->CondCodes[1]), 556 _mesa_condcode_string(machine->CondCodes[2]), 557 _mesa_condcode_string(machine->CondCodes[3])); 558#endif 559 } 560} 561 562 563/** 564 * Store 4 uints into a register. Observe the set-condition-code flags. 565 */ 566static void 567store_vector4ui(const struct prog_instruction *inst, 568 struct gl_program_machine *machine, const GLuint value[4]) 569{ 570 const struct prog_dst_register *dstReg = &(inst->DstReg); 571 GLuint writeMask = dstReg->WriteMask; 572 GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine); 573 574 if (dstReg->CondMask != COND_TR) { 575 /* condition codes may turn off some writes */ 576 if (writeMask & WRITEMASK_X) { 577 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)], 578 dstReg->CondMask)) 579 writeMask &= ~WRITEMASK_X; 580 } 581 if (writeMask & WRITEMASK_Y) { 582 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)], 583 dstReg->CondMask)) 584 writeMask &= ~WRITEMASK_Y; 585 } 586 if (writeMask & WRITEMASK_Z) { 587 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)], 588 dstReg->CondMask)) 589 writeMask &= ~WRITEMASK_Z; 590 } 591 if (writeMask & WRITEMASK_W) { 592 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)], 593 dstReg->CondMask)) 594 writeMask &= ~WRITEMASK_W; 595 } 596 } 597 598 if (writeMask & WRITEMASK_X) 599 dst[0] = value[0]; 600 if (writeMask & WRITEMASK_Y) 601 dst[1] = value[1]; 602 if (writeMask & WRITEMASK_Z) 603 dst[2] = value[2]; 604 if (writeMask & WRITEMASK_W) 605 dst[3] = value[3]; 606 607 if (inst->CondUpdate) { 608 if (writeMask & WRITEMASK_X) 609 machine->CondCodes[0] = generate_cc((float)value[0]); 610 if (writeMask & WRITEMASK_Y) 611 machine->CondCodes[1] = generate_cc((float)value[1]); 612 if (writeMask & WRITEMASK_Z) 613 machine->CondCodes[2] = generate_cc((float)value[2]); 614 if (writeMask & WRITEMASK_W) 615 machine->CondCodes[3] = generate_cc((float)value[3]); 616#if DEBUG_PROG 617 printf("CondCodes=(%s,%s,%s,%s) for:\n", 618 _mesa_condcode_string(machine->CondCodes[0]), 619 _mesa_condcode_string(machine->CondCodes[1]), 620 _mesa_condcode_string(machine->CondCodes[2]), 621 _mesa_condcode_string(machine->CondCodes[3])); 622#endif 623 } 624} 625 626 627 628/** 629 * Execute the given vertex/fragment program. 630 * 631 * \param ctx rendering context 632 * \param program the program to execute 633 * \param machine machine state (must be initialized) 634 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL. 635 */ 636GLboolean 637_mesa_execute_program(struct gl_context * ctx, 638 const struct gl_program *program, 639 struct gl_program_machine *machine) 640{ 641 const GLuint numInst = program->NumInstructions; 642 const GLuint maxExec = 65536; 643 GLuint pc, numExec = 0; 644 645 machine->CurProgram = program; 646 647 if (DEBUG_PROG) { 648 printf("execute program %u --------------------\n", program->Id); 649 } 650 651 if (program->Target == GL_VERTEX_PROGRAM_ARB) { 652 machine->EnvParams = ctx->VertexProgram.Parameters; 653 } 654 else { 655 machine->EnvParams = ctx->FragmentProgram.Parameters; 656 } 657 658 for (pc = 0; pc < numInst; pc++) { 659 const struct prog_instruction *inst = program->Instructions + pc; 660 661 if (DEBUG_PROG) { 662 _mesa_print_instruction(inst); 663 } 664 665 switch (inst->Opcode) { 666 case OPCODE_ABS: 667 { 668 GLfloat a[4], result[4]; 669 fetch_vector4(&inst->SrcReg[0], machine, a); 670 result[0] = FABSF(a[0]); 671 result[1] = FABSF(a[1]); 672 result[2] = FABSF(a[2]); 673 result[3] = FABSF(a[3]); 674 store_vector4(inst, machine, result); 675 } 676 break; 677 case OPCODE_ADD: 678 { 679 GLfloat a[4], b[4], result[4]; 680 fetch_vector4(&inst->SrcReg[0], machine, a); 681 fetch_vector4(&inst->SrcReg[1], machine, b); 682 result[0] = a[0] + b[0]; 683 result[1] = a[1] + b[1]; 684 result[2] = a[2] + b[2]; 685 result[3] = a[3] + b[3]; 686 store_vector4(inst, machine, result); 687 if (DEBUG_PROG) { 688 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n", 689 result[0], result[1], result[2], result[3], 690 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); 691 } 692 } 693 break; 694 case OPCODE_AND: /* bitwise AND */ 695 { 696 GLuint a[4], b[4], result[4]; 697 fetch_vector4ui(&inst->SrcReg[0], machine, a); 698 fetch_vector4ui(&inst->SrcReg[1], machine, b); 699 result[0] = a[0] & b[0]; 700 result[1] = a[1] & b[1]; 701 result[2] = a[2] & b[2]; 702 result[3] = a[3] & b[3]; 703 store_vector4ui(inst, machine, result); 704 } 705 break; 706 case OPCODE_ARL: 707 { 708 GLfloat t[4]; 709 fetch_vector4(&inst->SrcReg[0], machine, t); 710 machine->AddressReg[0][0] = IFLOOR(t[0]); 711 if (DEBUG_PROG) { 712 printf("ARL %d\n", machine->AddressReg[0][0]); 713 } 714 } 715 break; 716 case OPCODE_BGNLOOP: 717 /* no-op */ 718 ASSERT(program->Instructions[inst->BranchTarget].Opcode 719 == OPCODE_ENDLOOP); 720 break; 721 case OPCODE_ENDLOOP: 722 /* subtract 1 here since pc is incremented by for(pc) loop */ 723 ASSERT(program->Instructions[inst->BranchTarget].Opcode 724 == OPCODE_BGNLOOP); 725 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */ 726 break; 727 case OPCODE_BGNSUB: /* begin subroutine */ 728 break; 729 case OPCODE_ENDSUB: /* end subroutine */ 730 break; 731 case OPCODE_BRA: /* branch (conditional) */ 732 if (eval_condition(machine, inst)) { 733 /* take branch */ 734 /* Subtract 1 here since we'll do pc++ below */ 735 pc = inst->BranchTarget - 1; 736 } 737 break; 738 case OPCODE_BRK: /* break out of loop (conditional) */ 739 ASSERT(program->Instructions[inst->BranchTarget].Opcode 740 == OPCODE_ENDLOOP); 741 if (eval_condition(machine, inst)) { 742 /* break out of loop */ 743 /* pc++ at end of for-loop will put us after the ENDLOOP inst */ 744 pc = inst->BranchTarget; 745 } 746 break; 747 case OPCODE_CONT: /* continue loop (conditional) */ 748 ASSERT(program->Instructions[inst->BranchTarget].Opcode 749 == OPCODE_ENDLOOP); 750 if (eval_condition(machine, inst)) { 751 /* continue at ENDLOOP */ 752 /* Subtract 1 here since we'll do pc++ at end of for-loop */ 753 pc = inst->BranchTarget - 1; 754 } 755 break; 756 case OPCODE_CAL: /* Call subroutine (conditional) */ 757 if (eval_condition(machine, inst)) { 758 /* call the subroutine */ 759 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) { 760 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ 761 } 762 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */ 763 /* Subtract 1 here since we'll do pc++ at end of for-loop */ 764 pc = inst->BranchTarget - 1; 765 } 766 break; 767 case OPCODE_CMP: 768 { 769 GLfloat a[4], b[4], c[4], result[4]; 770 fetch_vector4(&inst->SrcReg[0], machine, a); 771 fetch_vector4(&inst->SrcReg[1], machine, b); 772 fetch_vector4(&inst->SrcReg[2], machine, c); 773 result[0] = a[0] < 0.0F ? b[0] : c[0]; 774 result[1] = a[1] < 0.0F ? b[1] : c[1]; 775 result[2] = a[2] < 0.0F ? b[2] : c[2]; 776 result[3] = a[3] < 0.0F ? b[3] : c[3]; 777 store_vector4(inst, machine, result); 778 if (DEBUG_PROG) { 779 printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n", 780 result[0], result[1], result[2], result[3], 781 a[0], a[1], a[2], a[3], 782 b[0], b[1], b[2], b[3], 783 c[0], c[1], c[2], c[3]); 784 } 785 } 786 break; 787 case OPCODE_COS: 788 { 789 GLfloat a[4], result[4]; 790 fetch_vector1(&inst->SrcReg[0], machine, a); 791 result[0] = result[1] = result[2] = result[3] 792 = (GLfloat) cos(a[0]); 793 store_vector4(inst, machine, result); 794 } 795 break; 796 case OPCODE_DDX: /* Partial derivative with respect to X */ 797 { 798 GLfloat result[4]; 799 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine, 800 'X', result); 801 store_vector4(inst, machine, result); 802 } 803 break; 804 case OPCODE_DDY: /* Partial derivative with respect to Y */ 805 { 806 GLfloat result[4]; 807 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine, 808 'Y', result); 809 store_vector4(inst, machine, result); 810 } 811 break; 812 case OPCODE_DP2: 813 { 814 GLfloat a[4], b[4], result[4]; 815 fetch_vector4(&inst->SrcReg[0], machine, a); 816 fetch_vector4(&inst->SrcReg[1], machine, b); 817 result[0] = result[1] = result[2] = result[3] = DOT2(a, b); 818 store_vector4(inst, machine, result); 819 if (DEBUG_PROG) { 820 printf("DP2 %g = (%g %g) . (%g %g)\n", 821 result[0], a[0], a[1], b[0], b[1]); 822 } 823 } 824 break; 825 case OPCODE_DP2A: 826 { 827 GLfloat a[4], b[4], c, result[4]; 828 fetch_vector4(&inst->SrcReg[0], machine, a); 829 fetch_vector4(&inst->SrcReg[1], machine, b); 830 fetch_vector1(&inst->SrcReg[1], machine, &c); 831 result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c; 832 store_vector4(inst, machine, result); 833 if (DEBUG_PROG) { 834 printf("DP2A %g = (%g %g) . (%g %g) + %g\n", 835 result[0], a[0], a[1], b[0], b[1], c); 836 } 837 } 838 break; 839 case OPCODE_DP3: 840 { 841 GLfloat a[4], b[4], result[4]; 842 fetch_vector4(&inst->SrcReg[0], machine, a); 843 fetch_vector4(&inst->SrcReg[1], machine, b); 844 result[0] = result[1] = result[2] = result[3] = DOT3(a, b); 845 store_vector4(inst, machine, result); 846 if (DEBUG_PROG) { 847 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n", 848 result[0], a[0], a[1], a[2], b[0], b[1], b[2]); 849 } 850 } 851 break; 852 case OPCODE_DP4: 853 { 854 GLfloat a[4], b[4], result[4]; 855 fetch_vector4(&inst->SrcReg[0], machine, a); 856 fetch_vector4(&inst->SrcReg[1], machine, b); 857 result[0] = result[1] = result[2] = result[3] = DOT4(a, b); 858 store_vector4(inst, machine, result); 859 if (DEBUG_PROG) { 860 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n", 861 result[0], a[0], a[1], a[2], a[3], 862 b[0], b[1], b[2], b[3]); 863 } 864 } 865 break; 866 case OPCODE_DPH: 867 { 868 GLfloat a[4], b[4], result[4]; 869 fetch_vector4(&inst->SrcReg[0], machine, a); 870 fetch_vector4(&inst->SrcReg[1], machine, b); 871 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3]; 872 store_vector4(inst, machine, result); 873 } 874 break; 875 case OPCODE_DST: /* Distance vector */ 876 { 877 GLfloat a[4], b[4], result[4]; 878 fetch_vector4(&inst->SrcReg[0], machine, a); 879 fetch_vector4(&inst->SrcReg[1], machine, b); 880 result[0] = 1.0F; 881 result[1] = a[1] * b[1]; 882 result[2] = a[2]; 883 result[3] = b[3]; 884 store_vector4(inst, machine, result); 885 } 886 break; 887 case OPCODE_EXP: 888 { 889 GLfloat t[4], q[4], floor_t0; 890 fetch_vector1(&inst->SrcReg[0], machine, t); 891 floor_t0 = FLOORF(t[0]); 892 if (floor_t0 > FLT_MAX_EXP) { 893 SET_POS_INFINITY(q[0]); 894 SET_POS_INFINITY(q[2]); 895 } 896 else if (floor_t0 < FLT_MIN_EXP) { 897 q[0] = 0.0F; 898 q[2] = 0.0F; 899 } 900 else { 901 q[0] = LDEXPF(1.0, (int) floor_t0); 902 /* Note: GL_NV_vertex_program expects 903 * result.z = result.x * APPX(result.y) 904 * We do what the ARB extension says. 905 */ 906 q[2] = (GLfloat) pow(2.0, t[0]); 907 } 908 q[1] = t[0] - floor_t0; 909 q[3] = 1.0F; 910 store_vector4( inst, machine, q ); 911 } 912 break; 913 case OPCODE_EX2: /* Exponential base 2 */ 914 { 915 GLfloat a[4], result[4], val; 916 fetch_vector1(&inst->SrcReg[0], machine, a); 917 val = (GLfloat) pow(2.0, a[0]); 918 /* 919 if (IS_INF_OR_NAN(val)) 920 val = 1.0e10; 921 */ 922 result[0] = result[1] = result[2] = result[3] = val; 923 store_vector4(inst, machine, result); 924 } 925 break; 926 case OPCODE_FLR: 927 { 928 GLfloat a[4], result[4]; 929 fetch_vector4(&inst->SrcReg[0], machine, a); 930 result[0] = FLOORF(a[0]); 931 result[1] = FLOORF(a[1]); 932 result[2] = FLOORF(a[2]); 933 result[3] = FLOORF(a[3]); 934 store_vector4(inst, machine, result); 935 } 936 break; 937 case OPCODE_FRC: 938 { 939 GLfloat a[4], result[4]; 940 fetch_vector4(&inst->SrcReg[0], machine, a); 941 result[0] = a[0] - FLOORF(a[0]); 942 result[1] = a[1] - FLOORF(a[1]); 943 result[2] = a[2] - FLOORF(a[2]); 944 result[3] = a[3] - FLOORF(a[3]); 945 store_vector4(inst, machine, result); 946 } 947 break; 948 case OPCODE_IF: 949 { 950 GLboolean cond; 951 ASSERT(program->Instructions[inst->BranchTarget].Opcode 952 == OPCODE_ELSE || 953 program->Instructions[inst->BranchTarget].Opcode 954 == OPCODE_ENDIF); 955 /* eval condition */ 956 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { 957 GLfloat a[4]; 958 fetch_vector1(&inst->SrcReg[0], machine, a); 959 cond = (a[0] != 0.0); 960 } 961 else { 962 cond = eval_condition(machine, inst); 963 } 964 if (DEBUG_PROG) { 965 printf("IF: %d\n", cond); 966 } 967 /* do if/else */ 968 if (cond) { 969 /* do if-clause (just continue execution) */ 970 } 971 else { 972 /* go to the instruction after ELSE or ENDIF */ 973 assert(inst->BranchTarget >= 0); 974 pc = inst->BranchTarget; 975 } 976 } 977 break; 978 case OPCODE_ELSE: 979 /* goto ENDIF */ 980 ASSERT(program->Instructions[inst->BranchTarget].Opcode 981 == OPCODE_ENDIF); 982 assert(inst->BranchTarget >= 0); 983 pc = inst->BranchTarget; 984 break; 985 case OPCODE_ENDIF: 986 /* nothing */ 987 break; 988 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */ 989 if (eval_condition(machine, inst)) { 990 return GL_FALSE; 991 } 992 break; 993 case OPCODE_KIL: /* ARB_f_p only */ 994 { 995 GLfloat a[4]; 996 fetch_vector4(&inst->SrcReg[0], machine, a); 997 if (DEBUG_PROG) { 998 printf("KIL if (%g %g %g %g) <= 0.0\n", 999 a[0], a[1], a[2], a[3]); 1000 } 1001 1002 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) { 1003 return GL_FALSE; 1004 } 1005 } 1006 break; 1007 case OPCODE_LG2: /* log base 2 */ 1008 { 1009 GLfloat a[4], result[4], val; 1010 fetch_vector1(&inst->SrcReg[0], machine, a); 1011 /* The fast LOG2 macro doesn't meet the precision requirements. 1012 */ 1013 if (a[0] == 0.0F) { 1014 val = -FLT_MAX; 1015 } 1016 else { 1017 val = (float)(log(a[0]) * 1.442695F); 1018 } 1019 result[0] = result[1] = result[2] = result[3] = val; 1020 store_vector4(inst, machine, result); 1021 } 1022 break; 1023 case OPCODE_LIT: 1024 { 1025 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */ 1026 GLfloat a[4], result[4]; 1027 fetch_vector4(&inst->SrcReg[0], machine, a); 1028 a[0] = MAX2(a[0], 0.0F); 1029 a[1] = MAX2(a[1], 0.0F); 1030 /* XXX ARB version clamps a[3], NV version doesn't */ 1031 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon)); 1032 result[0] = 1.0F; 1033 result[1] = a[0]; 1034 /* XXX we could probably just use pow() here */ 1035 if (a[0] > 0.0F) { 1036 if (a[1] == 0.0 && a[3] == 0.0) 1037 result[2] = 1.0F; 1038 else 1039 result[2] = (GLfloat) pow(a[1], a[3]); 1040 } 1041 else { 1042 result[2] = 0.0F; 1043 } 1044 result[3] = 1.0F; 1045 store_vector4(inst, machine, result); 1046 if (DEBUG_PROG) { 1047 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n", 1048 result[0], result[1], result[2], result[3], 1049 a[0], a[1], a[2], a[3]); 1050 } 1051 } 1052 break; 1053 case OPCODE_LOG: 1054 { 1055 GLfloat t[4], q[4], abs_t0; 1056 fetch_vector1(&inst->SrcReg[0], machine, t); 1057 abs_t0 = FABSF(t[0]); 1058 if (abs_t0 != 0.0F) { 1059 /* Since we really can't handle infinite values on VMS 1060 * like other OSes we'll use __MAXFLOAT to represent 1061 * infinity. This may need some tweaking. 1062 */ 1063#ifdef VMS 1064 if (abs_t0 == __MAXFLOAT) 1065#else 1066 if (IS_INF_OR_NAN(abs_t0)) 1067#endif 1068 { 1069 SET_POS_INFINITY(q[0]); 1070 q[1] = 1.0F; 1071 SET_POS_INFINITY(q[2]); 1072 } 1073 else { 1074 int exponent; 1075 GLfloat mantissa = FREXPF(t[0], &exponent); 1076 q[0] = (GLfloat) (exponent - 1); 1077 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */ 1078 1079 /* The fast LOG2 macro doesn't meet the precision 1080 * requirements. 1081 */ 1082 q[2] = (float)(log(t[0]) * 1.442695F); 1083 } 1084 } 1085 else { 1086 SET_NEG_INFINITY(q[0]); 1087 q[1] = 1.0F; 1088 SET_NEG_INFINITY(q[2]); 1089 } 1090 q[3] = 1.0; 1091 store_vector4(inst, machine, q); 1092 } 1093 break; 1094 case OPCODE_LRP: 1095 { 1096 GLfloat a[4], b[4], c[4], result[4]; 1097 fetch_vector4(&inst->SrcReg[0], machine, a); 1098 fetch_vector4(&inst->SrcReg[1], machine, b); 1099 fetch_vector4(&inst->SrcReg[2], machine, c); 1100 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0]; 1101 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1]; 1102 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2]; 1103 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3]; 1104 store_vector4(inst, machine, result); 1105 if (DEBUG_PROG) { 1106 printf("LRP (%g %g %g %g) = (%g %g %g %g), " 1107 "(%g %g %g %g), (%g %g %g %g)\n", 1108 result[0], result[1], result[2], result[3], 1109 a[0], a[1], a[2], a[3], 1110 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]); 1111 } 1112 } 1113 break; 1114 case OPCODE_MAD: 1115 { 1116 GLfloat a[4], b[4], c[4], result[4]; 1117 fetch_vector4(&inst->SrcReg[0], machine, a); 1118 fetch_vector4(&inst->SrcReg[1], machine, b); 1119 fetch_vector4(&inst->SrcReg[2], machine, c); 1120 result[0] = a[0] * b[0] + c[0]; 1121 result[1] = a[1] * b[1] + c[1]; 1122 result[2] = a[2] * b[2] + c[2]; 1123 result[3] = a[3] * b[3] + c[3]; 1124 store_vector4(inst, machine, result); 1125 if (DEBUG_PROG) { 1126 printf("MAD (%g %g %g %g) = (%g %g %g %g) * " 1127 "(%g %g %g %g) + (%g %g %g %g)\n", 1128 result[0], result[1], result[2], result[3], 1129 a[0], a[1], a[2], a[3], 1130 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]); 1131 } 1132 } 1133 break; 1134 case OPCODE_MAX: 1135 { 1136 GLfloat a[4], b[4], result[4]; 1137 fetch_vector4(&inst->SrcReg[0], machine, a); 1138 fetch_vector4(&inst->SrcReg[1], machine, b); 1139 result[0] = MAX2(a[0], b[0]); 1140 result[1] = MAX2(a[1], b[1]); 1141 result[2] = MAX2(a[2], b[2]); 1142 result[3] = MAX2(a[3], b[3]); 1143 store_vector4(inst, machine, result); 1144 if (DEBUG_PROG) { 1145 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n", 1146 result[0], result[1], result[2], result[3], 1147 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); 1148 } 1149 } 1150 break; 1151 case OPCODE_MIN: 1152 { 1153 GLfloat a[4], b[4], result[4]; 1154 fetch_vector4(&inst->SrcReg[0], machine, a); 1155 fetch_vector4(&inst->SrcReg[1], machine, b); 1156 result[0] = MIN2(a[0], b[0]); 1157 result[1] = MIN2(a[1], b[1]); 1158 result[2] = MIN2(a[2], b[2]); 1159 result[3] = MIN2(a[3], b[3]); 1160 store_vector4(inst, machine, result); 1161 } 1162 break; 1163 case OPCODE_MOV: 1164 { 1165 GLfloat result[4]; 1166 fetch_vector4(&inst->SrcReg[0], machine, result); 1167 store_vector4(inst, machine, result); 1168 if (DEBUG_PROG) { 1169 printf("MOV (%g %g %g %g)\n", 1170 result[0], result[1], result[2], result[3]); 1171 } 1172 } 1173 break; 1174 case OPCODE_MUL: 1175 { 1176 GLfloat a[4], b[4], result[4]; 1177 fetch_vector4(&inst->SrcReg[0], machine, a); 1178 fetch_vector4(&inst->SrcReg[1], machine, b); 1179 result[0] = a[0] * b[0]; 1180 result[1] = a[1] * b[1]; 1181 result[2] = a[2] * b[2]; 1182 result[3] = a[3] * b[3]; 1183 store_vector4(inst, machine, result); 1184 if (DEBUG_PROG) { 1185 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n", 1186 result[0], result[1], result[2], result[3], 1187 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); 1188 } 1189 } 1190 break; 1191 case OPCODE_NOISE1: 1192 { 1193 GLfloat a[4], result[4]; 1194 fetch_vector1(&inst->SrcReg[0], machine, a); 1195 result[0] = 1196 result[1] = 1197 result[2] = 1198 result[3] = _mesa_noise1(a[0]); 1199 store_vector4(inst, machine, result); 1200 } 1201 break; 1202 case OPCODE_NOISE2: 1203 { 1204 GLfloat a[4], result[4]; 1205 fetch_vector4(&inst->SrcReg[0], machine, a); 1206 result[0] = 1207 result[1] = 1208 result[2] = result[3] = _mesa_noise2(a[0], a[1]); 1209 store_vector4(inst, machine, result); 1210 } 1211 break; 1212 case OPCODE_NOISE3: 1213 { 1214 GLfloat a[4], result[4]; 1215 fetch_vector4(&inst->SrcReg[0], machine, a); 1216 result[0] = 1217 result[1] = 1218 result[2] = 1219 result[3] = _mesa_noise3(a[0], a[1], a[2]); 1220 store_vector4(inst, machine, result); 1221 } 1222 break; 1223 case OPCODE_NOISE4: 1224 { 1225 GLfloat a[4], result[4]; 1226 fetch_vector4(&inst->SrcReg[0], machine, a); 1227 result[0] = 1228 result[1] = 1229 result[2] = 1230 result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]); 1231 store_vector4(inst, machine, result); 1232 } 1233 break; 1234 case OPCODE_NOP: 1235 break; 1236 case OPCODE_NOT: /* bitwise NOT */ 1237 { 1238 GLuint a[4], result[4]; 1239 fetch_vector4ui(&inst->SrcReg[0], machine, a); 1240 result[0] = ~a[0]; 1241 result[1] = ~a[1]; 1242 result[2] = ~a[2]; 1243 result[3] = ~a[3]; 1244 store_vector4ui(inst, machine, result); 1245 } 1246 break; 1247 case OPCODE_NRM3: /* 3-component normalization */ 1248 { 1249 GLfloat a[4], result[4]; 1250 GLfloat tmp; 1251 fetch_vector4(&inst->SrcReg[0], machine, a); 1252 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2]; 1253 if (tmp != 0.0F) 1254 tmp = INV_SQRTF(tmp); 1255 result[0] = tmp * a[0]; 1256 result[1] = tmp * a[1]; 1257 result[2] = tmp * a[2]; 1258 result[3] = 0.0; /* undefined, but prevent valgrind warnings */ 1259 store_vector4(inst, machine, result); 1260 } 1261 break; 1262 case OPCODE_NRM4: /* 4-component normalization */ 1263 { 1264 GLfloat a[4], result[4]; 1265 GLfloat tmp; 1266 fetch_vector4(&inst->SrcReg[0], machine, a); 1267 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3]; 1268 if (tmp != 0.0F) 1269 tmp = INV_SQRTF(tmp); 1270 result[0] = tmp * a[0]; 1271 result[1] = tmp * a[1]; 1272 result[2] = tmp * a[2]; 1273 result[3] = tmp * a[3]; 1274 store_vector4(inst, machine, result); 1275 } 1276 break; 1277 case OPCODE_OR: /* bitwise OR */ 1278 { 1279 GLuint a[4], b[4], result[4]; 1280 fetch_vector4ui(&inst->SrcReg[0], machine, a); 1281 fetch_vector4ui(&inst->SrcReg[1], machine, b); 1282 result[0] = a[0] | b[0]; 1283 result[1] = a[1] | b[1]; 1284 result[2] = a[2] | b[2]; 1285 result[3] = a[3] | b[3]; 1286 store_vector4ui(inst, machine, result); 1287 } 1288 break; 1289 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */ 1290 { 1291 GLfloat a[4]; 1292 GLuint result[4]; 1293 GLhalfNV hx, hy; 1294 fetch_vector4(&inst->SrcReg[0], machine, a); 1295 hx = _mesa_float_to_half(a[0]); 1296 hy = _mesa_float_to_half(a[1]); 1297 result[0] = 1298 result[1] = 1299 result[2] = 1300 result[3] = hx | (hy << 16); 1301 store_vector4ui(inst, machine, result); 1302 } 1303 break; 1304 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */ 1305 { 1306 GLfloat a[4]; 1307 GLuint result[4], usx, usy; 1308 fetch_vector4(&inst->SrcReg[0], machine, a); 1309 a[0] = CLAMP(a[0], 0.0F, 1.0F); 1310 a[1] = CLAMP(a[1], 0.0F, 1.0F); 1311 usx = F_TO_I(a[0] * 65535.0F); 1312 usy = F_TO_I(a[1] * 65535.0F); 1313 result[0] = 1314 result[1] = 1315 result[2] = 1316 result[3] = usx | (usy << 16); 1317 store_vector4ui(inst, machine, result); 1318 } 1319 break; 1320 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */ 1321 { 1322 GLfloat a[4]; 1323 GLuint result[4], ubx, uby, ubz, ubw; 1324 fetch_vector4(&inst->SrcReg[0], machine, a); 1325 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F); 1326 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F); 1327 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F); 1328 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F); 1329 ubx = F_TO_I(127.0F * a[0] + 128.0F); 1330 uby = F_TO_I(127.0F * a[1] + 128.0F); 1331 ubz = F_TO_I(127.0F * a[2] + 128.0F); 1332 ubw = F_TO_I(127.0F * a[3] + 128.0F); 1333 result[0] = 1334 result[1] = 1335 result[2] = 1336 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24); 1337 store_vector4ui(inst, machine, result); 1338 } 1339 break; 1340 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */ 1341 { 1342 GLfloat a[4]; 1343 GLuint result[4], ubx, uby, ubz, ubw; 1344 fetch_vector4(&inst->SrcReg[0], machine, a); 1345 a[0] = CLAMP(a[0], 0.0F, 1.0F); 1346 a[1] = CLAMP(a[1], 0.0F, 1.0F); 1347 a[2] = CLAMP(a[2], 0.0F, 1.0F); 1348 a[3] = CLAMP(a[3], 0.0F, 1.0F); 1349 ubx = F_TO_I(255.0F * a[0]); 1350 uby = F_TO_I(255.0F * a[1]); 1351 ubz = F_TO_I(255.0F * a[2]); 1352 ubw = F_TO_I(255.0F * a[3]); 1353 result[0] = 1354 result[1] = 1355 result[2] = 1356 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24); 1357 store_vector4ui(inst, machine, result); 1358 } 1359 break; 1360 case OPCODE_POW: 1361 { 1362 GLfloat a[4], b[4], result[4]; 1363 fetch_vector1(&inst->SrcReg[0], machine, a); 1364 fetch_vector1(&inst->SrcReg[1], machine, b); 1365 result[0] = result[1] = result[2] = result[3] 1366 = (GLfloat) pow(a[0], b[0]); 1367 store_vector4(inst, machine, result); 1368 } 1369 break; 1370 case OPCODE_RCC: /* clamped riciprocal */ 1371 { 1372 const float largest = 1.884467e+19, smallest = 5.42101e-20; 1373 GLfloat a[4], r, result[4]; 1374 fetch_vector1(&inst->SrcReg[0], machine, a); 1375 if (DEBUG_PROG) { 1376 if (a[0] == 0) 1377 printf("RCC(0)\n"); 1378 else if (IS_INF_OR_NAN(a[0])) 1379 printf("RCC(inf)\n"); 1380 } 1381 if (a[0] == 1.0F) { 1382 r = 1.0F; 1383 } 1384 else { 1385 r = 1.0F / a[0]; 1386 } 1387 if (positive(r)) { 1388 if (r > largest) { 1389 r = largest; 1390 } 1391 else if (r < smallest) { 1392 r = smallest; 1393 } 1394 } 1395 else { 1396 if (r < -largest) { 1397 r = -largest; 1398 } 1399 else if (r > -smallest) { 1400 r = -smallest; 1401 } 1402 } 1403 result[0] = result[1] = result[2] = result[3] = r; 1404 store_vector4(inst, machine, result); 1405 } 1406 break; 1407 1408 case OPCODE_RCP: 1409 { 1410 GLfloat a[4], result[4]; 1411 fetch_vector1(&inst->SrcReg[0], machine, a); 1412 if (DEBUG_PROG) { 1413 if (a[0] == 0) 1414 printf("RCP(0)\n"); 1415 else if (IS_INF_OR_NAN(a[0])) 1416 printf("RCP(inf)\n"); 1417 } 1418 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0]; 1419 store_vector4(inst, machine, result); 1420 } 1421 break; 1422 case OPCODE_RET: /* return from subroutine (conditional) */ 1423 if (eval_condition(machine, inst)) { 1424 if (machine->StackDepth == 0) { 1425 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ 1426 } 1427 /* subtract one because of pc++ in the for loop */ 1428 pc = machine->CallStack[--machine->StackDepth] - 1; 1429 } 1430 break; 1431 case OPCODE_RFL: /* reflection vector */ 1432 { 1433 GLfloat axis[4], dir[4], result[4], tmpX, tmpW; 1434 fetch_vector4(&inst->SrcReg[0], machine, axis); 1435 fetch_vector4(&inst->SrcReg[1], machine, dir); 1436 tmpW = DOT3(axis, axis); 1437 tmpX = (2.0F * DOT3(axis, dir)) / tmpW; 1438 result[0] = tmpX * axis[0] - dir[0]; 1439 result[1] = tmpX * axis[1] - dir[1]; 1440 result[2] = tmpX * axis[2] - dir[2]; 1441 /* result[3] is never written! XXX enforce in parser! */ 1442 store_vector4(inst, machine, result); 1443 } 1444 break; 1445 case OPCODE_RSQ: /* 1 / sqrt() */ 1446 { 1447 GLfloat a[4], result[4]; 1448 fetch_vector1(&inst->SrcReg[0], machine, a); 1449 a[0] = FABSF(a[0]); 1450 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]); 1451 store_vector4(inst, machine, result); 1452 if (DEBUG_PROG) { 1453 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]); 1454 } 1455 } 1456 break; 1457 case OPCODE_SCS: /* sine and cos */ 1458 { 1459 GLfloat a[4], result[4]; 1460 fetch_vector1(&inst->SrcReg[0], machine, a); 1461 result[0] = (GLfloat) cos(a[0]); 1462 result[1] = (GLfloat) sin(a[0]); 1463 result[2] = 0.0; /* undefined! */ 1464 result[3] = 0.0; /* undefined! */ 1465 store_vector4(inst, machine, result); 1466 } 1467 break; 1468 case OPCODE_SEQ: /* set on equal */ 1469 { 1470 GLfloat a[4], b[4], result[4]; 1471 fetch_vector4(&inst->SrcReg[0], machine, a); 1472 fetch_vector4(&inst->SrcReg[1], machine, b); 1473 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F; 1474 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F; 1475 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F; 1476 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F; 1477 store_vector4(inst, machine, result); 1478 if (DEBUG_PROG) { 1479 printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n", 1480 result[0], result[1], result[2], result[3], 1481 a[0], a[1], a[2], a[3], 1482 b[0], b[1], b[2], b[3]); 1483 } 1484 } 1485 break; 1486 case OPCODE_SFL: /* set false, operands ignored */ 1487 { 1488 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; 1489 store_vector4(inst, machine, result); 1490 } 1491 break; 1492 case OPCODE_SGE: /* set on greater or equal */ 1493 { 1494 GLfloat a[4], b[4], result[4]; 1495 fetch_vector4(&inst->SrcReg[0], machine, a); 1496 fetch_vector4(&inst->SrcReg[1], machine, b); 1497 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F; 1498 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F; 1499 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F; 1500 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F; 1501 store_vector4(inst, machine, result); 1502 if (DEBUG_PROG) { 1503 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n", 1504 result[0], result[1], result[2], result[3], 1505 a[0], a[1], a[2], a[3], 1506 b[0], b[1], b[2], b[3]); 1507 } 1508 } 1509 break; 1510 case OPCODE_SGT: /* set on greater */ 1511 { 1512 GLfloat a[4], b[4], result[4]; 1513 fetch_vector4(&inst->SrcReg[0], machine, a); 1514 fetch_vector4(&inst->SrcReg[1], machine, b); 1515 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F; 1516 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F; 1517 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F; 1518 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F; 1519 store_vector4(inst, machine, result); 1520 if (DEBUG_PROG) { 1521 printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n", 1522 result[0], result[1], result[2], result[3], 1523 a[0], a[1], a[2], a[3], 1524 b[0], b[1], b[2], b[3]); 1525 } 1526 } 1527 break; 1528 case OPCODE_SIN: 1529 { 1530 GLfloat a[4], result[4]; 1531 fetch_vector1(&inst->SrcReg[0], machine, a); 1532 result[0] = result[1] = result[2] = result[3] 1533 = (GLfloat) sin(a[0]); 1534 store_vector4(inst, machine, result); 1535 } 1536 break; 1537 case OPCODE_SLE: /* set on less or equal */ 1538 { 1539 GLfloat a[4], b[4], result[4]; 1540 fetch_vector4(&inst->SrcReg[0], machine, a); 1541 fetch_vector4(&inst->SrcReg[1], machine, b); 1542 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F; 1543 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F; 1544 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F; 1545 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F; 1546 store_vector4(inst, machine, result); 1547 if (DEBUG_PROG) { 1548 printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n", 1549 result[0], result[1], result[2], result[3], 1550 a[0], a[1], a[2], a[3], 1551 b[0], b[1], b[2], b[3]); 1552 } 1553 } 1554 break; 1555 case OPCODE_SLT: /* set on less */ 1556 { 1557 GLfloat a[4], b[4], result[4]; 1558 fetch_vector4(&inst->SrcReg[0], machine, a); 1559 fetch_vector4(&inst->SrcReg[1], machine, b); 1560 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F; 1561 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F; 1562 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F; 1563 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F; 1564 store_vector4(inst, machine, result); 1565 if (DEBUG_PROG) { 1566 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n", 1567 result[0], result[1], result[2], result[3], 1568 a[0], a[1], a[2], a[3], 1569 b[0], b[1], b[2], b[3]); 1570 } 1571 } 1572 break; 1573 case OPCODE_SNE: /* set on not equal */ 1574 { 1575 GLfloat a[4], b[4], result[4]; 1576 fetch_vector4(&inst->SrcReg[0], machine, a); 1577 fetch_vector4(&inst->SrcReg[1], machine, b); 1578 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F; 1579 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F; 1580 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F; 1581 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F; 1582 store_vector4(inst, machine, result); 1583 if (DEBUG_PROG) { 1584 printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n", 1585 result[0], result[1], result[2], result[3], 1586 a[0], a[1], a[2], a[3], 1587 b[0], b[1], b[2], b[3]); 1588 } 1589 } 1590 break; 1591 case OPCODE_SSG: /* set sign (-1, 0 or +1) */ 1592 { 1593 GLfloat a[4], result[4]; 1594 fetch_vector4(&inst->SrcReg[0], machine, a); 1595 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F)); 1596 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F)); 1597 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F)); 1598 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F)); 1599 store_vector4(inst, machine, result); 1600 } 1601 break; 1602 case OPCODE_STR: /* set true, operands ignored */ 1603 { 1604 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F }; 1605 store_vector4(inst, machine, result); 1606 } 1607 break; 1608 case OPCODE_SUB: 1609 { 1610 GLfloat a[4], b[4], result[4]; 1611 fetch_vector4(&inst->SrcReg[0], machine, a); 1612 fetch_vector4(&inst->SrcReg[1], machine, b); 1613 result[0] = a[0] - b[0]; 1614 result[1] = a[1] - b[1]; 1615 result[2] = a[2] - b[2]; 1616 result[3] = a[3] - b[3]; 1617 store_vector4(inst, machine, result); 1618 if (DEBUG_PROG) { 1619 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n", 1620 result[0], result[1], result[2], result[3], 1621 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); 1622 } 1623 } 1624 break; 1625 case OPCODE_SWZ: /* extended swizzle */ 1626 { 1627 const struct prog_src_register *source = &inst->SrcReg[0]; 1628 const GLfloat *src = get_src_register_pointer(source, machine); 1629 GLfloat result[4]; 1630 GLuint i; 1631 for (i = 0; i < 4; i++) { 1632 const GLuint swz = GET_SWZ(source->Swizzle, i); 1633 if (swz == SWIZZLE_ZERO) 1634 result[i] = 0.0; 1635 else if (swz == SWIZZLE_ONE) 1636 result[i] = 1.0; 1637 else { 1638 ASSERT(swz >= 0); 1639 ASSERT(swz <= 3); 1640 result[i] = src[swz]; 1641 } 1642 if (source->Negate & (1 << i)) 1643 result[i] = -result[i]; 1644 } 1645 store_vector4(inst, machine, result); 1646 } 1647 break; 1648 case OPCODE_TEX: /* Both ARB and NV frag prog */ 1649 /* Simple texel lookup */ 1650 { 1651 GLfloat texcoord[4], color[4]; 1652 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1653 1654 /* For TEX, texcoord.Q should not be used and its value should not 1655 * matter (at most, we pass coord.xyz to texture3D() in GLSL). 1656 * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value 1657 * which is effectively what happens when the texcoord swizzle 1658 * is .xyzz 1659 */ 1660 texcoord[3] = 1.0f; 1661 1662 fetch_texel(ctx, machine, inst, texcoord, 0.0, color); 1663 1664 if (DEBUG_PROG) { 1665 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n", 1666 color[0], color[1], color[2], color[3], 1667 inst->TexSrcUnit, 1668 texcoord[0], texcoord[1], texcoord[2], texcoord[3]); 1669 } 1670 store_vector4(inst, machine, color); 1671 } 1672 break; 1673 case OPCODE_TXB: /* GL_ARB_fragment_program only */ 1674 /* Texel lookup with LOD bias */ 1675 { 1676 GLfloat texcoord[4], color[4], lodBias; 1677 1678 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1679 1680 /* texcoord[3] is the bias to add to lambda */ 1681 lodBias = texcoord[3]; 1682 1683 fetch_texel(ctx, machine, inst, texcoord, lodBias, color); 1684 1685 if (DEBUG_PROG) { 1686 printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]" 1687 " bias %g\n", 1688 color[0], color[1], color[2], color[3], 1689 inst->TexSrcUnit, 1690 texcoord[0], 1691 texcoord[1], 1692 texcoord[2], 1693 texcoord[3], 1694 lodBias); 1695 } 1696 1697 store_vector4(inst, machine, color); 1698 } 1699 break; 1700 case OPCODE_TXD: /* GL_NV_fragment_program only */ 1701 /* Texture lookup w/ partial derivatives for LOD */ 1702 { 1703 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4]; 1704 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1705 fetch_vector4(&inst->SrcReg[1], machine, dtdx); 1706 fetch_vector4(&inst->SrcReg[2], machine, dtdy); 1707 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy, 1708 0.0, /* lodBias */ 1709 inst->TexSrcUnit, color); 1710 store_vector4(inst, machine, color); 1711 } 1712 break; 1713 case OPCODE_TXL: 1714 /* Texel lookup with explicit LOD */ 1715 { 1716 GLfloat texcoord[4], color[4], lod; 1717 1718 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1719 1720 /* texcoord[3] is the LOD */ 1721 lod = texcoord[3]; 1722 1723 machine->FetchTexelLod(ctx, texcoord, lod, 1724 machine->Samplers[inst->TexSrcUnit], color); 1725 1726 store_vector4(inst, machine, color); 1727 } 1728 break; 1729 case OPCODE_TXP: /* GL_ARB_fragment_program only */ 1730 /* Texture lookup w/ projective divide */ 1731 { 1732 GLfloat texcoord[4], color[4]; 1733 1734 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1735 /* Not so sure about this test - if texcoord[3] is 1736 * zero, we'd probably be fine except for an ASSERT in 1737 * IROUND_POS() which gets triggered by the inf values created. 1738 */ 1739 if (texcoord[3] != 0.0) { 1740 texcoord[0] /= texcoord[3]; 1741 texcoord[1] /= texcoord[3]; 1742 texcoord[2] /= texcoord[3]; 1743 } 1744 1745 fetch_texel(ctx, machine, inst, texcoord, 0.0, color); 1746 1747 store_vector4(inst, machine, color); 1748 } 1749 break; 1750 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */ 1751 /* Texture lookup w/ projective divide, as above, but do not 1752 * do the divide by w if sampling from a cube map. 1753 */ 1754 { 1755 GLfloat texcoord[4], color[4]; 1756 1757 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1758 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX && 1759 texcoord[3] != 0.0) { 1760 texcoord[0] /= texcoord[3]; 1761 texcoord[1] /= texcoord[3]; 1762 texcoord[2] /= texcoord[3]; 1763 } 1764 1765 fetch_texel(ctx, machine, inst, texcoord, 0.0, color); 1766 1767 store_vector4(inst, machine, color); 1768 } 1769 break; 1770 case OPCODE_TRUNC: /* truncate toward zero */ 1771 { 1772 GLfloat a[4], result[4]; 1773 fetch_vector4(&inst->SrcReg[0], machine, a); 1774 result[0] = (GLfloat) (GLint) a[0]; 1775 result[1] = (GLfloat) (GLint) a[1]; 1776 result[2] = (GLfloat) (GLint) a[2]; 1777 result[3] = (GLfloat) (GLint) a[3]; 1778 store_vector4(inst, machine, result); 1779 } 1780 break; 1781 case OPCODE_UP2H: /* unpack two 16-bit floats */ 1782 { 1783 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); 1784 GLfloat result[4]; 1785 GLushort hx, hy; 1786 hx = raw & 0xffff; 1787 hy = raw >> 16; 1788 result[0] = result[2] = _mesa_half_to_float(hx); 1789 result[1] = result[3] = _mesa_half_to_float(hy); 1790 store_vector4(inst, machine, result); 1791 } 1792 break; 1793 case OPCODE_UP2US: /* unpack two GLushorts */ 1794 { 1795 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); 1796 GLfloat result[4]; 1797 GLushort usx, usy; 1798 usx = raw & 0xffff; 1799 usy = raw >> 16; 1800 result[0] = result[2] = usx * (1.0f / 65535.0f); 1801 result[1] = result[3] = usy * (1.0f / 65535.0f); 1802 store_vector4(inst, machine, result); 1803 } 1804 break; 1805 case OPCODE_UP4B: /* unpack four GLbytes */ 1806 { 1807 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); 1808 GLfloat result[4]; 1809 result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F; 1810 result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F; 1811 result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F; 1812 result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F; 1813 store_vector4(inst, machine, result); 1814 } 1815 break; 1816 case OPCODE_UP4UB: /* unpack four GLubytes */ 1817 { 1818 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine); 1819 GLfloat result[4]; 1820 result[0] = ((raw >> 0) & 0xff) / 255.0F; 1821 result[1] = ((raw >> 8) & 0xff) / 255.0F; 1822 result[2] = ((raw >> 16) & 0xff) / 255.0F; 1823 result[3] = ((raw >> 24) & 0xff) / 255.0F; 1824 store_vector4(inst, machine, result); 1825 } 1826 break; 1827 case OPCODE_XOR: /* bitwise XOR */ 1828 { 1829 GLuint a[4], b[4], result[4]; 1830 fetch_vector4ui(&inst->SrcReg[0], machine, a); 1831 fetch_vector4ui(&inst->SrcReg[1], machine, b); 1832 result[0] = a[0] ^ b[0]; 1833 result[1] = a[1] ^ b[1]; 1834 result[2] = a[2] ^ b[2]; 1835 result[3] = a[3] ^ b[3]; 1836 store_vector4ui(inst, machine, result); 1837 } 1838 break; 1839 case OPCODE_XPD: /* cross product */ 1840 { 1841 GLfloat a[4], b[4], result[4]; 1842 fetch_vector4(&inst->SrcReg[0], machine, a); 1843 fetch_vector4(&inst->SrcReg[1], machine, b); 1844 result[0] = a[1] * b[2] - a[2] * b[1]; 1845 result[1] = a[2] * b[0] - a[0] * b[2]; 1846 result[2] = a[0] * b[1] - a[1] * b[0]; 1847 result[3] = 1.0; 1848 store_vector4(inst, machine, result); 1849 if (DEBUG_PROG) { 1850 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n", 1851 result[0], result[1], result[2], result[3], 1852 a[0], a[1], a[2], b[0], b[1], b[2]); 1853 } 1854 } 1855 break; 1856 case OPCODE_X2D: /* 2-D matrix transform */ 1857 { 1858 GLfloat a[4], b[4], c[4], result[4]; 1859 fetch_vector4(&inst->SrcReg[0], machine, a); 1860 fetch_vector4(&inst->SrcReg[1], machine, b); 1861 fetch_vector4(&inst->SrcReg[2], machine, c); 1862 result[0] = a[0] + b[0] * c[0] + b[1] * c[1]; 1863 result[1] = a[1] + b[0] * c[2] + b[1] * c[3]; 1864 result[2] = a[2] + b[0] * c[0] + b[1] * c[1]; 1865 result[3] = a[3] + b[0] * c[2] + b[1] * c[3]; 1866 store_vector4(inst, machine, result); 1867 } 1868 break; 1869 case OPCODE_PRINT: 1870 { 1871 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { 1872 GLfloat a[4]; 1873 fetch_vector4(&inst->SrcReg[0], machine, a); 1874 printf("%s%g, %g, %g, %g\n", (const char *) inst->Data, 1875 a[0], a[1], a[2], a[3]); 1876 } 1877 else { 1878 printf("%s\n", (const char *) inst->Data); 1879 } 1880 } 1881 break; 1882 case OPCODE_END: 1883 return GL_TRUE; 1884 default: 1885 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program", 1886 inst->Opcode); 1887 return GL_TRUE; /* return value doesn't matter */ 1888 } 1889 1890 numExec++; 1891 if (numExec > maxExec) { 1892 static GLboolean reported = GL_FALSE; 1893 if (!reported) { 1894 _mesa_problem(ctx, "Infinite loop detected in fragment program"); 1895 reported = GL_TRUE; 1896 } 1897 return GL_TRUE; 1898 } 1899 1900 } /* for pc */ 1901 1902 return GL_TRUE; 1903} 1904