/* s_atifragshader.c revision b30898f4ab533085d97a33638ad0a1cf9ddb1d67 */
1/* 2 * Copyright (C) 2004 David Airlie All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 */ 21 22#include "main/glheader.h" 23#include "main/colormac.h" 24#include "main/context.h" 25#include "main/macros.h" 26#include "shader/atifragshader.h" 27#include "swrast/s_atifragshader.h" 28 29 30/** 31 * State for executing ATI fragment shader. 32 */ 33struct atifs_machine 34{ 35 GLfloat Registers[6][4]; /** six temporary registers */ 36 GLfloat PrevPassRegisters[6][4]; 37 GLfloat Inputs[2][4]; /** Primary, secondary input colors */ 38}; 39 40 41 42/** 43 * Fetch a texel. 44 */ 45static void 46fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda, 47 GLuint unit, GLfloat color[4]) 48{ 49 SWcontext *swrast = SWRAST_CONTEXT(ctx); 50 51 /* XXX use a float-valued TextureSample routine here!!! 
*/ 52 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current, 53 1, (const GLfloat(*)[4]) texcoord, 54 &lambda, (GLfloat (*)[4]) color); 55} 56 57static void 58apply_swizzle(GLfloat values[4], GLuint swizzle) 59{ 60 GLfloat s, t, r, q; 61 62 s = values[0]; 63 t = values[1]; 64 r = values[2]; 65 q = values[3]; 66 67 switch (swizzle) { 68 case GL_SWIZZLE_STR_ATI: 69 values[0] = s; 70 values[1] = t; 71 values[2] = r; 72 break; 73 case GL_SWIZZLE_STQ_ATI: 74 values[0] = s; 75 values[1] = t; 76 values[2] = q; 77 break; 78 case GL_SWIZZLE_STR_DR_ATI: 79 values[0] = s / r; 80 values[1] = t / r; 81 values[2] = 1 / r; 82 break; 83 case GL_SWIZZLE_STQ_DQ_ATI: 84/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */ 85 if (q == 0.0F) 86 q = 0.000000001F; 87 values[0] = s / q; 88 values[1] = t / q; 89 values[2] = 1.0F / q; 90 break; 91 } 92 values[3] = 0.0; 93} 94 95static void 96apply_src_rep(GLint optype, GLuint rep, GLfloat * val) 97{ 98 GLint i; 99 GLint start, end; 100 if (!rep) 101 return; 102 103 start = optype ? 3 : 0; 104 end = 4; 105 106 for (i = start; i < end; i++) { 107 switch (rep) { 108 case GL_RED: 109 val[i] = val[0]; 110 break; 111 case GL_GREEN: 112 val[i] = val[1]; 113 break; 114 case GL_BLUE: 115 val[i] = val[2]; 116 break; 117 case GL_ALPHA: 118 val[i] = val[3]; 119 break; 120 } 121 } 122} 123 124static void 125apply_src_mod(GLint optype, GLuint mod, GLfloat * val) 126{ 127 GLint i; 128 GLint start, end; 129 130 if (!mod) 131 return; 132 133 start = optype ? 
3 : 0; 134 end = 4; 135 136 for (i = start; i < end; i++) { 137 if (mod & GL_COMP_BIT_ATI) 138 val[i] = 1 - val[i]; 139 140 if (mod & GL_BIAS_BIT_ATI) 141 val[i] = val[i] - 0.5F; 142 143 if (mod & GL_2X_BIT_ATI) 144 val[i] = 2 * val[i]; 145 146 if (mod & GL_NEGATE_BIT_ATI) 147 val[i] = -val[i]; 148 } 149} 150 151static void 152apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val) 153{ 154 GLint i; 155 GLint has_sat = mod & GL_SATURATE_BIT_ATI; 156 GLint start, end; 157 158 mod &= ~GL_SATURATE_BIT_ATI; 159 160 start = optype ? 3 : 0; 161 end = optype ? 4 : 3; 162 163 for (i = start; i < end; i++) { 164 switch (mod) { 165 case GL_2X_BIT_ATI: 166 val[i] = 2 * val[i]; 167 break; 168 case GL_4X_BIT_ATI: 169 val[i] = 4 * val[i]; 170 break; 171 case GL_8X_BIT_ATI: 172 val[i] = 8 * val[i]; 173 break; 174 case GL_HALF_BIT_ATI: 175 val[i] = val[i] * 0.5F; 176 break; 177 case GL_QUARTER_BIT_ATI: 178 val[i] = val[i] * 0.25F; 179 break; 180 case GL_EIGHTH_BIT_ATI: 181 val[i] = val[i] * 0.125F; 182 break; 183 } 184 185 if (has_sat) { 186 if (val[i] < 0.0F) 187 val[i] = 0.0F; 188 else if (val[i] > 1.0F) 189 val[i] = 1.0F; 190 } 191 else { 192 if (val[i] < -8.0F) 193 val[i] = -8.0F; 194 else if (val[i] > 8.0F) 195 val[i] = 8.0F; 196 } 197 } 198} 199 200 201static void 202write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src, 203 GLfloat * dst) 204{ 205 GLint i; 206 apply_dst_mod(optype, mod, src); 207 208 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) { 209 if (mask) { 210 if (mask & GL_RED_BIT_ATI) 211 dst[0] = src[0]; 212 213 if (mask & GL_GREEN_BIT_ATI) 214 dst[1] = src[1]; 215 216 if (mask & GL_BLUE_BIT_ATI) 217 dst[2] = src[2]; 218 } 219 else { 220 for (i = 0; i < 3; i++) 221 dst[i] = src[i]; 222 } 223 } 224 else 225 dst[3] = src[3]; 226} 227 228static void 229finish_pass(struct atifs_machine *machine) 230{ 231 GLint i; 232 233 for (i = 0; i < 6; i++) { 234 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]); 235 } 236} 237 238struct ati_fs_opcode_st 
ati_fs_opcodes[] = { 239 {GL_ADD_ATI, 2}, 240 {GL_SUB_ATI, 2}, 241 {GL_MUL_ATI, 2}, 242 {GL_MAD_ATI, 3}, 243 {GL_LERP_ATI, 3}, 244 {GL_MOV_ATI, 1}, 245 {GL_CND_ATI, 3}, 246 {GL_CND0_ATI, 3}, 247 {GL_DOT2_ADD_ATI, 3}, 248 {GL_DOT3_ATI, 2}, 249 {GL_DOT4_ATI, 2} 250}; 251 252 253 254static void 255handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst, 256 const SWspan *span, GLuint column, GLuint idx) 257{ 258 GLuint swizzle = texinst->swizzle; 259 GLuint pass_tex = texinst->src; 260 261 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { 262 pass_tex -= GL_TEXTURE0_ARB; 263 COPY_4V(machine->Registers[idx], 264 span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]); 265 } 266 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) { 267 pass_tex -= GL_REG_0_ATI; 268 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]); 269 } 270 apply_swizzle(machine->Registers[idx], swizzle); 271 272} 273 274static void 275handle_sample_op(GLcontext * ctx, struct atifs_machine *machine, 276 struct atifs_setupinst *texinst, const SWspan *span, 277 GLuint column, GLuint idx) 278{ 279/* sample from unit idx using texinst->src as coords */ 280 GLuint swizzle = texinst->swizzle; 281 GLuint coord_source = texinst->src; 282 GLfloat tex_coords[4] = { 0 }; 283 284 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) { 285 coord_source -= GL_TEXTURE0_ARB; 286 COPY_4V(tex_coords, 287 span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]); 288 } 289 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) { 290 coord_source -= GL_REG_0_ATI; 291 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]); 292 } 293 apply_swizzle(tex_coords, swizzle); 294 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]); 295} 296 297#define SETUP_SRC_REG(optype, i, x) \ 298do { \ 299 COPY_4V(src[optype][i], x); \ 300} while (0) 301 302 303 304/** 305 * Execute the given fragment shader. 
306 * NOTE: we do everything in single-precision floating point 307 * \param ctx - rendering context 308 * \param shader - the shader to execute 309 * \param machine - virtual machine state 310 * \param span - the SWspan we're operating on 311 * \param column - which pixel [i] we're operating on in the span 312 */ 313static void 314execute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader, 315 struct atifs_machine *machine, const SWspan *span, 316 GLuint column) 317{ 318 GLuint pc; 319 struct atifs_instruction *inst; 320 struct atifs_setupinst *texinst; 321 GLint optype; 322 GLuint i; 323 GLint j, pass; 324 GLint dstreg; 325 GLfloat src[2][3][4]; 326 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 }; 327 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 }; 328 GLfloat dst[2][4], *dstp; 329 330 for (pass = 0; pass < shader->NumPasses; pass++) { 331 if (pass > 0) 332 finish_pass(machine); 333 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) { 334 texinst = &shader->SetupInst[pass][j]; 335 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) 336 handle_pass_op(machine, texinst, span, column, j); 337 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) 338 handle_sample_op(ctx, machine, texinst, span, column, j); 339 } 340 341 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) { 342 inst = &shader->Instructions[pass][pc]; 343 344 /* setup the source registers for color and alpha ops */ 345 for (optype = 0; optype < 2; optype++) { 346 for (i = 0; i < inst->ArgCount[optype]; i++) { 347 GLint index = inst->SrcReg[optype][i].Index; 348 349 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI) 350 SETUP_SRC_REG(optype, i, 351 machine->Registers[index - GL_REG_0_ATI]); 352 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) { 353 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) { 354 SETUP_SRC_REG(optype, i, 355 shader->Constants[index - GL_CON_0_ATI]); 356 } else { 357 SETUP_SRC_REG(optype, i, 358 ctx->ATIFragmentShader.GlobalConstants[index - 
GL_CON_0_ATI]); 359 } 360 } 361 else if (index == GL_ONE) 362 SETUP_SRC_REG(optype, i, ones); 363 else if (index == GL_ZERO) 364 SETUP_SRC_REG(optype, i, zeros); 365 else if (index == GL_PRIMARY_COLOR_EXT) 366 SETUP_SRC_REG(optype, i, 367 machine->Inputs[ATI_FS_INPUT_PRIMARY]); 368 else if (index == GL_SECONDARY_INTERPOLATOR_ATI) 369 SETUP_SRC_REG(optype, i, 370 machine->Inputs[ATI_FS_INPUT_SECONDARY]); 371 372 apply_src_rep(optype, inst->SrcReg[optype][i].argRep, 373 src[optype][i]); 374 apply_src_mod(optype, inst->SrcReg[optype][i].argMod, 375 src[optype][i]); 376 } 377 } 378 379 /* Execute the operations - color then alpha */ 380 for (optype = 0; optype < 2; optype++) { 381 if (inst->Opcode[optype]) { 382 switch (inst->Opcode[optype]) { 383 case GL_ADD_ATI: 384 if (!optype) 385 for (i = 0; i < 3; i++) { 386 dst[optype][i] = 387 src[optype][0][i] + src[optype][1][i]; 388 } 389 else 390 dst[optype][3] = src[optype][0][3] + src[optype][1][3]; 391 break; 392 case GL_SUB_ATI: 393 if (!optype) 394 for (i = 0; i < 3; i++) { 395 dst[optype][i] = 396 src[optype][0][i] - src[optype][1][i]; 397 } 398 else 399 dst[optype][3] = src[optype][0][3] - src[optype][1][3]; 400 break; 401 case GL_MUL_ATI: 402 if (!optype) 403 for (i = 0; i < 3; i++) { 404 dst[optype][i] = 405 src[optype][0][i] * src[optype][1][i]; 406 } 407 else 408 dst[optype][3] = src[optype][0][3] * src[optype][1][3]; 409 break; 410 case GL_MAD_ATI: 411 if (!optype) 412 for (i = 0; i < 3; i++) { 413 dst[optype][i] = 414 src[optype][0][i] * src[optype][1][i] + 415 src[optype][2][i]; 416 } 417 else 418 dst[optype][3] = 419 src[optype][0][3] * src[optype][1][3] + 420 src[optype][2][3]; 421 break; 422 case GL_LERP_ATI: 423 if (!optype) 424 for (i = 0; i < 3; i++) { 425 dst[optype][i] = 426 src[optype][0][i] * src[optype][1][i] + (1 - 427 src 428 [optype] 429 [0][i]) * 430 src[optype][2][i]; 431 } 432 else 433 dst[optype][3] = 434 src[optype][0][3] * src[optype][1][3] + (1 - 435 src[optype] 436 [0][3]) * 437 
src[optype][2][3]; 438 break; 439 440 case GL_MOV_ATI: 441 if (!optype) 442 for (i = 0; i < 3; i++) { 443 dst[optype][i] = src[optype][0][i]; 444 } 445 else 446 dst[optype][3] = src[optype][0][3]; 447 break; 448 case GL_CND_ATI: 449 if (!optype) { 450 for (i = 0; i < 3; i++) { 451 dst[optype][i] = 452 (src[optype][2][i] > 453 0.5) ? src[optype][0][i] : src[optype][1][i]; 454 } 455 } 456 else { 457 dst[optype][3] = 458 (src[optype][2][3] > 459 0.5) ? src[optype][0][3] : src[optype][1][3]; 460 } 461 break; 462 463 case GL_CND0_ATI: 464 if (!optype) 465 for (i = 0; i < 3; i++) { 466 dst[optype][i] = 467 (src[optype][2][i] >= 468 0) ? src[optype][0][i] : src[optype][1][i]; 469 } 470 else { 471 dst[optype][3] = 472 (src[optype][2][3] >= 473 0) ? src[optype][0][3] : src[optype][1][3]; 474 } 475 break; 476 case GL_DOT2_ADD_ATI: 477 { 478 GLfloat result; 479 480 /* DOT 2 always uses the source from the color op */ 481 /* could save recalculation of dot products for alpha inst */ 482 result = src[0][0][0] * src[0][1][0] + 483 src[0][0][1] * src[0][1][1] + src[0][2][2]; 484 if (!optype) { 485 for (i = 0; i < 3; i++) { 486 dst[optype][i] = result; 487 } 488 } 489 else 490 dst[optype][3] = result; 491 } 492 break; 493 case GL_DOT3_ATI: 494 { 495 GLfloat result; 496 497 /* DOT 3 always uses the source from the color op */ 498 result = src[0][0][0] * src[0][1][0] + 499 src[0][0][1] * src[0][1][1] + 500 src[0][0][2] * src[0][1][2]; 501 502 if (!optype) { 503 for (i = 0; i < 3; i++) { 504 dst[optype][i] = result; 505 } 506 } 507 else 508 dst[optype][3] = result; 509 } 510 break; 511 case GL_DOT4_ATI: 512 { 513 GLfloat result; 514 515 /* DOT 4 always uses the source from the color op */ 516 result = src[0][0][0] * src[0][1][0] + 517 src[0][0][1] * src[0][1][1] + 518 src[0][0][2] * src[0][1][2] + 519 src[0][0][3] * src[0][1][3]; 520 if (!optype) { 521 for (i = 0; i < 3; i++) { 522 dst[optype][i] = result; 523 } 524 } 525 else 526 dst[optype][3] = result; 527 } 528 break; 529 530 } 
531 } 532 } 533 534 /* write out the destination registers */ 535 for (optype = 0; optype < 2; optype++) { 536 if (inst->Opcode[optype]) { 537 dstreg = inst->DstReg[optype].Index; 538 dstp = machine->Registers[dstreg - GL_REG_0_ATI]; 539 540 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) && 541 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI))) 542 write_dst_addr(optype, inst->DstReg[optype].dstMod, 543 inst->DstReg[optype].dstMask, dst[optype], 544 dstp); 545 else 546 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp); 547 } 548 } 549 } 550 } 551} 552 553 554/** 555 * Init fragment shader virtual machine state. 556 */ 557static void 558init_machine(GLcontext * ctx, struct atifs_machine *machine, 559 const struct ati_fragment_shader *shader, 560 const SWspan *span, GLuint col) 561{ 562 GLfloat (*inputs)[4] = machine->Inputs; 563 GLint i, j; 564 565 for (i = 0; i < 6; i++) { 566 for (j = 0; j < 4; j++) 567 machine->Registers[i][j] = 0.0; 568 } 569 570 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]); 571 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]); 572} 573 574 575 576/** 577 * Execute the current ATI shader program, operating on the given span. 
578 */ 579void 580_swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span) 581{ 582 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; 583 struct atifs_machine machine; 584 GLuint i; 585 586 /* incoming colors should be floats */ 587 ASSERT(span->array->ChanType == GL_FLOAT); 588 589 for (i = 0; i < span->end; i++) { 590 if (span->array->mask[i]) { 591 init_machine(ctx, &machine, shader, span, i); 592 593 execute_shader(ctx, shader, &machine, span, i); 594 595 /* store result color */ 596 { 597 const GLfloat *colOut = machine.Registers[0]; 598 /*fprintf(stderr,"outputs %f %f %f %f\n", 599 colOut[0], colOut[1], colOut[2], colOut[3]); */ 600 COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut); 601 } 602 } 603 } 604} 605