/* nv30_fragprog.c revision a55e50b082ca068d35d695ff323603507e2b64aa */
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/tgsi_dump.h" 8#include "tgsi/tgsi_parse.h" 9#include "tgsi/tgsi_util.h" 10 11#include "nv30_context.h" 12 13#define SWZ_X 0 14#define SWZ_Y 1 15#define SWZ_Z 2 16#define SWZ_W 3 17#define MASK_X 1 18#define MASK_Y 2 19#define MASK_Z 4 20#define MASK_W 8 21#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) 22#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X 23#define DEF_CTEST NV30_FP_OP_COND_TR 24#include "nv30_shader.h" 25 26#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) 27#define neg(s) nv30_sr_neg((s)) 28#define abs(s) nv30_sr_abs((s)) 29#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) 30 31#define MAX_CONSTS 128 32#define MAX_IMM 32 33struct nv30_fpc { 34 struct nv30_fragment_program *fp; 35 36 uint attrib_map[PIPE_MAX_SHADER_INPUTS]; 37 38 int high_temp; 39 int temp_temp_count; 40 int num_regs; 41 42 uint depth_id; 43 uint colour_id; 44 45 unsigned inst_offset; 46 47 struct { 48 int pipe; 49 float vals[4]; 50 } consts[MAX_CONSTS]; 51 int nr_consts; 52 53 struct nv30_sreg imm[MAX_IMM]; 54 unsigned nr_imm; 55}; 56 57static INLINE struct nv30_sreg 58temp(struct nv30_fpc *fpc) 59{ 60 int idx; 61 62 idx = fpc->temp_temp_count++; 63 idx += fpc->high_temp + 1; 64 return nv30_sr(NV30SR_TEMP, idx); 65} 66 67static INLINE struct nv30_sreg 68constant(struct nv30_fpc *fpc, int pipe, float vals[4]) 69{ 70 int idx; 71 72 if (fpc->nr_consts == MAX_CONSTS) 73 assert(0); 74 idx = fpc->nr_consts++; 75 76 fpc->consts[idx].pipe = pipe; 77 if (pipe == -1) 78 memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); 79 return nv30_sr(NV30SR_CONST, idx); 80} 81 82#define arith(cc,s,o,d,m,s0,s1,s2) \ 83 nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ 84 (d), (m), (s0), (s1), (s2)) 85#define tex(cc,s,o,u,d,m,s0,s1,s2) \ 86 nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ 87 (d), (m), 
(s0), none, none) 88 89static void 90grow_insns(struct nv30_fpc *fpc, int size) 91{ 92 struct nv30_fragment_program *fp = fpc->fp; 93 94 fp->insn_len += size; 95 fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); 96} 97 98static void 99emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) 100{ 101 struct nv30_fragment_program *fp = fpc->fp; 102 uint32_t *hw = &fp->insn[fpc->inst_offset]; 103 uint32_t sr = 0; 104 105 switch (src.type) { 106 case NV30SR_INPUT: 107 sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); 108 hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); 109 break; 110 case NV30SR_OUTPUT: 111 sr |= NV30_FP_REG_SRC_HALF; 112 /* fall-through */ 113 case NV30SR_TEMP: 114 sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); 115 sr |= (src.index << NV30_FP_REG_SRC_SHIFT); 116 break; 117 case NV30SR_CONST: 118 grow_insns(fpc, 4); 119 hw = &fp->insn[fpc->inst_offset]; 120 if (fpc->consts[src.index].pipe >= 0) { 121 struct nv30_fragment_program_data *fpd; 122 123 fp->consts = realloc(fp->consts, ++fp->nr_consts * 124 sizeof(*fpd)); 125 fpd = &fp->consts[fp->nr_consts - 1]; 126 fpd->offset = fpc->inst_offset + 4; 127 fpd->index = fpc->consts[src.index].pipe; 128 memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); 129 } else { 130 memcpy(&fp->insn[fpc->inst_offset + 4], 131 fpc->consts[src.index].vals, 132 sizeof(uint32_t) * 4); 133 } 134 135 sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); 136 break; 137 case NV30SR_NONE: 138 sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); 139 break; 140 default: 141 assert(0); 142 } 143 144 if (src.negate) 145 sr |= NV30_FP_REG_NEGATE; 146 147 if (src.abs) 148 hw[1] |= (1 << (29 + pos)); 149 150 sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | 151 (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | 152 (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | 153 (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); 154 155 hw[pos + 1] |= sr; 156} 157 158static void 159emit_dst(struct nv30_fpc *fpc, struct 
nv30_sreg dst) 160{ 161 struct nv30_fragment_program *fp = fpc->fp; 162 uint32_t *hw = &fp->insn[fpc->inst_offset]; 163 164 switch (dst.type) { 165 case NV30SR_TEMP: 166 if (fpc->num_regs < (dst.index + 1)) 167 fpc->num_regs = dst.index + 1; 168 break; 169 case NV30SR_OUTPUT: 170 if (dst.index == 1) { 171 fp->fp_control |= 0xe; 172 } else { 173 hw[0] |= NV30_FP_OP_OUT_REG_HALF; 174 } 175 break; 176 case NV30SR_NONE: 177 hw[0] |= (1 << 30); 178 break; 179 default: 180 assert(0); 181 } 182 183 hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); 184} 185 186static void 187nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, 188 struct nv30_sreg dst, int mask, 189 struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) 190{ 191 struct nv30_fragment_program *fp = fpc->fp; 192 uint32_t *hw; 193 194 fpc->inst_offset = fp->insn_len; 195 grow_insns(fpc, 4); 196 hw = &fp->insn[fpc->inst_offset]; 197 memset(hw, 0, sizeof(uint32_t) * 4); 198 199 if (op == NV30_FP_OP_OPCODE_KIL) 200 fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; 201 hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); 202 hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); 203 hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); 204 205 if (sat) 206 hw[0] |= NV30_FP_OP_OUT_SAT; 207 208 if (dst.cc_update) 209 hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; 210 hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); 211 hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | 212 (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | 213 (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | 214 (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); 215 216 emit_dst(fpc, dst); 217 emit_src(fpc, 0, s0); 218 emit_src(fpc, 1, s1); 219 emit_src(fpc, 2, s2); 220} 221 222static void 223nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, 224 struct nv30_sreg dst, int mask, 225 struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) 226{ 227 struct nv30_fragment_program *fp = fpc->fp; 228 229 nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); 
230 231 fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); 232 fp->samplers |= (1 << unit); 233} 234 235static INLINE struct nv30_sreg 236tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) 237{ 238 struct nv30_sreg src; 239 240 switch (fsrc->Register.File) { 241 case TGSI_FILE_INPUT: 242 src = nv30_sr(NV30SR_INPUT, 243 fpc->attrib_map[fsrc->Register.Index]); 244 break; 245 case TGSI_FILE_CONSTANT: 246 src = constant(fpc, fsrc->Register.Index, NULL); 247 break; 248 case TGSI_FILE_IMMEDIATE: 249 assert(fsrc->Register.Index < fpc->nr_imm); 250 src = fpc->imm[fsrc->Register.Index]; 251 break; 252 case TGSI_FILE_TEMPORARY: 253 src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1); 254 if (fpc->high_temp < src.index) 255 fpc->high_temp = src.index; 256 break; 257 /* This is clearly insane, but gallium hands us shaders like this. 258 * Luckily fragprog results are just temp regs.. 259 */ 260 case TGSI_FILE_OUTPUT: 261 if (fsrc->Register.Index == fpc->colour_id) 262 return nv30_sr(NV30SR_OUTPUT, 0); 263 else 264 return nv30_sr(NV30SR_OUTPUT, 1); 265 break; 266 default: 267 NOUVEAU_ERR("bad src file\n"); 268 break; 269 } 270 271 src.abs = fsrc->Register.Absolute; 272 src.negate = fsrc->Register.Negate; 273 src.swz[0] = fsrc->Register.SwizzleX; 274 src.swz[1] = fsrc->Register.SwizzleY; 275 src.swz[2] = fsrc->Register.SwizzleZ; 276 src.swz[3] = fsrc->Register.SwizzleW; 277 return src; 278} 279 280static INLINE struct nv30_sreg 281tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { 282 int idx; 283 284 switch (fdst->Register.File) { 285 case TGSI_FILE_OUTPUT: 286 if (fdst->Register.Index == fpc->colour_id) 287 return nv30_sr(NV30SR_OUTPUT, 0); 288 else 289 return nv30_sr(NV30SR_OUTPUT, 1); 290 break; 291 case TGSI_FILE_TEMPORARY: 292 idx = fdst->Register.Index + 1; 293 if (fpc->high_temp < idx) 294 fpc->high_temp = idx; 295 return nv30_sr(NV30SR_TEMP, idx); 296 case TGSI_FILE_NULL: 297 return nv30_sr(NV30SR_NONE, 
0); 298 default: 299 NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); 300 return nv30_sr(NV30SR_NONE, 0); 301 } 302} 303 304static INLINE int 305tgsi_mask(uint tgsi) 306{ 307 int mask = 0; 308 309 if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; 310 if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; 311 if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; 312 if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; 313 return mask; 314} 315 316static boolean 317src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, 318 struct nv30_sreg *src) 319{ 320 const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); 321 struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); 322 uint mask = 0; 323 uint c; 324 325 for (c = 0; c < 4; c++) { 326 switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { 327 case TGSI_SWIZZLE_X: 328 case TGSI_SWIZZLE_Y: 329 case TGSI_SWIZZLE_Z: 330 case TGSI_SWIZZLE_W: 331 mask |= (1 << c); 332 break; 333 default: 334 assert(0); 335 } 336 } 337 338 if (mask == MASK_ALL) 339 return TRUE; 340 341 *src = temp(fpc); 342 343 if (mask) 344 arith(fpc, 0, MOV, *src, mask, tgsi, none, none); 345 346 return FALSE; 347} 348 349static boolean 350nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, 351 const struct tgsi_full_instruction *finst) 352{ 353 const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); 354 struct nv30_sreg src[3], dst, tmp; 355 int mask, sat, unit = 0; 356 int ai = -1, ci = -1; 357 int i; 358 359 if (finst->Instruction.Opcode == TGSI_OPCODE_END) 360 return TRUE; 361 362 fpc->temp_temp_count = 0; 363 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 364 const struct tgsi_full_src_register *fsrc; 365 366 fsrc = &finst->Src[i]; 367 if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { 368 src[i] = tgsi_src(fpc, fsrc); 369 } 370 } 371 372 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 373 const struct tgsi_full_src_register *fsrc; 374 375 fsrc = &finst->Src[i]; 376 377 switch (fsrc->Register.File) { 378 case TGSI_FILE_INPUT: 379 case 
TGSI_FILE_CONSTANT: 380 case TGSI_FILE_TEMPORARY: 381 if (!src_native_swz(fpc, fsrc, &src[i])) 382 continue; 383 break; 384 default: 385 break; 386 } 387 388 switch (fsrc->Register.File) { 389 case TGSI_FILE_INPUT: 390 if (ai == -1 || ai == fsrc->Register.Index) { 391 ai = fsrc->Register.Index; 392 src[i] = tgsi_src(fpc, fsrc); 393 } else { 394 NOUVEAU_MSG("extra src attr %d\n", 395 fsrc->Register.Index); 396 src[i] = temp(fpc); 397 arith(fpc, 0, MOV, src[i], MASK_ALL, 398 tgsi_src(fpc, fsrc), none, none); 399 } 400 break; 401 case TGSI_FILE_CONSTANT: 402 case TGSI_FILE_IMMEDIATE: 403 if (ci == -1 || ci == fsrc->Register.Index) { 404 ci = fsrc->Register.Index; 405 src[i] = tgsi_src(fpc, fsrc); 406 } else { 407 src[i] = temp(fpc); 408 arith(fpc, 0, MOV, src[i], MASK_ALL, 409 tgsi_src(fpc, fsrc), none, none); 410 } 411 break; 412 case TGSI_FILE_TEMPORARY: 413 /* handled above */ 414 break; 415 case TGSI_FILE_SAMPLER: 416 unit = fsrc->Register.Index; 417 break; 418 case TGSI_FILE_OUTPUT: 419 break; 420 default: 421 NOUVEAU_ERR("bad src file\n"); 422 return FALSE; 423 } 424 } 425 426 dst = tgsi_dst(fpc, &finst->Dst[0]); 427 mask = tgsi_mask(finst->Dst[0].Register.WriteMask); 428 sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); 429 430 switch (finst->Instruction.Opcode) { 431 case TGSI_OPCODE_ABS: 432 arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); 433 break; 434 case TGSI_OPCODE_ADD: 435 arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); 436 break; 437 case TGSI_OPCODE_CMP: 438 tmp = nv30_sr(NV30SR_NONE, 0); 439 tmp.cc_update = 1; 440 arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); 441 dst.cc_test = NV30_VP_INST_COND_GE; 442 arith(fpc, sat, MOV, dst, mask, src[2], none, none); 443 dst.cc_test = NV30_VP_INST_COND_LT; 444 arith(fpc, sat, MOV, dst, mask, src[1], none, none); 445 break; 446 case TGSI_OPCODE_COS: 447 arith(fpc, sat, COS, dst, mask, src[0], none, none); 448 break; 449 case TGSI_OPCODE_DP3: 450 arith(fpc, sat, DP3, dst, mask, src[0], 
src[1], none); 451 break; 452 case TGSI_OPCODE_DP4: 453 arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); 454 break; 455 case TGSI_OPCODE_DPH: 456 tmp = temp(fpc); 457 arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); 458 arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), 459 swz(src[1], W, W, W, W), none); 460 break; 461 case TGSI_OPCODE_DST: 462 arith(fpc, sat, DST, dst, mask, src[0], src[1], none); 463 break; 464 case TGSI_OPCODE_EX2: 465 arith(fpc, sat, EX2, dst, mask, src[0], none, none); 466 break; 467 case TGSI_OPCODE_FLR: 468 arith(fpc, sat, FLR, dst, mask, src[0], none, none); 469 break; 470 case TGSI_OPCODE_FRC: 471 arith(fpc, sat, FRC, dst, mask, src[0], none, none); 472 break; 473 case TGSI_OPCODE_KILP: 474 arith(fpc, 0, KIL, none, 0, none, none, none); 475 break; 476 case TGSI_OPCODE_KIL: 477 dst = nv30_sr(NV30SR_NONE, 0); 478 dst.cc_update = 1; 479 arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); 480 dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; 481 arith(fpc, 0, KIL, dst, 0, none, none, none); 482 break; 483 case TGSI_OPCODE_LG2: 484 arith(fpc, sat, LG2, dst, mask, src[0], none, none); 485 break; 486// case TGSI_OPCODE_LIT: 487 case TGSI_OPCODE_LRP: 488 arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); 489 break; 490 case TGSI_OPCODE_MAD: 491 arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); 492 break; 493 case TGSI_OPCODE_MAX: 494 arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); 495 break; 496 case TGSI_OPCODE_MIN: 497 arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); 498 break; 499 case TGSI_OPCODE_MOV: 500 arith(fpc, sat, MOV, dst, mask, src[0], none, none); 501 break; 502 case TGSI_OPCODE_MUL: 503 arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); 504 break; 505 case TGSI_OPCODE_POW: 506 arith(fpc, sat, POW, dst, mask, src[0], src[1], none); 507 break; 508 case TGSI_OPCODE_RCP: 509 arith(fpc, sat, RCP, dst, mask, src[0], none, none); 510 break; 511 case TGSI_OPCODE_RET: 512 assert(0); 513 
break; 514 case TGSI_OPCODE_RFL: 515 arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); 516 break; 517 case TGSI_OPCODE_RSQ: 518 arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); 519 break; 520 case TGSI_OPCODE_SCS: 521 /* avoid overwriting the source */ 522 if(src[0].swz[SWZ_X] != SWZ_X) 523 { 524 if (mask & MASK_X) { 525 arith(fpc, sat, COS, dst, MASK_X, 526 swz(src[0], X, X, X, X), none, none); 527 } 528 if (mask & MASK_Y) { 529 arith(fpc, sat, SIN, dst, MASK_Y, 530 swz(src[0], X, X, X, X), none, none); 531 } 532 } 533 else 534 { 535 if (mask & MASK_Y) { 536 arith(fpc, sat, SIN, dst, MASK_Y, 537 swz(src[0], X, X, X, X), none, none); 538 } 539 if (mask & MASK_X) { 540 arith(fpc, sat, COS, dst, MASK_X, 541 swz(src[0], X, X, X, X), none, none); 542 } 543 } 544 break; 545 case TGSI_OPCODE_SIN: 546 arith(fpc, sat, SIN, dst, mask, src[0], none, none); 547 break; 548 case TGSI_OPCODE_SGE: 549 arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); 550 break; 551 case TGSI_OPCODE_SGT: 552 arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); 553 break; 554 case TGSI_OPCODE_SLT: 555 arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); 556 break; 557 case TGSI_OPCODE_SUB: 558 arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); 559 break; 560 case TGSI_OPCODE_TEX: 561 tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); 562 break; 563 case TGSI_OPCODE_TXB: 564 tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); 565 break; 566 case TGSI_OPCODE_TXP: 567 tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); 568 break; 569 case TGSI_OPCODE_XPD: 570 tmp = temp(fpc); 571 arith(fpc, 0, MUL, tmp, mask, 572 swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); 573 arith(fpc, sat, MAD, dst, (mask & ~MASK_W), 574 swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), 575 neg(tmp)); 576 break; 577 default: 578 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); 579 return FALSE; 580 } 581 582 return TRUE; 583} 584 585static 
boolean 586nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, 587 const struct tgsi_full_declaration *fdec) 588{ 589 int hw; 590 591 switch (fdec->Semantic.Name) { 592 case TGSI_SEMANTIC_POSITION: 593 hw = NV30_FP_OP_INPUT_SRC_POSITION; 594 break; 595 case TGSI_SEMANTIC_COLOR: 596 if (fdec->Semantic.Index == 0) { 597 hw = NV30_FP_OP_INPUT_SRC_COL0; 598 } else 599 if (fdec->Semantic.Index == 1) { 600 hw = NV30_FP_OP_INPUT_SRC_COL1; 601 } else { 602 NOUVEAU_ERR("bad colour semantic index\n"); 603 return FALSE; 604 } 605 break; 606 case TGSI_SEMANTIC_FOG: 607 hw = NV30_FP_OP_INPUT_SRC_FOGC; 608 break; 609 case TGSI_SEMANTIC_GENERIC: 610 if (fdec->Semantic.Index <= 7) { 611 hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. 612 Index); 613 } else { 614 NOUVEAU_ERR("bad generic semantic index\n"); 615 return FALSE; 616 } 617 break; 618 default: 619 NOUVEAU_ERR("bad input semantic\n"); 620 return FALSE; 621 } 622 623 fpc->attrib_map[fdec->Range.First] = hw; 624 return TRUE; 625} 626 627static boolean 628nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, 629 const struct tgsi_full_declaration *fdec) 630{ 631 switch (fdec->Semantic.Name) { 632 case TGSI_SEMANTIC_POSITION: 633 fpc->depth_id = fdec->Range.First; 634 break; 635 case TGSI_SEMANTIC_COLOR: 636 fpc->colour_id = fdec->Range.First; 637 break; 638 default: 639 NOUVEAU_ERR("bad output semantic\n"); 640 return FALSE; 641 } 642 643 return TRUE; 644} 645 646static boolean 647nv30_fragprog_prepare(struct nv30_fpc *fpc) 648{ 649 struct tgsi_parse_context p; 650 /*int high_temp = -1, i;*/ 651 652 tgsi_parse_init(&p, fpc->fp->pipe.tokens); 653 while (!tgsi_parse_end_of_tokens(&p)) { 654 const union tgsi_full_token *tok = &p.FullToken; 655 656 tgsi_parse_token(&p); 657 switch(tok->Token.Type) { 658 case TGSI_TOKEN_TYPE_DECLARATION: 659 { 660 const struct tgsi_full_declaration *fdec; 661 fdec = &p.FullToken.FullDeclaration; 662 switch (fdec->Declaration.File) { 663 case TGSI_FILE_INPUT: 664 if 
(!nv30_fragprog_parse_decl_attrib(fpc, fdec)) 665 goto out_err; 666 break; 667 case TGSI_FILE_OUTPUT: 668 if (!nv30_fragprog_parse_decl_output(fpc, fdec)) 669 goto out_err; 670 break; 671 /*case TGSI_FILE_TEMPORARY: 672 if (fdec->Range.Last > high_temp) { 673 high_temp = 674 fdec->Range.Last; 675 } 676 break;*/ 677 default: 678 break; 679 } 680 } 681 break; 682 case TGSI_TOKEN_TYPE_IMMEDIATE: 683 { 684 struct tgsi_full_immediate *imm; 685 float vals[4]; 686 687 imm = &p.FullToken.FullImmediate; 688 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); 689 assert(fpc->nr_imm < MAX_IMM); 690 691 vals[0] = imm->u[0].Float; 692 vals[1] = imm->u[1].Float; 693 vals[2] = imm->u[2].Float; 694 vals[3] = imm->u[3].Float; 695 fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); 696 } 697 break; 698 default: 699 break; 700 } 701 } 702 tgsi_parse_free(&p); 703 704 /*if (++high_temp) { 705 fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); 706 for (i = 0; i < high_temp; i++) 707 fpc->r_temp[i] = temp(fpc); 708 fpc->r_temps_discard = 0; 709 }*/ 710 711 return TRUE; 712 713out_err: 714 /*if (fpc->r_temp) 715 FREE(fpc->r_temp);*/ 716 tgsi_parse_free(&p); 717 return FALSE; 718} 719 720static void 721nv30_fragprog_translate(struct nv30_context *nv30, 722 struct nv30_fragment_program *fp) 723{ 724 struct tgsi_parse_context parse; 725 struct nv30_fpc *fpc = NULL; 726 727 tgsi_dump(fp->pipe.tokens,0); 728 729 fpc = CALLOC(1, sizeof(struct nv30_fpc)); 730 if (!fpc) 731 return; 732 fpc->fp = fp; 733 fpc->high_temp = -1; 734 fpc->num_regs = 2; 735 736 if (!nv30_fragprog_prepare(fpc)) { 737 FREE(fpc); 738 return; 739 } 740 741 tgsi_parse_init(&parse, fp->pipe.tokens); 742 743 while (!tgsi_parse_end_of_tokens(&parse)) { 744 tgsi_parse_token(&parse); 745 746 switch (parse.FullToken.Token.Type) { 747 case TGSI_TOKEN_TYPE_INSTRUCTION: 748 { 749 const struct tgsi_full_instruction *finst; 750 751 finst = &parse.FullToken.FullInstruction; 752 if (!nv30_fragprog_parse_instruction(fpc, finst)) 
753 goto out_err; 754 } 755 break; 756 default: 757 break; 758 } 759 } 760 761 fp->fp_control |= (fpc->num_regs-1)/2; 762 fp->fp_reg_control = (1<<16)|0x4; 763 764 /* Terminate final instruction */ 765 fp->insn[fpc->inst_offset] |= 0x00000001; 766 767 /* Append NOP + END instruction, may or may not be necessary. */ 768 fpc->inst_offset = fp->insn_len; 769 grow_insns(fpc, 4); 770 fp->insn[fpc->inst_offset + 0] = 0x00000001; 771 fp->insn[fpc->inst_offset + 1] = 0x00000000; 772 fp->insn[fpc->inst_offset + 2] = 0x00000000; 773 fp->insn[fpc->inst_offset + 3] = 0x00000000; 774 775 fp->translated = TRUE; 776 fp->on_hw = FALSE; 777out_err: 778 tgsi_parse_free(&parse); 779 FREE(fpc); 780} 781 782static void 783nv30_fragprog_upload(struct nv30_context *nv30, 784 struct nv30_fragment_program *fp) 785{ 786 struct pipe_screen *pscreen = nv30->pipe.screen; 787 const uint32_t le = 1; 788 uint32_t *map; 789 int i; 790 791 map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 792 793#if 0 794 for (i = 0; i < fp->insn_len; i++) { 795 fflush(stdout); fflush(stderr); 796 NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); 797 fflush(stdout); fflush(stderr); 798 } 799#endif 800 801 if ((*(const uint8_t *)&le)) { 802 for (i = 0; i < fp->insn_len; i++) { 803 map[i] = fp->insn[i]; 804 } 805 } else { 806 /* Weird swapping for big-endian chips */ 807 for (i = 0; i < fp->insn_len; i++) { 808 map[i] = ((fp->insn[i] & 0xffff) << 16) | 809 ((fp->insn[i] >> 16) & 0xffff); 810 } 811 } 812 813 pipe_buffer_unmap(pscreen, fp->buffer); 814} 815 816static boolean 817nv30_fragprog_validate(struct nv30_context *nv30) 818{ 819 struct nv30_fragment_program *fp = nv30->fragprog; 820 struct pipe_buffer *constbuf = 821 nv30->constbuf[PIPE_SHADER_FRAGMENT]; 822 struct pipe_screen *pscreen = nv30->pipe.screen; 823 struct nouveau_stateobj *so; 824 boolean new_consts = FALSE; 825 int i; 826 827 if (fp->translated) 828 goto update_constants; 829 830 /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ 831 
nv30_fragprog_translate(nv30, fp); 832 if (!fp->translated) { 833 /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ 834 return FALSE; 835 } 836 837 fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); 838 nv30_fragprog_upload(nv30, fp); 839 840 so = so_new(8, 1); 841 so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); 842 so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | 843 NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | 844 NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, 845 NV34TCL_FP_ACTIVE_PROGRAM_DMA1); 846 so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); 847 so_data (so, fp->fp_control); 848 so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); 849 so_data (so, fp->fp_reg_control); 850 so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); 851 so_data (so, fp->samplers); 852 so_ref(so, &fp->so); 853 so_ref(NULL, &so); 854 855update_constants: 856 if (fp->nr_consts) { 857 float *map; 858 859 map = pipe_buffer_map(pscreen, constbuf, 860 PIPE_BUFFER_USAGE_CPU_READ); 861 for (i = 0; i < fp->nr_consts; i++) { 862 struct nv30_fragment_program_data *fpd = &fp->consts[i]; 863 uint32_t *p = &fp->insn[fpd->offset]; 864 uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; 865 866 if (!memcmp(p, cb, 4 * sizeof(float))) 867 continue; 868 memcpy(p, cb, 4 * sizeof(float)); 869 new_consts = TRUE; 870 } 871 pipe_buffer_unmap(pscreen, constbuf); 872 873 if (new_consts) 874 nv30_fragprog_upload(nv30, fp); 875 } 876 877 if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { 878 so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); 879 return TRUE; 880 } 881 882 return FALSE; 883} 884 885void 886nv30_fragprog_destroy(struct nv30_context *nv30, 887 struct nv30_fragment_program *fp) 888{ 889 if (fp->insn_len) 890 FREE(fp->insn); 891} 892 893struct nv30_state_entry nv30_state_fragprog = { 894 .validate = nv30_fragprog_validate, 895 .dirty = { 896 .pipe = NV30_NEW_FRAGPROG, 897 .hw = 
NV30_STATE_FRAGPROG 898 } 899}; 900