nv30_fragprog.c revision 7d6c8f980d1e23ad6f557d650e89c715861a3b0c
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/tgsi_dump.h" 8#include "tgsi/tgsi_parse.h" 9#include "tgsi/tgsi_util.h" 10 11#include "nv30_context.h" 12 13#define SWZ_X 0 14#define SWZ_Y 1 15#define SWZ_Z 2 16#define SWZ_W 3 17#define MASK_X 1 18#define MASK_Y 2 19#define MASK_Z 4 20#define MASK_W 8 21#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) 22#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X 23#define DEF_CTEST NV30_FP_OP_COND_TR 24#include "nv30_shader.h" 25 26#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) 27#define neg(s) nv30_sr_neg((s)) 28#define abs(s) nv30_sr_abs((s)) 29#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) 30 31#define MAX_CONSTS 128 32#define MAX_IMM 32 33struct nv30_fpc { 34 struct nv30_fragment_program *fp; 35 36 uint attrib_map[PIPE_MAX_SHADER_INPUTS]; 37 38 int high_temp; 39 int temp_temp_count; 40 int num_regs; 41 42 uint depth_id; 43 uint colour_id; 44 45 unsigned inst_offset; 46 47 struct { 48 int pipe; 49 float vals[4]; 50 } consts[MAX_CONSTS]; 51 int nr_consts; 52 53 struct nv30_sreg imm[MAX_IMM]; 54 unsigned nr_imm; 55}; 56 57static INLINE struct nv30_sreg 58temp(struct nv30_fpc *fpc) 59{ 60 int idx; 61 62 idx = fpc->temp_temp_count++; 63 idx += fpc->high_temp + 1; 64 return nv30_sr(NV30SR_TEMP, idx); 65} 66 67static INLINE struct nv30_sreg 68constant(struct nv30_fpc *fpc, int pipe, float vals[4]) 69{ 70 int idx; 71 72 if (fpc->nr_consts == MAX_CONSTS) 73 assert(0); 74 idx = fpc->nr_consts++; 75 76 fpc->consts[idx].pipe = pipe; 77 if (pipe == -1) 78 memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); 79 return nv30_sr(NV30SR_CONST, idx); 80} 81 82#define arith(cc,s,o,d,m,s0,s1,s2) \ 83 nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ 84 (d), (m), (s0), (s1), (s2)) 85#define tex(cc,s,o,u,d,m,s0,s1,s2) \ 86 nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ 87 (d), (m), (s0), none, none) 88 89static void 90grow_insns(struct nv30_fpc *fpc, int size) 91{ 92 struct nv30_fragment_program *fp = fpc->fp; 93 94 fp->insn_len += size; 95 fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); 96} 97 98static void 99emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) 100{ 101 struct nv30_fragment_program *fp = fpc->fp; 102 uint32_t *hw = &fp->insn[fpc->inst_offset]; 103 uint32_t sr = 0; 104 105 switch (src.type) { 106 case NV30SR_INPUT: 107 sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); 108 hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); 109 break; 110 case NV30SR_OUTPUT: 111 sr |= NV30_FP_REG_SRC_HALF; 112 /* fall-through */ 113 case NV30SR_TEMP: 114 sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); 115 sr |= (src.index << NV30_FP_REG_SRC_SHIFT); 116 break; 117 case NV30SR_CONST: 118 grow_insns(fpc, 4); 119 hw = &fp->insn[fpc->inst_offset]; 120 if (fpc->consts[src.index].pipe >= 0) { 121 struct nv30_fragment_program_data *fpd; 122 123 fp->consts = realloc(fp->consts, ++fp->nr_consts * 124 sizeof(*fpd)); 125 fpd = &fp->consts[fp->nr_consts - 1]; 126 fpd->offset = fpc->inst_offset + 4; 127 fpd->index = fpc->consts[src.index].pipe; 128 memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); 129 } else { 130 memcpy(&fp->insn[fpc->inst_offset + 4], 131 fpc->consts[src.index].vals, 132 sizeof(uint32_t) * 4); 133 } 134 135 sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); 136 break; 137 case NV30SR_NONE: 138 sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); 139 break; 140 default: 141 assert(0); 142 } 143 144 if (src.negate) 145 sr |= NV30_FP_REG_NEGATE; 146 147 if (src.abs) 148 hw[1] |= (1 << (29 + pos)); 149 150 sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | 151 (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | 152 (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | 153 (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); 154 155 hw[pos + 1] |= sr; 156} 157 158static void 159emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) 160{ 161 struct nv30_fragment_program *fp = fpc->fp; 162 uint32_t *hw = &fp->insn[fpc->inst_offset]; 163 164 switch (dst.type) { 165 case NV30SR_TEMP: 166 if (fpc->num_regs < (dst.index + 1)) 167 fpc->num_regs = dst.index + 1; 168 break; 169 case NV30SR_OUTPUT: 170 if (dst.index == 1) { 171 fp->fp_control |= 0xe; 172 } else { 173 hw[0] |= NV30_FP_OP_OUT_REG_HALF; 174 } 175 break; 176 case NV30SR_NONE: 177 hw[0] |= (1 << 30); 178 break; 179 default: 180 assert(0); 181 } 182 183 hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); 184} 185 186static void 187nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, 188 struct nv30_sreg dst, int mask, 189 struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) 190{ 191 struct nv30_fragment_program *fp = fpc->fp; 192 uint32_t *hw; 193 194 fpc->inst_offset = fp->insn_len; 195 grow_insns(fpc, 4); 196 hw = &fp->insn[fpc->inst_offset]; 197 memset(hw, 0, sizeof(uint32_t) * 4); 198 199 if (op == NV30_FP_OP_OPCODE_KIL) 200 fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; 201 hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); 202 hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); 203 hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); 204 205 if (sat) 206 hw[0] |= NV30_FP_OP_OUT_SAT; 207 208 if (dst.cc_update) 209 hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; 210 hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); 211 hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | 212 (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | 213 (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | 214 (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); 215 216 emit_dst(fpc, dst); 217 emit_src(fpc, 0, s0); 218 emit_src(fpc, 1, s1); 219 emit_src(fpc, 2, s2); 220} 221 222static void 223nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, 224 struct nv30_sreg dst, int mask, 225 struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) 226{ 227 struct nv30_fragment_program *fp = fpc->fp; 228 229 nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); 230 231 fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); 232 fp->samplers |= (1 << unit); 233} 234 235static INLINE struct nv30_sreg 236tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) 237{ 238 struct nv30_sreg src; 239 240 switch (fsrc->SrcRegister.File) { 241 case TGSI_FILE_INPUT: 242 src = nv30_sr(NV30SR_INPUT, 243 fpc->attrib_map[fsrc->SrcRegister.Index]); 244 break; 245 case TGSI_FILE_CONSTANT: 246 src = constant(fpc, fsrc->SrcRegister.Index, NULL); 247 break; 248 case TGSI_FILE_IMMEDIATE: 249 assert(fsrc->SrcRegister.Index < fpc->nr_imm); 250 src = fpc->imm[fsrc->SrcRegister.Index]; 251 break; 252 case TGSI_FILE_TEMPORARY: 253 src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); 254 if (fpc->high_temp < src.index) 255 fpc->high_temp = src.index; 256 break; 257 /* This is clearly insane, but gallium hands us shaders like this. 258 * Luckily fragprog results are just temp regs.. 259 */ 260 case TGSI_FILE_OUTPUT: 261 if (fsrc->SrcRegister.Index == fpc->colour_id) 262 return nv30_sr(NV30SR_OUTPUT, 0); 263 else 264 return nv30_sr(NV30SR_OUTPUT, 1); 265 break; 266 default: 267 NOUVEAU_ERR("bad src file\n"); 268 break; 269 } 270 271 src.abs = fsrc->SrcRegister.Absolute; 272 src.negate = fsrc->SrcRegister.Negate; 273 src.swz[0] = fsrc->SrcRegister.SwizzleX; 274 src.swz[1] = fsrc->SrcRegister.SwizzleY; 275 src.swz[2] = fsrc->SrcRegister.SwizzleZ; 276 src.swz[3] = fsrc->SrcRegister.SwizzleW; 277 return src; 278} 279 280static INLINE struct nv30_sreg 281tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { 282 int idx; 283 284 switch (fdst->DstRegister.File) { 285 case TGSI_FILE_OUTPUT: 286 if (fdst->DstRegister.Index == fpc->colour_id) 287 return nv30_sr(NV30SR_OUTPUT, 0); 288 else 289 return nv30_sr(NV30SR_OUTPUT, 1); 290 break; 291 case TGSI_FILE_TEMPORARY: 292 idx = fdst->DstRegister.Index + 1; 293 if (fpc->high_temp < idx) 294 fpc->high_temp = idx; 295 return nv30_sr(NV30SR_TEMP, idx); 296 case TGSI_FILE_NULL: 297 return nv30_sr(NV30SR_NONE, 0); 298 default: 299 NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); 300 return nv30_sr(NV30SR_NONE, 0); 301 } 302} 303 304static INLINE int 305tgsi_mask(uint tgsi) 306{ 307 int mask = 0; 308 309 if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; 310 if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; 311 if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; 312 if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; 313 return mask; 314} 315 316static boolean 317src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, 318 struct nv30_sreg *src) 319{ 320 const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); 321 struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); 322 uint mask = 0; 323 uint c; 324 325 for (c = 0; c < 4; c++) { 326 switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { 327 case TGSI_SWIZZLE_X: 328 case TGSI_SWIZZLE_Y: 329 case TGSI_SWIZZLE_Z: 330 case TGSI_SWIZZLE_W: 331 mask |= (1 << c); 332 break; 333 default: 334 assert(0); 335 } 336 } 337 338 if (mask == MASK_ALL) 339 return TRUE; 340 341 *src = temp(fpc); 342 343 if (mask) 344 arith(fpc, 0, MOV, *src, mask, tgsi, none, none); 345 346 return FALSE; 347} 348 349static boolean 350nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, 351 const struct tgsi_full_instruction *finst) 352{ 353 const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); 354 struct nv30_sreg src[3], dst, tmp; 355 int mask, sat, unit = 0; 356 int ai = -1, ci = -1; 357 int i; 358 359 if (finst->Instruction.Opcode == TGSI_OPCODE_END) 360 return TRUE; 361 362 fpc->temp_temp_count = 0; 363 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 364 const struct tgsi_full_src_register *fsrc; 365 366 fsrc = &finst->Src[i]; 367 if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { 368 src[i] = tgsi_src(fpc, fsrc); 369 } 370 } 371 372 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { 373 const struct tgsi_full_src_register *fsrc; 374 375 fsrc = &finst->Src[i]; 376 377 switch (fsrc->SrcRegister.File) { 378 case TGSI_FILE_INPUT: 379 case TGSI_FILE_CONSTANT: 380 case TGSI_FILE_TEMPORARY: 381 if (!src_native_swz(fpc, fsrc, &src[i])) 382 continue; 383 break; 384 default: 385 break; 386 } 387 388 switch (fsrc->SrcRegister.File) { 389 case TGSI_FILE_INPUT: 390 if (ai == -1 || ai == fsrc->SrcRegister.Index) { 391 ai = fsrc->SrcRegister.Index; 392 src[i] = tgsi_src(fpc, fsrc); 393 } else { 394 NOUVEAU_MSG("extra src attr %d\n", 395 fsrc->SrcRegister.Index); 396 src[i] = temp(fpc); 397 arith(fpc, 0, MOV, src[i], MASK_ALL, 398 tgsi_src(fpc, fsrc), none, none); 399 } 400 break; 401 case TGSI_FILE_CONSTANT: 402 case TGSI_FILE_IMMEDIATE: 403 if (ci == -1 || ci == fsrc->SrcRegister.Index) { 404 ci = fsrc->SrcRegister.Index; 405 src[i] = tgsi_src(fpc, fsrc); 406 } else { 407 src[i] = temp(fpc); 408 arith(fpc, 0, MOV, src[i], MASK_ALL, 409 tgsi_src(fpc, fsrc), none, none); 410 } 411 break; 412 case TGSI_FILE_TEMPORARY: 413 /* handled above */ 414 break; 415 case TGSI_FILE_SAMPLER: 416 unit = fsrc->SrcRegister.Index; 417 break; 418 case TGSI_FILE_OUTPUT: 419 break; 420 default: 421 NOUVEAU_ERR("bad src file\n"); 422 return FALSE; 423 } 424 } 425 426 dst = tgsi_dst(fpc, &finst->Dst[0]); 427 mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask); 428 sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); 429 430 switch (finst->Instruction.Opcode) { 431 case TGSI_OPCODE_ABS: 432 arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); 433 break; 434 case TGSI_OPCODE_ADD: 435 arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); 436 break; 437 case TGSI_OPCODE_CMP: 438 tmp = temp(fpc); 439 arith(fpc, sat, MOV, dst, mask, src[2], none, none); 440 tmp.cc_update = 1; 441 arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); 442 dst.cc_test = NV30_VP_INST_COND_LT; 443 arith(fpc, sat, MOV, dst, mask, src[1], none, none); 444 break; 445 case TGSI_OPCODE_COS: 446 arith(fpc, sat, COS, dst, mask, src[0], none, none); 447 break; 448 case TGSI_OPCODE_DP3: 449 arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); 450 break; 451 case TGSI_OPCODE_DP4: 452 arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); 453 break; 454 case TGSI_OPCODE_DPH: 455 tmp = temp(fpc); 456 arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); 457 arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), 458 swz(src[1], W, W, W, W), none); 459 break; 460 case TGSI_OPCODE_DST: 461 arith(fpc, sat, DST, dst, mask, src[0], src[1], none); 462 break; 463 case TGSI_OPCODE_EX2: 464 arith(fpc, sat, EX2, dst, mask, src[0], none, none); 465 break; 466 case TGSI_OPCODE_FLR: 467 arith(fpc, sat, FLR, dst, mask, src[0], none, none); 468 break; 469 case TGSI_OPCODE_FRC: 470 arith(fpc, sat, FRC, dst, mask, src[0], none, none); 471 break; 472 case TGSI_OPCODE_KILP: 473 arith(fpc, 0, KIL, none, 0, none, none, none); 474 break; 475 case TGSI_OPCODE_KIL: 476 dst = nv30_sr(NV30SR_NONE, 0); 477 dst.cc_update = 1; 478 arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); 479 dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; 480 arith(fpc, 0, KIL, dst, 0, none, none, none); 481 break; 482 case TGSI_OPCODE_LG2: 483 arith(fpc, sat, LG2, dst, mask, src[0], none, none); 484 break; 485// case TGSI_OPCODE_LIT: 486 case TGSI_OPCODE_LRP: 487 arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); 488 break; 489 case TGSI_OPCODE_MAD: 490 arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); 491 break; 492 case TGSI_OPCODE_MAX: 493 arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); 494 break; 495 case TGSI_OPCODE_MIN: 496 arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); 497 break; 498 case TGSI_OPCODE_MOV: 499 arith(fpc, sat, MOV, dst, mask, src[0], none, none); 500 break; 501 case TGSI_OPCODE_MUL: 502 arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); 503 break; 504 case TGSI_OPCODE_POW: 505 arith(fpc, sat, POW, dst, mask, src[0], src[1], none); 506 break; 507 case TGSI_OPCODE_RCP: 508 arith(fpc, sat, RCP, dst, mask, src[0], none, none); 509 break; 510 case TGSI_OPCODE_RET: 511 assert(0); 512 break; 513 case TGSI_OPCODE_RFL: 514 arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); 515 break; 516 case TGSI_OPCODE_RSQ: 517 arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); 518 break; 519 case TGSI_OPCODE_SCS: 520 if (mask & MASK_X) { 521 arith(fpc, sat, COS, dst, MASK_X, 522 swz(src[0], X, X, X, X), none, none); 523 } 524 if (mask & MASK_Y) { 525 arith(fpc, sat, SIN, dst, MASK_Y, 526 swz(src[0], X, X, X, X), none, none); 527 } 528 break; 529 case TGSI_OPCODE_SIN: 530 arith(fpc, sat, SIN, dst, mask, src[0], none, none); 531 break; 532 case TGSI_OPCODE_SGE: 533 arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); 534 break; 535 case TGSI_OPCODE_SGT: 536 arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); 537 break; 538 case TGSI_OPCODE_SLT: 539 arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); 540 break; 541 case TGSI_OPCODE_SUB: 542 arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); 543 break; 544 case TGSI_OPCODE_TEX: 545 tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); 546 break; 547 case TGSI_OPCODE_TXB: 548 tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); 549 break; 550 case TGSI_OPCODE_TXP: 551 tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); 552 break; 553 case TGSI_OPCODE_XPD: 554 tmp = temp(fpc); 555 arith(fpc, 0, MUL, tmp, mask, 556 swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); 557 arith(fpc, sat, MAD, dst, (mask & ~MASK_W), 558 swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), 559 neg(tmp)); 560 break; 561 default: 562 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); 563 return FALSE; 564 } 565 566 return TRUE; 567} 568 569static boolean 570nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, 571 const struct tgsi_full_declaration *fdec) 572{ 573 int hw; 574 575 switch (fdec->Semantic.Name) { 576 case TGSI_SEMANTIC_POSITION: 577 hw = NV30_FP_OP_INPUT_SRC_POSITION; 578 break; 579 case TGSI_SEMANTIC_COLOR: 580 if (fdec->Semantic.Index == 0) { 581 hw = NV30_FP_OP_INPUT_SRC_COL0; 582 } else 583 if (fdec->Semantic.Index == 1) { 584 hw = NV30_FP_OP_INPUT_SRC_COL1; 585 } else { 586 NOUVEAU_ERR("bad colour semantic index\n"); 587 return FALSE; 588 } 589 break; 590 case TGSI_SEMANTIC_FOG: 591 hw = NV30_FP_OP_INPUT_SRC_FOGC; 592 break; 593 case TGSI_SEMANTIC_GENERIC: 594 if (fdec->Semantic.Index <= 7) { 595 hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. 596 Index); 597 } else { 598 NOUVEAU_ERR("bad generic semantic index\n"); 599 return FALSE; 600 } 601 break; 602 default: 603 NOUVEAU_ERR("bad input semantic\n"); 604 return FALSE; 605 } 606 607 fpc->attrib_map[fdec->DeclarationRange.First] = hw; 608 return TRUE; 609} 610 611static boolean 612nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, 613 const struct tgsi_full_declaration *fdec) 614{ 615 switch (fdec->Semantic.Name) { 616 case TGSI_SEMANTIC_POSITION: 617 fpc->depth_id = fdec->DeclarationRange.First; 618 break; 619 case TGSI_SEMANTIC_COLOR: 620 fpc->colour_id = fdec->DeclarationRange.First; 621 break; 622 default: 623 NOUVEAU_ERR("bad output semantic\n"); 624 return FALSE; 625 } 626 627 return TRUE; 628} 629 630static boolean 631nv30_fragprog_prepare(struct nv30_fpc *fpc) 632{ 633 struct tgsi_parse_context p; 634 /*int high_temp = -1, i;*/ 635 636 tgsi_parse_init(&p, fpc->fp->pipe.tokens); 637 while (!tgsi_parse_end_of_tokens(&p)) { 638 const union tgsi_full_token *tok = &p.FullToken; 639 640 tgsi_parse_token(&p); 641 switch(tok->Token.Type) { 642 case TGSI_TOKEN_TYPE_DECLARATION: 643 { 644 const struct tgsi_full_declaration *fdec; 645 fdec = &p.FullToken.FullDeclaration; 646 switch (fdec->Declaration.File) { 647 case TGSI_FILE_INPUT: 648 if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) 649 goto out_err; 650 break; 651 case TGSI_FILE_OUTPUT: 652 if (!nv30_fragprog_parse_decl_output(fpc, fdec)) 653 goto out_err; 654 break; 655 /*case TGSI_FILE_TEMPORARY: 656 if (fdec->DeclarationRange.Last > high_temp) { 657 high_temp = 658 fdec->DeclarationRange.Last; 659 } 660 break;*/ 661 default: 662 break; 663 } 664 } 665 break; 666 case TGSI_TOKEN_TYPE_IMMEDIATE: 667 { 668 struct tgsi_full_immediate *imm; 669 float vals[4]; 670 671 imm = &p.FullToken.FullImmediate; 672 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); 673 assert(fpc->nr_imm < MAX_IMM); 674 675 vals[0] = imm->u[0].Float; 676 vals[1] = imm->u[1].Float; 677 vals[2] = imm->u[2].Float; 678 vals[3] = imm->u[3].Float; 679 fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); 680 } 681 break; 682 default: 683 break; 684 } 685 } 686 tgsi_parse_free(&p); 687 688 /*if (++high_temp) { 689 fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); 690 for (i = 0; i < high_temp; i++) 691 fpc->r_temp[i] = temp(fpc); 692 fpc->r_temps_discard = 0; 693 }*/ 694 695 return TRUE; 696 697out_err: 698 /*if (fpc->r_temp) 699 FREE(fpc->r_temp);*/ 700 tgsi_parse_free(&p); 701 return FALSE; 702} 703 704static void 705nv30_fragprog_translate(struct nv30_context *nv30, 706 struct nv30_fragment_program *fp) 707{ 708 struct tgsi_parse_context parse; 709 struct nv30_fpc *fpc = NULL; 710 711 tgsi_dump(fp->pipe.tokens,0); 712 713 fpc = CALLOC(1, sizeof(struct nv30_fpc)); 714 if (!fpc) 715 return; 716 fpc->fp = fp; 717 fpc->high_temp = -1; 718 fpc->num_regs = 2; 719 720 if (!nv30_fragprog_prepare(fpc)) { 721 FREE(fpc); 722 return; 723 } 724 725 tgsi_parse_init(&parse, fp->pipe.tokens); 726 727 while (!tgsi_parse_end_of_tokens(&parse)) { 728 tgsi_parse_token(&parse); 729 730 switch (parse.FullToken.Token.Type) { 731 case TGSI_TOKEN_TYPE_INSTRUCTION: 732 { 733 const struct tgsi_full_instruction *finst; 734 735 finst = &parse.FullToken.FullInstruction; 736 if (!nv30_fragprog_parse_instruction(fpc, finst)) 737 goto out_err; 738 } 739 break; 740 default: 741 break; 742 } 743 } 744 745 fp->fp_control |= (fpc->num_regs-1)/2; 746 fp->fp_reg_control = (1<<16)|0x4; 747 748 /* Terminate final instruction */ 749 fp->insn[fpc->inst_offset] |= 0x00000001; 750 751 /* Append NOP + END instruction, may or may not be necessary. */ 752 fpc->inst_offset = fp->insn_len; 753 grow_insns(fpc, 4); 754 fp->insn[fpc->inst_offset + 0] = 0x00000001; 755 fp->insn[fpc->inst_offset + 1] = 0x00000000; 756 fp->insn[fpc->inst_offset + 2] = 0x00000000; 757 fp->insn[fpc->inst_offset + 3] = 0x00000000; 758 759 fp->translated = TRUE; 760 fp->on_hw = FALSE; 761out_err: 762 tgsi_parse_free(&parse); 763 FREE(fpc); 764} 765 766static void 767nv30_fragprog_upload(struct nv30_context *nv30, 768 struct nv30_fragment_program *fp) 769{ 770 struct pipe_screen *pscreen = nv30->pipe.screen; 771 const uint32_t le = 1; 772 uint32_t *map; 773 int i; 774 775 map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 776 777#if 0 778 for (i = 0; i < fp->insn_len; i++) { 779 fflush(stdout); fflush(stderr); 780 NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); 781 fflush(stdout); fflush(stderr); 782 } 783#endif 784 785 if ((*(const uint8_t *)&le)) { 786 for (i = 0; i < fp->insn_len; i++) { 787 map[i] = fp->insn[i]; 788 } 789 } else { 790 /* Weird swapping for big-endian chips */ 791 for (i = 0; i < fp->insn_len; i++) { 792 map[i] = ((fp->insn[i] & 0xffff) << 16) | 793 ((fp->insn[i] >> 16) & 0xffff); 794 } 795 } 796 797 pipe_buffer_unmap(pscreen, fp->buffer); 798} 799 800static boolean 801nv30_fragprog_validate(struct nv30_context *nv30) 802{ 803 struct nv30_fragment_program *fp = nv30->fragprog; 804 struct pipe_buffer *constbuf = 805 nv30->constbuf[PIPE_SHADER_FRAGMENT]; 806 struct pipe_screen *pscreen = nv30->pipe.screen; 807 struct nouveau_stateobj *so; 808 boolean new_consts = FALSE; 809 int i; 810 811 if (fp->translated) 812 goto update_constants; 813 814 /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ 815 nv30_fragprog_translate(nv30, fp); 816 if (!fp->translated) { 817 /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ 818 return FALSE; 819 } 820 821 fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); 822 nv30_fragprog_upload(nv30, fp); 823 824 so = so_new(8, 1); 825 so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); 826 so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | 827 NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | 828 NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, 829 NV34TCL_FP_ACTIVE_PROGRAM_DMA1); 830 so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); 831 so_data (so, fp->fp_control); 832 so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); 833 so_data (so, fp->fp_reg_control); 834 so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); 835 so_data (so, fp->samplers); 836 so_ref(so, &fp->so); 837 so_ref(NULL, &so); 838 839update_constants: 840 if (fp->nr_consts) { 841 float *map; 842 843 map = pipe_buffer_map(pscreen, constbuf, 844 PIPE_BUFFER_USAGE_CPU_READ); 845 for (i = 0; i < fp->nr_consts; i++) { 846 struct nv30_fragment_program_data *fpd = &fp->consts[i]; 847 uint32_t *p = &fp->insn[fpd->offset]; 848 uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; 849 850 if (!memcmp(p, cb, 4 * sizeof(float))) 851 continue; 852 memcpy(p, cb, 4 * sizeof(float)); 853 new_consts = TRUE; 854 } 855 pipe_buffer_unmap(pscreen, constbuf); 856 857 if (new_consts) 858 nv30_fragprog_upload(nv30, fp); 859 } 860 861 if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { 862 so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); 863 return TRUE; 864 } 865 866 return FALSE; 867} 868 869void 870nv30_fragprog_destroy(struct nv30_context *nv30, 871 struct nv30_fragment_program *fp) 872{ 873 if (fp->insn_len) 874 FREE(fp->insn); 875} 876 877struct nv30_state_entry nv30_state_fragprog = { 878 .validate = nv30_fragprog_validate, 879 .dirty = { 880 .pipe = NV30_NEW_FRAGPROG, 881 .hw = NV30_STATE_FRAGPROG 882 } 883}; 884