nv50_program.c revision 34abb858e2aaef2c1a066a7cdb3e0376d6c9f6bd
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL 16 * LIT - other buggery 17 * POW 18 * SAT 19 * 20 * MSB - Like MAD, but MUL+SUB 21 * - Fuck it off, introduce a way to negate args for ops that 22 * support it. 23 * 24 * Look into inlining IMMD for ops other than MOV 25 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 26 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 27 */ 28struct nv50_reg { 29 enum { 30 P_TEMP, 31 P_ATTR, 32 P_RESULT, 33 P_CONST, 34 P_IMMD 35 } type; 36 int index; 37 38 int hw; 39 int neg; 40}; 41 42struct nv50_pc { 43 struct nv50_program *p; 44 45 /* hw resources */ 46 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 47 48 /* tgsi resources */ 49 struct nv50_reg *temp; 50 int temp_nr; 51 struct nv50_reg *attr; 52 int attr_nr; 53 struct nv50_reg *result; 54 int result_nr; 55 struct nv50_reg *param; 56 int param_nr; 57 struct nv50_reg *immd; 58 float *immd_buf; 59 int immd_nr; 60 61 struct nv50_reg *temp_temp[8]; 62 unsigned temp_temp_nr; 63}; 64 65static void 66alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 67{ 68 int i; 69 70 if (reg->type != P_TEMP) 71 return; 72 73 if (reg->hw >= 0) { 74 /*XXX: do this here too to catch FP temp-as-attr usage.. 75 * not clean, but works */ 76 if (pc->p->cfg.high_temp < (reg->hw + 1)) 77 pc->p->cfg.high_temp = reg->hw + 1; 78 return; 79 } 80 81 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 82 if (!(pc->r_temp[i])) { 83 pc->r_temp[i] = reg; 84 reg->hw = i; 85 if (pc->p->cfg.high_temp < (i + 1)) 86 pc->p->cfg.high_temp = i + 1; 87 return; 88 } 89 } 90 91 assert(0); 92} 93 94static struct nv50_reg * 95alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 96{ 97 struct nv50_reg *r; 98 int i; 99 100 if (dst && dst->type == P_TEMP && dst->hw == -1) 101 return dst; 102 103 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 104 if (!pc->r_temp[i]) { 105 r = CALLOC_STRUCT(nv50_reg); 106 r->type = P_TEMP; 107 r->index = -1; 108 r->hw = i; 109 pc->r_temp[i] = r; 110 return r; 111 } 112 } 113 114 assert(0); 115 return NULL; 116} 117 118static void 119free_temp(struct nv50_pc *pc, struct nv50_reg *r) 120{ 121 if (r->index == -1) { 122 FREE(pc->r_temp[r->hw]); 123 pc->r_temp[r->hw] = NULL; 124 } 125} 126 127static struct nv50_reg * 128temp_temp(struct nv50_pc *pc) 129{ 130 if (pc->temp_temp_nr >= 8) 131 assert(0); 132 133 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 134 return pc->temp_temp[pc->temp_temp_nr++]; 135} 136 137static void 138kill_temp_temp(struct nv50_pc *pc) 139{ 140 int i; 141 142 for (i = 0; i < pc->temp_temp_nr; i++) 143 free_temp(pc, pc->temp_temp[i]); 144 pc->temp_temp_nr = 0; 145} 146 147static int 148ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 149{ 150 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 151 sizeof(float)); 152 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 153 pc->immd_buf[(pc->immd_nr * 4) + 1] = x; 154 pc->immd_buf[(pc->immd_nr * 4) + 2] = x; 155 pc->immd_buf[(pc->immd_nr * 4) + 3] = x; 156 157 return pc->immd_nr++; 158} 159 160static struct nv50_reg * 161alloc_immd(struct nv50_pc *pc, float f) 162{ 163 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 164 unsigned hw; 165 166 hw = ctor_immd(pc, f, 0, 0, 0); 167 r->type = P_IMMD; 168 r->hw = hw; 169 r->index = -1; 170 return r; 171} 172 173static struct nv50_reg * 174tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 175{ 176 switch (dst->DstRegister.File) { 177 case TGSI_FILE_TEMPORARY: 178 return &pc->temp[dst->DstRegister.Index * 4 + c]; 179 case TGSI_FILE_OUTPUT: 180 return &pc->result[dst->DstRegister.Index * 4 + c]; 181 case TGSI_FILE_NULL: 182 return NULL; 183 default: 184 break; 185 } 186 187 return NULL; 188} 189 190static struct nv50_reg * 191tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 192{ 193 struct nv50_reg *r = NULL; 194 unsigned c; 195 196 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 197 switch (c) { 198 case TGSI_EXTSWIZZLE_X: 199 case TGSI_EXTSWIZZLE_Y: 200 case TGSI_EXTSWIZZLE_Z: 201 case TGSI_EXTSWIZZLE_W: 202 switch (src->SrcRegister.File) { 203 case TGSI_FILE_INPUT: 204 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 205 break; 206 case TGSI_FILE_TEMPORARY: 207 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 208 break; 209 case TGSI_FILE_CONSTANT: 210 r = &pc->param[src->SrcRegister.Index * 4 + c]; 211 break; 212 case TGSI_FILE_IMMEDIATE: 213 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 214 break; 215 default: 216 assert(0); 217 break; 218 } 219 break; 220 case TGSI_EXTSWIZZLE_ZERO: 221 r = alloc_immd(pc, 0.0); 222 break; 223 case TGSI_EXTSWIZZLE_ONE: 224 r = alloc_immd(pc, 1.0); 225 break; 226 default: 227 assert(0); 228 break; 229 } 230 231 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 232 case TGSI_UTIL_SIGN_KEEP: 233 break; 234 default: 235 assert(0); 236 break; 237 } 238 239 return r; 240} 241 242static void 243emit(struct nv50_pc *pc, unsigned *inst) 244{ 245 struct nv50_program *p = pc->p; 246 247 if (inst[0] & 1) { 248 p->insns_nr += 2; 249 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 250 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 251 } else { 252 p->insns_nr += 1; 253 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 254 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 255 } 256} 257 258static INLINE void set_long(struct nv50_pc *, unsigned *); 259 260static boolean 261is_long(unsigned *inst) 262{ 263 if (inst[0] & 1) 264 return TRUE; 265 return FALSE; 266} 267 268static boolean 269is_immd(unsigned *inst) 270{ 271 if (is_long(inst) && (inst[1] & 3) == 3) 272 return TRUE; 273 return FALSE; 274} 275 276static INLINE void 277set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 278{ 279 set_long(pc, inst); 280 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 281 inst[1] |= (pred << 7) | (idx << 12); 282} 283 284static INLINE void 285set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 286{ 287 set_long(pc, inst); 288 inst[1] &= ~((0x3 << 4) | (1 << 6)); 289 inst[1] |= (idx << 4) | (on << 6); 290} 291 292static INLINE void 293set_long(struct nv50_pc *pc, unsigned *inst) 294{ 295 if (is_long(inst)) 296 return; 297 298 inst[0] |= 1; 299 set_pred(pc, 0xf, 0, inst); 300 set_pred_wr(pc, 0, 0, inst); 301} 302 303static INLINE void 304set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 305{ 306 if (dst->type == P_RESULT) { 307 set_long(pc, inst); 308 inst[1] |= 0x00000008; 309 } 310 311 alloc_reg(pc, dst); 312 inst[0] |= (dst->hw << 2); 313} 314 315static INLINE void 316set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 317{ 318 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 319 320 set_long(pc, inst); 321 /*XXX: can't be predicated - bits overlap.. catch cases where both 322 * are required and avoid them. */ 323 set_pred(pc, 0, 0, inst); 324 set_pred_wr(pc, 0, 0, inst); 325 326 inst[1] |= 0x00000002 | 0x00000001; 327 inst[0] |= (val & 0x3f) << 16; 328 inst[1] |= (val >> 6) << 2; 329} 330 331static void 332emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 333 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 334{ 335 unsigned inst[2] = { 0, 0 }; 336 337 inst[0] |= 0x80000000; 338 set_dst(pc, dst, inst); 339 alloc_reg(pc, iv); 340 inst[0] |= (iv->hw << 9); 341 alloc_reg(pc, src); 342 inst[0] |= (src->hw << 16); 343 if (noperspective) 344 inst[0] |= (1 << 25); 345 346 emit(pc, inst); 347} 348 349static void 350set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 351{ 352 set_long(pc, inst); 353 if (src->type == P_IMMD) { 354 inst[1] |= (NV50_CB_PMISC << 22); 355 } else { 356 if (pc->p->type == NV50_PROG_VERTEX) 357 inst[1] |= (NV50_CB_PVP << 22); 358 else 359 inst[1] |= (NV50_CB_PFP << 22); 360 } 361} 362 363static void 364emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 365{ 366 unsigned inst[2] = { 0, 0 }; 367 368 inst[0] |= 0x10000000; 369 370 set_dst(pc, dst, inst); 371 372 if (dst->type != P_RESULT && src->type == P_IMMD) { 373 set_immd(pc, src, inst); 374 /*XXX: 32-bit, but steals part of "half" reg space - need to 375 * catch and handle this case if/when we do half-regs 376 */ 377 inst[0] |= 0x00008000; 378 } else 379 if (src->type == P_IMMD || src->type == P_CONST) { 380 set_long(pc, inst); 381 set_cseg(pc, src, inst); 382 inst[0] |= (src->hw << 9); 383 inst[1] |= 0x20000000; /* src0 const? */ 384 } else { 385 if (src->type == P_ATTR) { 386 set_long(pc, inst); 387 inst[1] |= 0x00200000; 388 } 389 390 alloc_reg(pc, src); 391 inst[0] |= (src->hw << 9); 392 } 393 394 /* We really should support "half" instructions here at some point, 395 * but I don't feel confident enough about them yet. 396 */ 397 set_long(pc, inst); 398 if (is_long(inst) && !is_immd(inst)) { 399 inst[1] |= 0x04000000; /* 32-bit */ 400 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 401 } 402 403 emit(pc, inst); 404} 405 406static boolean 407check_swap_src_0_1(struct nv50_pc *pc, 408 struct nv50_reg **s0, struct nv50_reg **s1) 409{ 410 struct nv50_reg *src0 = *s0, *src1 = *s1; 411 412 if (src0->type == P_CONST) { 413 if (src1->type != P_CONST) { 414 *s0 = src1; 415 *s1 = src0; 416 return TRUE; 417 } 418 } else 419 if (src1->type == P_ATTR) { 420 if (src0->type != P_ATTR) { 421 *s0 = src1; 422 *s1 = src0; 423 return TRUE; 424 } 425 } 426 427 return FALSE; 428} 429 430static void 431set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 432{ 433 if (src->type == P_ATTR) { 434 set_long(pc, inst); 435 inst[1] |= 0x00200000; 436 } else 437 if (src->type == P_CONST || src->type == P_IMMD) { 438 struct nv50_reg *temp = temp_temp(pc); 439 440 emit_mov(pc, temp, src); 441 src = temp; 442 } 443 444 alloc_reg(pc, src); 445 inst[0] |= (src->hw << 9); 446} 447 448static void 449set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 450{ 451 if (src->type == P_ATTR) { 452 struct nv50_reg *temp = temp_temp(pc); 453 454 emit_mov(pc, temp, src); 455 src = temp; 456 } else 457 if (src->type == P_CONST || src->type == P_IMMD) { 458 set_cseg(pc, src, inst); 459 inst[0] |= 0x00800000; 460 } 461 462 alloc_reg(pc, src); 463 inst[0] |= (src->hw << 16); 464} 465 466static void 467set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 468{ 469 set_long(pc, inst); 470 471 if (src->type == P_ATTR) { 472 struct nv50_reg *temp = temp_temp(pc); 473 474 emit_mov(pc, temp, src); 475 src = temp; 476 } else 477 if (src->type == P_CONST || src->type == P_IMMD) { 478 set_cseg(pc, src, inst); 479 inst[0] |= 0x01000000; 480 } 481 482 alloc_reg(pc, src); 483 inst[1] |= (src->hw << 14); 484} 485 486static void 487emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 488 struct nv50_reg *src1) 489{ 490 unsigned inst[2] = { 0, 0 }; 491 492 inst[0] |= 0xc0000000; 493 494 check_swap_src_0_1(pc, &src0, &src1); 495 set_dst(pc, dst, inst); 496 set_src_0(pc, src0, inst); 497 set_src_1(pc, src1, inst); 498 499 emit(pc, inst); 500} 501 502static void 503emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 504 struct nv50_reg *src0, struct nv50_reg *src1) 505{ 506 unsigned inst[2] = { 0, 0 }; 507 508 inst[0] |= 0xb0000000; 509 510 check_swap_src_0_1(pc, &src0, &src1); 511 set_dst(pc, dst, inst); 512 set_src_0(pc, src0, inst); 513 if (is_long(inst)) 514 set_src_2(pc, src1, inst); 515 else 516 set_src_1(pc, src1, inst); 517 518 emit(pc, inst); 519} 520 521static void 522emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 523 struct nv50_reg *src0, struct nv50_reg *src1) 524{ 525 unsigned inst[2] = { 0, 0 }; 526 527 set_long(pc, inst); 528 inst[0] |= 0xb0000000; 529 inst[1] |= (sub << 29); 530 531 check_swap_src_0_1(pc, &src0, &src1); 532 set_dst(pc, dst, inst); 533 set_src_0(pc, src0, inst); 534 set_src_1(pc, src1, inst); 535 536 emit(pc, inst); 537} 538 539static void 540emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 541 struct nv50_reg *src1) 542{ 543 unsigned inst[2] = { 0, 0 }; 544 545 inst[0] |= 0xb0000000; 546 547 set_long(pc, inst); 548 if (check_swap_src_0_1(pc, &src0, &src1)) 549 inst[1] |= 0x04000000; 550 else 551 inst[1] |= 0x08000000; 552 553 set_dst(pc, dst, inst); 554 set_src_0(pc, src0, inst); 555 set_src_2(pc, src1, inst); 556 557 emit(pc, inst); 558} 559 560static void 561emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 562 struct nv50_reg *src1, struct nv50_reg *src2) 563{ 564 unsigned inst[2] = { 0, 0 }; 565 566 inst[0] |= 0xe0000000; 567 568 check_swap_src_0_1(pc, &src0, &src1); 569 set_dst(pc, dst, inst); 570 set_src_0(pc, src0, inst); 571 set_src_1(pc, src1, inst); 572 set_src_2(pc, src2, inst); 573 574 emit(pc, inst); 575} 576 577static void 578emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 579 struct nv50_reg *src1, struct nv50_reg *src2) 580{ 581 unsigned inst[2] = { 0, 0 }; 582 583 inst[0] |= 0xe0000000; 584 set_long(pc, inst); 585 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 586 587 check_swap_src_0_1(pc, &src0, &src1); 588 set_dst(pc, dst, inst); 589 set_src_0(pc, src0, inst); 590 set_src_1(pc, src1, inst); 591 set_src_2(pc, src2, inst); 592 593 emit(pc, inst); 594} 595 596static void 597emit_flop(struct nv50_pc *pc, unsigned sub, 598 struct nv50_reg *dst, struct nv50_reg *src) 599{ 600 unsigned inst[2] = { 0, 0 }; 601 602 inst[0] |= 0x90000000; 603 if (sub) { 604 set_long(pc, inst); 605 inst[1] |= (sub << 29); 606 } 607 608 set_dst(pc, dst, inst); 609 set_src_0(pc, src, inst); 610 611 emit(pc, inst); 612} 613 614static void 615emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 616{ 617 unsigned inst[2] = { 0, 0 }; 618 619 inst[0] |= 0xb0000000; 620 621 set_dst(pc, dst, inst); 622 set_src_0(pc, src, inst); 623 set_long(pc, inst); 624 inst[1] |= (6 << 29) | 0x00004000; 625 626 emit(pc, inst); 627} 628/*XXX: inaccurate results.. why? */ 629#define ALLOW_SET_SWAP 0 630 631static void 632emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 633 struct nv50_reg *src0, struct nv50_reg *src1) 634{ 635 unsigned inst[2] = { 0, 0 }; 636#if ALLOW_SET_SWAP 637 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; 638#endif 639 struct nv50_reg *rdst; 640 641#if ALLOW_SET_SWAP 642 assert(c_op <= 7); 643 if (check_swap_src_0_1(pc, &src0, &src1)) 644 c_op = inv_cop[c_op]; 645#endif 646 647 rdst = dst; 648 if (dst->type != P_TEMP) 649 dst = alloc_temp(pc, NULL); 650 651 /* set.u32 */ 652 set_long(pc, inst); 653 inst[0] |= 0xb0000000; 654 inst[1] |= (3 << 29); 655 inst[1] |= (c_op << 14); 656 /*XXX: breaks things, .u32 by default? 657 * decuda will disasm as .u16 and use .lo/.hi regs, but this 658 * doesn't seem to match what the hw actually does. 659 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 660 */ 661 set_dst(pc, dst, inst); 662 set_src_0(pc, src0, inst); 663 set_src_1(pc, src1, inst); 664 emit(pc, inst); 665 666 /* cvt.f32.u32 */ 667 inst[0] = 0xa0000001; 668 inst[1] = 0x64014780; 669 set_dst(pc, rdst, inst); 670 set_src_0(pc, dst, inst); 671 emit(pc, inst); 672 673 if (dst != rdst) 674 free_temp(pc, dst); 675} 676 677static void 678emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 679{ 680 unsigned inst[2] = { 0, 0 }; 681 682 set_long(pc, inst); 683 inst[0] = 0xa0000000; /* cvt */ 684 inst[1] |= (6 << 29); /* cvt */ 685 inst[1] |= 0x08000000; /* integer mode */ 686 inst[1] |= 0x04000000; /* 32 bit */ 687 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 688 inst[1] |= (1 << 14); /* src .f32 */ 689 set_dst(pc, dst, inst); 690 set_src_0(pc, src, inst); 691 692 emit(pc, inst); 693} 694 695static boolean 696nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 697{ 698 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 699 struct nv50_reg *dst[4], *src[3][4], *temp; 700 unsigned mask; 701 int i, c; 702 703 NOUVEAU_ERR("insn %p\n", tok); 704 705 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 706 707 for (c = 0; c < 4; c++) { 708 if (mask & (1 << c)) 709 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 710 else 711 dst[c] = NULL; 712 } 713 714 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 715 for (c = 0; c < 4; c++) 716 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 717 } 718 719 switch (inst->Instruction.Opcode) { 720 case TGSI_OPCODE_ABS: 721 for (c = 0; c < 4; c++) { 722 unsigned inst[2] = { 0, 0 }; 723 724 set_long(pc, inst); 725 inst[0] = 0xa0000000; /* cvt */ 726 inst[1] |= (6 << 29); /* cvt */ 727 inst[1] |= 0x04000000; /* 32 bit */ 728 inst[1] |= (1 << 14); /* src .f32 */ 729 inst[1] |= ((1 << 6) << 14); /* .abs */ 730 set_dst(pc, dst[c], inst); 731 set_src_0(pc, src[0][c], inst); 732 emit(pc, inst); 733 } 734 break; 735 case TGSI_OPCODE_ADD: 736 for (c = 0; c < 4; c++) { 737 if (!(mask & (1 << c))) 738 continue; 739 emit_add(pc, dst[c], src[0][c], src[1][c]); 740 } 741 break; 742 case TGSI_OPCODE_COS: 743 for (c = 0; c < 4; c++) { 744 if (!(mask & (1 << c))) 745 continue; 746 emit_flop(pc, 5, dst[c], src[0][c]); 747 } 748 break; 749 case TGSI_OPCODE_DP3: 750 temp = alloc_temp(pc, NULL); 751 emit_mul(pc, temp, src[0][0], src[1][0]); 752 emit_mad(pc, temp, src[0][1], src[1][1], temp); 753 emit_mad(pc, temp, src[0][2], src[1][2], temp); 754 for (c = 0; c < 4; c++) { 755 if (!(mask & (1 << c))) 756 continue; 757 emit_mov(pc, dst[c], temp); 758 } 759 free_temp(pc, temp); 760 break; 761 case TGSI_OPCODE_DP4: 762 temp = alloc_temp(pc, NULL); 763 emit_mul(pc, temp, src[0][0], src[1][0]); 764 emit_mad(pc, temp, src[0][1], src[1][1], temp); 765 emit_mad(pc, temp, src[0][2], src[1][2], temp); 766 emit_mad(pc, temp, src[0][3], src[1][3], temp); 767 for (c = 0; c < 4; c++) { 768 if (!(mask & (1 << c))) 769 continue; 770 emit_mov(pc, dst[c], temp); 771 } 772 free_temp(pc, temp); 773 break; 774 case TGSI_OPCODE_DPH: 775 temp = alloc_temp(pc, NULL); 776 emit_mul(pc, temp, src[0][0], src[1][0]); 777 emit_mad(pc, temp, src[0][1], src[1][1], temp); 778 emit_mad(pc, temp, src[0][2], src[1][2], temp); 779 emit_add(pc, temp, src[1][3], temp); 780 for (c = 0; c < 4; c++) { 781 if (!(mask & (1 << c))) 782 continue; 783 emit_mov(pc, dst[c], temp); 784 } 785 free_temp(pc, temp); 786 break; 787 case TGSI_OPCODE_DST: 788 { 789 struct nv50_reg *one = alloc_immd(pc, 1.0); 790 emit_mov(pc, dst[0], one); 791 emit_mul(pc, dst[1], src[0][1], src[1][1]); 792 emit_mov(pc, dst[2], src[0][2]); 793 emit_mov(pc, dst[3], src[1][3]); 794 FREE(one); 795 } 796 break; 797 case TGSI_OPCODE_EX2: 798 temp = alloc_temp(pc, NULL); 799 for (c = 0; c < 4; c++) { 800 if (!(mask & (1 << c))) 801 continue; 802 emit_preex2(pc, temp, src[0][c]); 803 emit_flop(pc, 6, dst[c], temp); 804 } 805 free_temp(pc, temp); 806 break; 807 case TGSI_OPCODE_FLR: 808 for (c = 0; c < 4; c++) { 809 if (!(mask & (1 << c))) 810 continue; 811 emit_flr(pc, dst[c], src[0][c]); 812 } 813 break; 814 case TGSI_OPCODE_FRC: 815 temp = alloc_temp(pc, NULL); 816 for (c = 0; c < 4; c++) { 817 if (!(mask & (1 << c))) 818 continue; 819 emit_flr(pc, temp, src[0][c]); 820 emit_sub(pc, dst[c], src[0][c], temp); 821 } 822 free_temp(pc, temp); 823 break; 824 case TGSI_OPCODE_LG2: 825 for (c = 0; c < 4; c++) { 826 if (!(mask & (1 << c))) 827 continue; 828 emit_flop(pc, 3, dst[c], src[0][c]); 829 } 830 break; 831 case TGSI_OPCODE_MAD: 832 for (c = 0; c < 4; c++) { 833 if (!(mask & (1 << c))) 834 continue; 835 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 836 } 837 break; 838 case TGSI_OPCODE_MAX: 839 for (c = 0; c < 4; c++) { 840 if (!(mask & (1 << c))) 841 continue; 842 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 843 } 844 break; 845 case TGSI_OPCODE_MIN: 846 for (c = 0; c < 4; c++) { 847 if (!(mask & (1 << c))) 848 continue; 849 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 850 } 851 break; 852 case TGSI_OPCODE_MOV: 853 for (c = 0; c < 4; c++) { 854 if (!(mask & (1 << c))) 855 continue; 856 emit_mov(pc, dst[c], src[0][c]); 857 } 858 break; 859 case TGSI_OPCODE_MUL: 860 for (c = 0; c < 4; c++) { 861 if (!(mask & (1 << c))) 862 continue; 863 emit_mul(pc, dst[c], src[0][c], src[1][c]); 864 } 865 break; 866 case TGSI_OPCODE_RCP: 867 for (c = 0; c < 4; c++) { 868 if (!(mask & (1 << c))) 869 continue; 870 emit_flop(pc, 0, dst[c], src[0][c]); 871 } 872 break; 873 case TGSI_OPCODE_RSQ: 874 for (c = 0; c < 4; c++) { 875 if (!(mask & (1 << c))) 876 continue; 877 emit_flop(pc, 2, dst[c], src[0][c]); 878 } 879 break; 880 case TGSI_OPCODE_SGE: 881 for (c = 0; c < 4; c++) { 882 if (!(mask & (1 << c))) 883 continue; 884 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 885 } 886 break; 887 case TGSI_OPCODE_SIN: 888 for (c = 0; c < 4; c++) { 889 if (!(mask & (1 << c))) 890 continue; 891 emit_flop(pc, 4, dst[c], src[0][c]); 892 } 893 break; 894 case TGSI_OPCODE_SLT: 895 for (c = 0; c < 4; c++) { 896 if (!(mask & (1 << c))) 897 continue; 898 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 899 } 900 break; 901 case TGSI_OPCODE_SUB: 902 for (c = 0; c < 4; c++) { 903 if (!(mask & (1 << c))) 904 continue; 905 emit_sub(pc, dst[c], src[0][c], src[1][c]); 906 } 907 break; 908 case TGSI_OPCODE_XPD: 909 temp = alloc_temp(pc, NULL); 910 emit_mul(pc, temp, src[0][2], src[1][1]); 911 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 912 emit_mul(pc, temp, src[0][0], src[1][2]); 913 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 914 emit_mul(pc, temp, src[0][1], src[1][0]); 915 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 916 free_temp(pc, temp); 917 break; 918 case TGSI_OPCODE_END: 919 break; 920 default: 921 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 922 return FALSE; 923 } 924 925 kill_temp_temp(pc); 926 return TRUE; 927} 928 929static boolean 930nv50_program_tx_prep(struct nv50_pc *pc) 931{ 932 struct tgsi_parse_context p; 933 boolean ret = FALSE; 934 unsigned i, c; 935 936 tgsi_parse_init(&p, pc->p->pipe.tokens); 937 while (!tgsi_parse_end_of_tokens(&p)) { 938 const union tgsi_full_token *tok = &p.FullToken; 939 940 tgsi_parse_token(&p); 941 switch (tok->Token.Type) { 942 case TGSI_TOKEN_TYPE_IMMEDIATE: 943 { 944 const struct tgsi_full_immediate *imm = 945 &p.FullToken.FullImmediate; 946 947 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 948 imm->u.ImmediateFloat32[1].Float, 949 imm->u.ImmediateFloat32[2].Float, 950 imm->u.ImmediateFloat32[3].Float); 951 } 952 break; 953 case TGSI_TOKEN_TYPE_DECLARATION: 954 { 955 const struct tgsi_full_declaration *d; 956 unsigned last; 957 958 d = &p.FullToken.FullDeclaration; 959 last = d->u.DeclarationRange.Last; 960 961 switch (d->Declaration.File) { 962 case TGSI_FILE_TEMPORARY: 963 if (pc->temp_nr < (last + 1)) 964 pc->temp_nr = last + 1; 965 break; 966 case TGSI_FILE_OUTPUT: 967 if (pc->result_nr < (last + 1)) 968 pc->result_nr = last + 1; 969 break; 970 case TGSI_FILE_INPUT: 971 if (pc->attr_nr < (last + 1)) 972 pc->attr_nr = last + 1; 973 break; 974 case TGSI_FILE_CONSTANT: 975 if (pc->param_nr < (last + 1)) 976 pc->param_nr = last + 1; 977 break; 978 default: 979 NOUVEAU_ERR("bad decl file %d\n", 980 d->Declaration.File); 981 goto out_err; 982 } 983 } 984 break; 985 case TGSI_TOKEN_TYPE_INSTRUCTION: 986 break; 987 default: 988 break; 989 } 990 } 991 992 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 993 if (pc->temp_nr) { 994 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 995 if (!pc->temp) 996 goto out_err; 997 998 for (i = 0; i < pc->temp_nr; i++) { 999 for (c = 0; c < 4; c++) { 1000 pc->temp[i*4+c].type = P_TEMP; 1001 pc->temp[i*4+c].hw = -1; 1002 pc->temp[i*4+c].index = i; 1003 } 1004 } 1005 } 1006 1007 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1008 if (pc->attr_nr) { 1009 struct nv50_reg *iv = NULL, *tmp = NULL; 1010 int aid = 0; 1011 1012 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1013 if (!pc->attr) 1014 goto out_err; 1015 1016 if (pc->p->type == NV50_PROG_FRAGMENT) { 1017 iv = alloc_temp(pc, NULL); 1018 aid++; 1019 } 1020 1021 for (i = 0; i < pc->attr_nr; i++) { 1022 struct nv50_reg *a = &pc->attr[i*4]; 1023 1024 for (c = 0; c < 4; c++) { 1025 if (pc->p->type == NV50_PROG_FRAGMENT) { 1026 struct nv50_reg *at = 1027 alloc_temp(pc, NULL); 1028 pc->attr[i*4+c].type = at->type; 1029 pc->attr[i*4+c].hw = at->hw; 1030 pc->attr[i*4+c].index = at->index; 1031 } else { 1032 pc->p->cfg.vp.attr[aid/32] |= 1033 (1 << (aid % 32)); 1034 pc->attr[i*4+c].type = P_ATTR; 1035 pc->attr[i*4+c].hw = aid++; 1036 pc->attr[i*4+c].index = i; 1037 } 1038 } 1039 1040 if (pc->p->type != NV50_PROG_FRAGMENT) 1041 continue; 1042 1043 emit_interp(pc, iv, iv, iv, FALSE); 1044 tmp = alloc_temp(pc, NULL); 1045 { 1046 unsigned inst[2] = { 0, 0 }; 1047 inst[0] = 0x90000000; 1048 inst[0] |= (tmp->hw << 2); 1049 emit(pc, inst); 1050 } 1051 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1052 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1053 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1054 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1055 free_temp(pc, tmp); 1056 } 1057 1058 if (iv) 1059 free_temp(pc, iv); 1060 } 1061 1062 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1063 if (pc->result_nr) { 1064 int rid = 0; 1065 1066 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1067 if (!pc->result) 1068 goto out_err; 1069 1070 for (i = 0; i < pc->result_nr; i++) { 1071 for (c = 0; c < 4; c++) { 1072 if (pc->p->type == NV50_PROG_FRAGMENT) 1073 pc->result[i*4+c].type = P_TEMP; 1074 else 1075 pc->result[i*4+c].type = P_RESULT; 1076 pc->result[i*4+c].hw = rid++; 1077 pc->result[i*4+c].index = i; 1078 } 1079 } 1080 } 1081 1082 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1083 if (pc->param_nr) { 1084 int rid = 0; 1085 1086 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1087 if (!pc->param) 1088 goto out_err; 1089 1090 for (i = 0; i < pc->param_nr; i++) { 1091 for (c = 0; c < 4; c++) { 1092 pc->param[i*4+c].type = P_CONST; 1093 pc->param[i*4+c].hw = rid++; 1094 pc->param[i*4+c].index = i; 1095 } 1096 } 1097 } 1098 1099 if (pc->immd_nr) { 1100 int rid = 0; 1101 1102 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1103 if (!pc->immd) 1104 goto out_err; 1105 1106 for (i = 0; i < pc->immd_nr; i++) { 1107 for (c = 0; c < 4; c++) { 1108 pc->immd[i*4+c].type = P_IMMD; 1109 pc->immd[i*4+c].hw = rid++; 1110 pc->immd[i*4+c].index = i; 1111 } 1112 } 1113 } 1114 1115 ret = TRUE; 1116out_err: 1117 tgsi_parse_free(&p); 1118 return ret; 1119} 1120 1121static boolean 1122nv50_program_tx(struct nv50_program *p) 1123{ 1124 struct tgsi_parse_context parse; 1125 struct nv50_pc *pc; 1126 boolean ret; 1127 1128 pc = CALLOC_STRUCT(nv50_pc); 1129 if (!pc) 1130 return FALSE; 1131 pc->p = p; 1132 pc->p->cfg.high_temp = 4; 1133 1134 ret = nv50_program_tx_prep(pc); 1135 if (ret == FALSE) 1136 goto out_cleanup; 1137 1138 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1139 while (!tgsi_parse_end_of_tokens(&parse)) { 1140 const union tgsi_full_token *tok = &parse.FullToken; 1141 1142 tgsi_parse_token(&parse); 1143 1144 switch (tok->Token.Type) { 1145 case TGSI_TOKEN_TYPE_INSTRUCTION: 1146 ret = nv50_program_tx_insn(pc, tok); 1147 if (ret == FALSE) 1148 goto out_err; 1149 break; 1150 default: 1151 break; 1152 } 1153 } 1154 1155 p->immd_nr = pc->immd_nr * 4; 1156 p->immd = pc->immd_buf; 1157 1158out_err: 1159 tgsi_parse_free(&parse); 1160 1161out_cleanup: 1162 return ret; 1163} 1164 1165static void 1166nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1167{ 1168 int i; 1169 1170 if (nv50_program_tx(p) == FALSE) 1171 assert(0); 1172 /* *not* sufficient, it's fine if last inst is long and 1173 * NOT immd - otherwise it's fucked fucked fucked */ 1174 p->insns[p->insns_nr - 1] |= 0x00000001; 1175 1176 if (p->type == NV50_PROG_VERTEX) { 1177 for (i = 0; i < p->insns_nr; i++) 1178 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1179 } else { 1180 for (i = 0; i < p->insns_nr; i++) 1181 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1182 } 1183 1184 p->translated = TRUE; 1185} 1186 1187static void 1188nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1189{ 1190 int i; 1191 1192 for (i = 0; i < p->immd_nr; i++) { 1193 BEGIN_RING(tesla, 0x0f00, 2); 1194 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 1195 OUT_RING (fui(p->immd[i])); 1196 } 1197} 1198 1199static void 1200nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1201{ 1202 struct pipe_winsys *ws = nv50->pipe.winsys; 1203 void *map; 1204 1205 if (!p->buffer) 1206 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1207 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1208 memcpy(map, p->insns, p->insns_nr * 4); 1209 ws->buffer_unmap(ws, p->buffer); 1210} 1211 1212void 1213nv50_vertprog_validate(struct nv50_context *nv50) 1214{ 1215 struct nouveau_grobj *tesla = nv50->screen->tesla; 1216 struct nv50_program *p = nv50->vertprog; 1217 struct nouveau_stateobj *so; 1218 1219 if (!p->translated) { 1220 nv50_program_validate(nv50, p); 1221 if (!p->translated) 1222 assert(0); 1223 } 1224 1225 nv50_program_validate_data(nv50, p); 1226 nv50_program_validate_code(nv50, p); 1227 1228 so = so_new(11, 2); 1229 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1230 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1231 NOUVEAU_BO_HIGH, 0, 0); 1232 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1233 NOUVEAU_BO_LOW, 0, 0); 1234 so_method(so, tesla, 0x1650, 2); 1235 so_data (so, p->cfg.vp.attr[0]); 1236 so_data (so, p->cfg.vp.attr[1]); 1237 so_method(so, tesla, 0x16ac, 2); 1238 so_data (so, 8); 1239 so_data (so, p->cfg.high_temp); 1240 so_method(so, tesla, 0x140c, 1); 1241 so_data (so, 0); /* program start offset */ 1242 so_emit(nv50->screen->nvws, so); 1243 so_ref(NULL, &so); 1244} 1245 1246void 1247nv50_fragprog_validate(struct nv50_context *nv50) 1248{ 1249 struct nouveau_grobj *tesla = nv50->screen->tesla; 1250 struct nv50_program *p = nv50->fragprog; 1251 struct nouveau_stateobj *so; 1252 1253 if (!p->translated) { 1254 nv50_program_validate(nv50, p); 1255 if (!p->translated) 1256 assert(0); 1257 } 1258 1259 nv50_program_validate_data(nv50, p); 1260 nv50_program_validate_code(nv50, p); 1261 1262 so = so_new(7, 2); 1263 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1264 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1265 NOUVEAU_BO_HIGH, 0, 0); 1266 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1267 NOUVEAU_BO_LOW, 0, 0); 1268 so_method(so, tesla, 0x198c, 1); 1269 so_data (so, p->cfg.high_temp); 1270 so_method(so, tesla, 0x1414, 1); 1271 so_data (so, 0); /* program start offset */ 1272 so_emit(nv50->screen->nvws, so); 1273 so_ref(NULL, &so); 1274} 1275 1276void 1277nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1278{ 1279 struct pipe_winsys *ws = nv50->pipe.winsys; 1280 1281 if (p->insns_nr) { 1282 if (p->insns) 1283 FREE(p->insns); 1284 p->insns_nr = 0; 1285 } 1286 1287 if (p->buffer) 1288 pipe_buffer_reference(ws, &p->buffer, NULL); 1289 1290 p->translated = 0; 1291} 1292 1293