/* nv50_program.c revision 33e4d30d50344be26398a51365bea1be37487403 */
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL 16 * LIT - other buggery 17 * POW 18 * SWZ - negation ARGH 19 * SAT 20 * 21 * MSB - Like MAD, but MUL+SUB 22 * - Fuck it off, introduce a way to negate args for ops that 23 * support it. 24 * 25 * Look into inlining IMMD for ops other than MOV 26 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 27 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 28 */ 29struct nv50_reg { 30 enum { 31 P_TEMP, 32 P_ATTR, 33 P_RESULT, 34 P_CONST, 35 P_IMMD 36 } type; 37 int index; 38 39 int hw; 40 int neg; 41}; 42 43struct nv50_pc { 44 struct nv50_program *p; 45 46 /* hw resources */ 47 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 48 49 /* tgsi resources */ 50 struct nv50_reg *temp; 51 int temp_nr; 52 struct nv50_reg *attr; 53 int attr_nr; 54 struct nv50_reg *result; 55 int result_nr; 56 struct nv50_reg *param; 57 int param_nr; 58 struct nv50_reg *immd; 59 float *immd_buf; 60 int immd_nr; 61 62 struct nv50_reg *temp_temp[8]; 63 unsigned temp_temp_nr; 64}; 65 66static void 67alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 68{ 69 int i; 70 71 if (reg->type != P_TEMP) 72 return; 73 74 if (reg->hw >= 0) { 75 /*XXX: do this here too to catch FP temp-as-attr usage.. 
76 * not clean, but works */ 77 if (pc->p->cfg.high_temp < (reg->hw + 1)) 78 pc->p->cfg.high_temp = reg->hw + 1; 79 return; 80 } 81 82 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 83 if (!(pc->r_temp[i])) { 84 pc->r_temp[i] = reg; 85 reg->hw = i; 86 if (pc->p->cfg.high_temp < (i + 1)) 87 pc->p->cfg.high_temp = i + 1; 88 return; 89 } 90 } 91 92 assert(0); 93} 94 95static struct nv50_reg * 96alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 97{ 98 struct nv50_reg *r; 99 int i; 100 101 if (dst && dst->type == P_TEMP && dst->hw == -1) 102 return dst; 103 104 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 105 if (!pc->r_temp[i]) { 106 r = CALLOC_STRUCT(nv50_reg); 107 r->type = P_TEMP; 108 r->index = -1; 109 r->hw = i; 110 pc->r_temp[i] = r; 111 return r; 112 } 113 } 114 115 assert(0); 116 return NULL; 117} 118 119static void 120free_temp(struct nv50_pc *pc, struct nv50_reg *r) 121{ 122 if (r->index == -1) { 123 FREE(pc->r_temp[r->hw]); 124 pc->r_temp[r->hw] = NULL; 125 } 126} 127 128static struct nv50_reg * 129temp_temp(struct nv50_pc *pc) 130{ 131 if (pc->temp_temp_nr >= 8) 132 assert(0); 133 134 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 135 return pc->temp_temp[pc->temp_temp_nr++]; 136} 137 138static void 139kill_temp_temp(struct nv50_pc *pc) 140{ 141 int i; 142 143 for (i = 0; i < pc->temp_temp_nr; i++) 144 free_temp(pc, pc->temp_temp[i]); 145 pc->temp_temp_nr = 0; 146} 147 148static int 149ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 150{ 151 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 152 sizeof(float)); 153 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 154 pc->immd_buf[(pc->immd_nr * 4) + 1] = x; 155 pc->immd_buf[(pc->immd_nr * 4) + 2] = x; 156 pc->immd_buf[(pc->immd_nr * 4) + 3] = x; 157 158 return pc->immd_nr++; 159} 160 161static struct nv50_reg * 162alloc_immd(struct nv50_pc *pc, float f) 163{ 164 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 165 unsigned hw; 166 167 hw = ctor_immd(pc, f, 0, 0, 0); 168 r->type = P_IMMD; 
169 r->hw = hw; 170 r->index = -1; 171 return r; 172} 173 174static struct nv50_reg * 175tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 176{ 177 switch (dst->DstRegister.File) { 178 case TGSI_FILE_TEMPORARY: 179 return &pc->temp[dst->DstRegister.Index * 4 + c]; 180 case TGSI_FILE_OUTPUT: 181 return &pc->result[dst->DstRegister.Index * 4 + c]; 182 case TGSI_FILE_NULL: 183 return NULL; 184 default: 185 break; 186 } 187 188 return NULL; 189} 190 191static struct nv50_reg * 192tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) 193{ 194 /* Handle swizzling */ 195 switch (c) { 196 case 0: c = src->SrcRegister.SwizzleX; break; 197 case 1: c = src->SrcRegister.SwizzleY; break; 198 case 2: c = src->SrcRegister.SwizzleZ; break; 199 case 3: c = src->SrcRegister.SwizzleW; break; 200 default: 201 assert(0); 202 } 203 204 switch (src->SrcRegister.File) { 205 case TGSI_FILE_INPUT: 206 return &pc->attr[src->SrcRegister.Index * 4 + c]; 207 case TGSI_FILE_TEMPORARY: 208 return &pc->temp[src->SrcRegister.Index * 4 + c]; 209 case TGSI_FILE_CONSTANT: 210 return &pc->param[src->SrcRegister.Index * 4 + c]; 211 case TGSI_FILE_IMMEDIATE: 212 return &pc->immd[src->SrcRegister.Index * 4 + c]; 213 default: 214 break; 215 } 216 217 return NULL; 218} 219 220static void 221emit(struct nv50_pc *pc, unsigned *inst) 222{ 223 struct nv50_program *p = pc->p; 224 225 if (inst[0] & 1) { 226 p->insns_nr += 2; 227 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 228 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 229 } else { 230 p->insns_nr += 1; 231 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 232 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 233 } 234} 235 236static INLINE void set_long(struct nv50_pc *, unsigned *); 237 238static boolean 239is_long(unsigned *inst) 240{ 241 if (inst[0] & 1) 242 return TRUE; 243 return FALSE; 244} 245 246static boolean 247is_immd(unsigned *inst) 248{ 
249 if (is_long(inst) && (inst[1] & 3) == 3) 250 return TRUE; 251 return FALSE; 252} 253 254static INLINE void 255set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 256{ 257 set_long(pc, inst); 258 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 259 inst[1] |= (pred << 7) | (idx << 12); 260} 261 262static INLINE void 263set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 264{ 265 set_long(pc, inst); 266 inst[1] &= ~((0x3 << 4) | (1 << 6)); 267 inst[1] |= (idx << 4) | (on << 6); 268} 269 270static INLINE void 271set_long(struct nv50_pc *pc, unsigned *inst) 272{ 273 if (is_long(inst)) 274 return; 275 276 inst[0] |= 1; 277 set_pred(pc, 0xf, 0, inst); 278 set_pred_wr(pc, 0, 0, inst); 279} 280 281static INLINE void 282set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 283{ 284 if (dst->type == P_RESULT) { 285 set_long(pc, inst); 286 inst[1] |= 0x00000008; 287 } 288 289 alloc_reg(pc, dst); 290 inst[0] |= (dst->hw << 2); 291} 292 293static INLINE void 294set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 295{ 296 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 297 298 set_long(pc, inst); 299 /*XXX: can't be predicated - bits overlap.. catch cases where both 300 * are required and avoid them. 
*/ 301 set_pred(pc, 0, 0, inst); 302 set_pred_wr(pc, 0, 0, inst); 303 304 inst[1] |= 0x00000002 | 0x00000001; 305 inst[0] |= (val & 0x3f) << 16; 306 inst[1] |= (val >> 6) << 2; 307} 308 309static void 310emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 311 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 312{ 313 unsigned inst[2] = { 0, 0 }; 314 315 inst[0] |= 0x80000000; 316 set_dst(pc, dst, inst); 317 alloc_reg(pc, iv); 318 inst[0] |= (iv->hw << 9); 319 alloc_reg(pc, src); 320 inst[0] |= (src->hw << 16); 321 if (noperspective) 322 inst[0] |= (1 << 25); 323 324 emit(pc, inst); 325} 326 327static void 328set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 329{ 330 set_long(pc, inst); 331 if (src->type == P_IMMD) { 332 inst[1] |= (NV50_CB_PMISC << 22); 333 } else { 334 if (pc->p->type == NV50_PROG_VERTEX) 335 inst[1] |= (NV50_CB_PVP << 22); 336 else 337 inst[1] |= (NV50_CB_PFP << 22); 338 } 339} 340 341static void 342emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 343{ 344 unsigned inst[2] = { 0, 0 }; 345 346 inst[0] |= 0x10000000; 347 348 set_dst(pc, dst, inst); 349 350 if (dst->type != P_RESULT && src->type == P_IMMD) { 351 set_immd(pc, src, inst); 352 /*XXX: 32-bit, but steals part of "half" reg space - need to 353 * catch and handle this case if/when we do half-regs 354 */ 355 inst[0] |= 0x00008000; 356 } else 357 if (src->type == P_IMMD || src->type == P_CONST) { 358 set_long(pc, inst); 359 set_cseg(pc, src, inst); 360 inst[0] |= (src->hw << 9); 361 inst[1] |= 0x20000000; /* src0 const? */ 362 } else { 363 if (src->type == P_ATTR) { 364 set_long(pc, inst); 365 inst[1] |= 0x00200000; 366 } 367 368 alloc_reg(pc, src); 369 inst[0] |= (src->hw << 9); 370 } 371 372 /* We really should support "half" instructions here at some point, 373 * but I don't feel confident enough about them yet. 
374 */ 375 set_long(pc, inst); 376 if (is_long(inst) && !is_immd(inst)) { 377 inst[1] |= 0x04000000; /* 32-bit */ 378 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 379 } 380 381 emit(pc, inst); 382} 383 384static boolean 385check_swap_src_0_1(struct nv50_pc *pc, 386 struct nv50_reg **s0, struct nv50_reg **s1) 387{ 388 struct nv50_reg *src0 = *s0, *src1 = *s1; 389 390 if (src0->type == P_CONST) { 391 if (src1->type != P_CONST) { 392 *s0 = src1; 393 *s1 = src0; 394 return TRUE; 395 } 396 } else 397 if (src1->type == P_ATTR) { 398 if (src0->type != P_ATTR) { 399 *s0 = src1; 400 *s1 = src0; 401 return TRUE; 402 } 403 } 404 405 return FALSE; 406} 407 408static void 409set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 410{ 411 if (src->type == P_ATTR) { 412 set_long(pc, inst); 413 inst[1] |= 0x00200000; 414 } else 415 if (src->type == P_CONST || src->type == P_IMMD) { 416 struct nv50_reg *temp = temp_temp(pc); 417 418 emit_mov(pc, temp, src); 419 src = temp; 420 } 421 422 alloc_reg(pc, src); 423 inst[0] |= (src->hw << 9); 424} 425 426static void 427set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 428{ 429 if (src->type == P_ATTR) { 430 struct nv50_reg *temp = temp_temp(pc); 431 432 emit_mov(pc, temp, src); 433 src = temp; 434 } else 435 if (src->type == P_CONST || src->type == P_IMMD) { 436 set_cseg(pc, src, inst); 437 inst[0] |= 0x00800000; 438 } 439 440 alloc_reg(pc, src); 441 inst[0] |= (src->hw << 16); 442} 443 444static void 445set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 446{ 447 set_long(pc, inst); 448 449 if (src->type == P_ATTR) { 450 struct nv50_reg *temp = temp_temp(pc); 451 452 emit_mov(pc, temp, src); 453 src = temp; 454 } else 455 if (src->type == P_CONST || src->type == P_IMMD) { 456 set_cseg(pc, src, inst); 457 inst[0] |= 0x01000000; 458 } 459 460 alloc_reg(pc, src); 461 inst[1] |= (src->hw << 14); 462} 463 464static void 465emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg 
*src0, 466 struct nv50_reg *src1) 467{ 468 unsigned inst[2] = { 0, 0 }; 469 470 inst[0] |= 0xc0000000; 471 472 check_swap_src_0_1(pc, &src0, &src1); 473 set_dst(pc, dst, inst); 474 set_src_0(pc, src0, inst); 475 set_src_1(pc, src1, inst); 476 477 emit(pc, inst); 478} 479 480static void 481emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 482 struct nv50_reg *src0, struct nv50_reg *src1) 483{ 484 unsigned inst[2] = { 0, 0 }; 485 486 inst[0] |= 0xb0000000; 487 488 check_swap_src_0_1(pc, &src0, &src1); 489 set_dst(pc, dst, inst); 490 set_src_0(pc, src0, inst); 491 if (is_long(inst)) 492 set_src_2(pc, src1, inst); 493 else 494 set_src_1(pc, src1, inst); 495 496 emit(pc, inst); 497} 498 499static void 500emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 501 struct nv50_reg *src0, struct nv50_reg *src1) 502{ 503 unsigned inst[2] = { 0, 0 }; 504 505 set_long(pc, inst); 506 inst[0] |= 0xb0000000; 507 inst[1] |= (sub << 29); 508 509 check_swap_src_0_1(pc, &src0, &src1); 510 set_dst(pc, dst, inst); 511 set_src_0(pc, src0, inst); 512 set_src_1(pc, src1, inst); 513 514 emit(pc, inst); 515} 516 517static void 518emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 519 struct nv50_reg *src1) 520{ 521 unsigned inst[2] = { 0, 0 }; 522 523 inst[0] |= 0xb0000000; 524 525 set_long(pc, inst); 526 if (check_swap_src_0_1(pc, &src0, &src1)) 527 inst[1] |= 0x04000000; 528 else 529 inst[1] |= 0x08000000; 530 531 set_dst(pc, dst, inst); 532 set_src_0(pc, src0, inst); 533 set_src_2(pc, src1, inst); 534 535 emit(pc, inst); 536} 537 538static void 539emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 540 struct nv50_reg *src1, struct nv50_reg *src2) 541{ 542 unsigned inst[2] = { 0, 0 }; 543 544 inst[0] |= 0xe0000000; 545 546 check_swap_src_0_1(pc, &src0, &src1); 547 set_dst(pc, dst, inst); 548 set_src_0(pc, src0, inst); 549 set_src_1(pc, src1, inst); 550 set_src_2(pc, src2, inst); 551 552 emit(pc, inst); 553} 554 555static void 
556emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 557 struct nv50_reg *src1, struct nv50_reg *src2) 558{ 559 unsigned inst[2] = { 0, 0 }; 560 561 inst[0] |= 0xe0000000; 562 set_long(pc, inst); 563 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 564 565 check_swap_src_0_1(pc, &src0, &src1); 566 set_dst(pc, dst, inst); 567 set_src_0(pc, src0, inst); 568 set_src_1(pc, src1, inst); 569 set_src_2(pc, src2, inst); 570 571 emit(pc, inst); 572} 573 574static void 575emit_flop(struct nv50_pc *pc, unsigned sub, 576 struct nv50_reg *dst, struct nv50_reg *src) 577{ 578 unsigned inst[2] = { 0, 0 }; 579 580 inst[0] |= 0x90000000; 581 if (sub) { 582 set_long(pc, inst); 583 inst[1] |= (sub << 29); 584 } 585 586 set_dst(pc, dst, inst); 587 set_src_0(pc, src, inst); 588 589 emit(pc, inst); 590} 591 592static void 593emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 594{ 595 unsigned inst[2] = { 0, 0 }; 596 597 inst[0] |= 0xb0000000; 598 599 set_dst(pc, dst, inst); 600 set_src_0(pc, src, inst); 601 set_long(pc, inst); 602 inst[1] |= (6 << 29) | 0x00004000; 603 604 emit(pc, inst); 605} 606/*XXX: inaccurate results.. why? */ 607#define ALLOW_SET_SWAP 0 608 609static void 610emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 611 struct nv50_reg *src0, struct nv50_reg *src1) 612{ 613 unsigned inst[2] = { 0, 0 }; 614#if ALLOW_SET_SWAP 615 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; 616#endif 617 struct nv50_reg *rdst; 618 619#if ALLOW_SET_SWAP 620 assert(c_op <= 7); 621 if (check_swap_src_0_1(pc, &src0, &src1)) 622 c_op = inv_cop[c_op]; 623#endif 624 625 rdst = dst; 626 if (dst->type != P_TEMP) 627 dst = alloc_temp(pc, NULL); 628 629 /* set.u32 */ 630 set_long(pc, inst); 631 inst[0] |= 0xb0000000; 632 inst[1] |= (3 << 29); 633 inst[1] |= (c_op << 14); 634 /*XXX: breaks things, .u32 by default? 635 * decuda will disasm as .u16 and use .lo/.hi regs, but this 636 * doesn't seem to match what the hw actually does. 
637 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 638 */ 639 set_dst(pc, dst, inst); 640 set_src_0(pc, src0, inst); 641 set_src_1(pc, src1, inst); 642 emit(pc, inst); 643 644 /* cvt.f32.u32 */ 645 inst[0] = 0xa0000001; 646 inst[1] = 0x64014780; 647 set_dst(pc, rdst, inst); 648 set_src_0(pc, dst, inst); 649 emit(pc, inst); 650 651 if (dst != rdst) 652 free_temp(pc, dst); 653} 654 655static void 656emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 657{ 658 unsigned inst[2] = { 0, 0 }; 659 660 set_long(pc, inst); 661 inst[0] = 0xa0000000; /* cvt */ 662 inst[1] |= (6 << 29); /* cvt */ 663 inst[1] |= 0x08000000; /* integer mode */ 664 inst[1] |= 0x04000000; /* 32 bit */ 665 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 666 inst[1] |= (1 << 14); /* src .f32 */ 667 set_dst(pc, dst, inst); 668 set_src_0(pc, src, inst); 669 670 emit(pc, inst); 671} 672 673static boolean 674nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 675{ 676 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 677 struct nv50_reg *dst[4], *src[3][4], *temp; 678 unsigned mask; 679 int i, c; 680 681 NOUVEAU_ERR("insn %p\n", tok); 682 683 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 684 685 for (c = 0; c < 4; c++) { 686 if (mask & (1 << c)) 687 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 688 else 689 dst[c] = NULL; 690 } 691 692 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 693 for (c = 0; c < 4; c++) 694 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 695 } 696 697 switch (inst->Instruction.Opcode) { 698 case TGSI_OPCODE_ABS: 699 for (c = 0; c < 4; c++) { 700 unsigned inst[2] = { 0, 0 }; 701 702 set_long(pc, inst); 703 inst[0] = 0xa0000000; /* cvt */ 704 inst[1] |= (6 << 29); /* cvt */ 705 inst[1] |= 0x04000000; /* 32 bit */ 706 inst[1] |= (1 << 14); /* src .f32 */ 707 inst[1] |= ((1 << 6) << 14); /* .abs */ 708 set_dst(pc, dst[c], inst); 709 set_src_0(pc, src[0][c], inst); 710 emit(pc, inst); 711 } 
712 break; 713 case TGSI_OPCODE_ADD: 714 for (c = 0; c < 4; c++) { 715 if (!(mask & (1 << c))) 716 continue; 717 emit_add(pc, dst[c], src[0][c], src[1][c]); 718 } 719 break; 720 case TGSI_OPCODE_COS: 721 for (c = 0; c < 4; c++) { 722 if (!(mask & (1 << c))) 723 continue; 724 emit_flop(pc, 5, dst[c], src[0][c]); 725 } 726 break; 727 case TGSI_OPCODE_DP3: 728 temp = alloc_temp(pc, NULL); 729 emit_mul(pc, temp, src[0][0], src[1][0]); 730 emit_mad(pc, temp, src[0][1], src[1][1], temp); 731 emit_mad(pc, temp, src[0][2], src[1][2], temp); 732 for (c = 0; c < 4; c++) { 733 if (!(mask & (1 << c))) 734 continue; 735 emit_mov(pc, dst[c], temp); 736 } 737 free_temp(pc, temp); 738 break; 739 case TGSI_OPCODE_DP4: 740 temp = alloc_temp(pc, NULL); 741 emit_mul(pc, temp, src[0][0], src[1][0]); 742 emit_mad(pc, temp, src[0][1], src[1][1], temp); 743 emit_mad(pc, temp, src[0][2], src[1][2], temp); 744 emit_mad(pc, temp, src[0][3], src[1][3], temp); 745 for (c = 0; c < 4; c++) { 746 if (!(mask & (1 << c))) 747 continue; 748 emit_mov(pc, dst[c], temp); 749 } 750 free_temp(pc, temp); 751 break; 752 case TGSI_OPCODE_DPH: 753 temp = alloc_temp(pc, NULL); 754 emit_mul(pc, temp, src[0][0], src[1][0]); 755 emit_mad(pc, temp, src[0][1], src[1][1], temp); 756 emit_mad(pc, temp, src[0][2], src[1][2], temp); 757 emit_add(pc, temp, src[1][3], temp); 758 for (c = 0; c < 4; c++) { 759 if (!(mask & (1 << c))) 760 continue; 761 emit_mov(pc, dst[c], temp); 762 } 763 free_temp(pc, temp); 764 break; 765 case TGSI_OPCODE_DST: 766 { 767 struct nv50_reg *one = alloc_immd(pc, 1.0); 768 emit_mov(pc, dst[0], one); 769 emit_mul(pc, dst[1], src[0][1], src[1][1]); 770 emit_mov(pc, dst[2], src[0][2]); 771 emit_mov(pc, dst[3], src[1][3]); 772 FREE(one); 773 } 774 break; 775 case TGSI_OPCODE_EX2: 776 temp = alloc_temp(pc, NULL); 777 for (c = 0; c < 4; c++) { 778 if (!(mask & (1 << c))) 779 continue; 780 emit_preex2(pc, temp, src[0][c]); 781 emit_flop(pc, 6, dst[c], temp); 782 } 783 free_temp(pc, temp); 784 break; 
785 case TGSI_OPCODE_FLR: 786 for (c = 0; c < 4; c++) { 787 if (!(mask & (1 << c))) 788 continue; 789 emit_flr(pc, dst[c], src[0][c]); 790 } 791 break; 792 case TGSI_OPCODE_FRC: 793 temp = alloc_temp(pc, NULL); 794 for (c = 0; c < 4; c++) { 795 if (!(mask & (1 << c))) 796 continue; 797 emit_flr(pc, temp, src[0][c]); 798 emit_sub(pc, dst[c], src[0][c], temp); 799 } 800 free_temp(pc, temp); 801 break; 802 case TGSI_OPCODE_LG2: 803 for (c = 0; c < 4; c++) { 804 if (!(mask & (1 << c))) 805 continue; 806 emit_flop(pc, 3, dst[c], src[0][c]); 807 } 808 break; 809 case TGSI_OPCODE_MAD: 810 for (c = 0; c < 4; c++) { 811 if (!(mask & (1 << c))) 812 continue; 813 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 814 } 815 break; 816 case TGSI_OPCODE_MAX: 817 for (c = 0; c < 4; c++) { 818 if (!(mask & (1 << c))) 819 continue; 820 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 821 } 822 break; 823 case TGSI_OPCODE_MIN: 824 for (c = 0; c < 4; c++) { 825 if (!(mask & (1 << c))) 826 continue; 827 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 828 } 829 break; 830 case TGSI_OPCODE_MOV: 831 for (c = 0; c < 4; c++) { 832 if (!(mask & (1 << c))) 833 continue; 834 emit_mov(pc, dst[c], src[0][c]); 835 } 836 break; 837 case TGSI_OPCODE_MUL: 838 for (c = 0; c < 4; c++) { 839 if (!(mask & (1 << c))) 840 continue; 841 emit_mul(pc, dst[c], src[0][c], src[1][c]); 842 } 843 break; 844 case TGSI_OPCODE_RCP: 845 for (c = 0; c < 4; c++) { 846 if (!(mask & (1 << c))) 847 continue; 848 emit_flop(pc, 0, dst[c], src[0][c]); 849 } 850 break; 851 case TGSI_OPCODE_RSQ: 852 for (c = 0; c < 4; c++) { 853 if (!(mask & (1 << c))) 854 continue; 855 emit_flop(pc, 2, dst[c], src[0][c]); 856 } 857 break; 858 case TGSI_OPCODE_SGE: 859 for (c = 0; c < 4; c++) { 860 if (!(mask & (1 << c))) 861 continue; 862 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 863 } 864 break; 865 case TGSI_OPCODE_SIN: 866 for (c = 0; c < 4; c++) { 867 if (!(mask & (1 << c))) 868 continue; 869 emit_flop(pc, 4, dst[c], 
src[0][c]); 870 } 871 break; 872 case TGSI_OPCODE_SLT: 873 for (c = 0; c < 4; c++) { 874 if (!(mask & (1 << c))) 875 continue; 876 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 877 } 878 break; 879 case TGSI_OPCODE_SUB: 880 for (c = 0; c < 4; c++) { 881 if (!(mask & (1 << c))) 882 continue; 883 emit_sub(pc, dst[c], src[0][c], src[1][c]); 884 } 885 break; 886 case TGSI_OPCODE_XPD: 887 temp = alloc_temp(pc, NULL); 888 emit_mul(pc, temp, src[0][2], src[1][1]); 889 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 890 emit_mul(pc, temp, src[0][0], src[1][2]); 891 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 892 emit_mul(pc, temp, src[0][1], src[1][0]); 893 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 894 free_temp(pc, temp); 895 break; 896 case TGSI_OPCODE_END: 897 break; 898 default: 899 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 900 return FALSE; 901 } 902 903 kill_temp_temp(pc); 904 return TRUE; 905} 906 907static boolean 908nv50_program_tx_prep(struct nv50_pc *pc) 909{ 910 struct tgsi_parse_context p; 911 boolean ret = FALSE; 912 unsigned i, c; 913 914 tgsi_parse_init(&p, pc->p->pipe.tokens); 915 while (!tgsi_parse_end_of_tokens(&p)) { 916 const union tgsi_full_token *tok = &p.FullToken; 917 918 tgsi_parse_token(&p); 919 switch (tok->Token.Type) { 920 case TGSI_TOKEN_TYPE_IMMEDIATE: 921 { 922 const struct tgsi_full_immediate *imm = 923 &p.FullToken.FullImmediate; 924 925 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 926 imm->u.ImmediateFloat32[1].Float, 927 imm->u.ImmediateFloat32[2].Float, 928 imm->u.ImmediateFloat32[3].Float); 929 } 930 break; 931 case TGSI_TOKEN_TYPE_DECLARATION: 932 { 933 const struct tgsi_full_declaration *d; 934 unsigned last; 935 936 d = &p.FullToken.FullDeclaration; 937 last = d->u.DeclarationRange.Last; 938 939 switch (d->Declaration.File) { 940 case TGSI_FILE_TEMPORARY: 941 if (pc->temp_nr < (last + 1)) 942 pc->temp_nr = last + 1; 943 break; 944 case TGSI_FILE_OUTPUT: 945 if (pc->result_nr < (last + 1)) 946 
pc->result_nr = last + 1; 947 break; 948 case TGSI_FILE_INPUT: 949 if (pc->attr_nr < (last + 1)) 950 pc->attr_nr = last + 1; 951 break; 952 case TGSI_FILE_CONSTANT: 953 if (pc->param_nr < (last + 1)) 954 pc->param_nr = last + 1; 955 break; 956 default: 957 NOUVEAU_ERR("bad decl file %d\n", 958 d->Declaration.File); 959 goto out_err; 960 } 961 } 962 break; 963 case TGSI_TOKEN_TYPE_INSTRUCTION: 964 break; 965 default: 966 break; 967 } 968 } 969 970 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 971 if (pc->temp_nr) { 972 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 973 if (!pc->temp) 974 goto out_err; 975 976 for (i = 0; i < pc->temp_nr; i++) { 977 for (c = 0; c < 4; c++) { 978 pc->temp[i*4+c].type = P_TEMP; 979 pc->temp[i*4+c].hw = -1; 980 pc->temp[i*4+c].index = i; 981 } 982 } 983 } 984 985 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 986 if (pc->attr_nr) { 987 struct nv50_reg *iv = NULL, *tmp = NULL; 988 int aid = 0; 989 990 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 991 if (!pc->attr) 992 goto out_err; 993 994 if (pc->p->type == NV50_PROG_FRAGMENT) { 995 iv = alloc_temp(pc, NULL); 996 aid++; 997 } 998 999 for (i = 0; i < pc->attr_nr; i++) { 1000 struct nv50_reg *a = &pc->attr[i*4]; 1001 1002 for (c = 0; c < 4; c++) { 1003 if (pc->p->type == NV50_PROG_FRAGMENT) { 1004 struct nv50_reg *at = 1005 alloc_temp(pc, NULL); 1006 pc->attr[i*4+c].type = at->type; 1007 pc->attr[i*4+c].hw = at->hw; 1008 pc->attr[i*4+c].index = at->index; 1009 } else { 1010 pc->p->cfg.vp.attr[aid/32] |= 1011 (1 << (aid % 32)); 1012 pc->attr[i*4+c].type = P_ATTR; 1013 pc->attr[i*4+c].hw = aid++; 1014 pc->attr[i*4+c].index = i; 1015 } 1016 } 1017 1018 if (pc->p->type != NV50_PROG_FRAGMENT) 1019 continue; 1020 1021 emit_interp(pc, iv, iv, iv, FALSE); 1022 tmp = alloc_temp(pc, NULL); 1023 { 1024 unsigned inst[2] = { 0, 0 }; 1025 inst[0] = 0x90000000; 1026 inst[0] |= (tmp->hw << 2); 1027 emit(pc, inst); 1028 } 1029 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1030 
emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1031 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1032 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1033 free_temp(pc, tmp); 1034 } 1035 1036 if (iv) 1037 free_temp(pc, iv); 1038 } 1039 1040 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1041 if (pc->result_nr) { 1042 int rid = 0; 1043 1044 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1045 if (!pc->result) 1046 goto out_err; 1047 1048 for (i = 0; i < pc->result_nr; i++) { 1049 for (c = 0; c < 4; c++) { 1050 if (pc->p->type == NV50_PROG_FRAGMENT) 1051 pc->result[i*4+c].type = P_TEMP; 1052 else 1053 pc->result[i*4+c].type = P_RESULT; 1054 pc->result[i*4+c].hw = rid++; 1055 pc->result[i*4+c].index = i; 1056 } 1057 } 1058 } 1059 1060 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1061 if (pc->param_nr) { 1062 int rid = 0; 1063 1064 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1065 if (!pc->param) 1066 goto out_err; 1067 1068 for (i = 0; i < pc->param_nr; i++) { 1069 for (c = 0; c < 4; c++) { 1070 pc->param[i*4+c].type = P_CONST; 1071 pc->param[i*4+c].hw = rid++; 1072 pc->param[i*4+c].index = i; 1073 } 1074 } 1075 } 1076 1077 if (pc->immd_nr) { 1078 int rid = 0; 1079 1080 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1081 if (!pc->immd) 1082 goto out_err; 1083 1084 for (i = 0; i < pc->immd_nr; i++) { 1085 for (c = 0; c < 4; c++) { 1086 pc->immd[i*4+c].type = P_IMMD; 1087 pc->immd[i*4+c].hw = rid++; 1088 pc->immd[i*4+c].index = i; 1089 } 1090 } 1091 } 1092 1093 ret = TRUE; 1094out_err: 1095 tgsi_parse_free(&p); 1096 return ret; 1097} 1098 1099static boolean 1100nv50_program_tx(struct nv50_program *p) 1101{ 1102 struct tgsi_parse_context parse; 1103 struct nv50_pc *pc; 1104 boolean ret; 1105 1106 pc = CALLOC_STRUCT(nv50_pc); 1107 if (!pc) 1108 return FALSE; 1109 pc->p = p; 1110 pc->p->cfg.high_temp = 4; 1111 1112 ret = nv50_program_tx_prep(pc); 1113 if (ret == FALSE) 1114 goto out_cleanup; 1115 1116 tgsi_parse_init(&parse, 
pc->p->pipe.tokens); 1117 while (!tgsi_parse_end_of_tokens(&parse)) { 1118 const union tgsi_full_token *tok = &parse.FullToken; 1119 1120 tgsi_parse_token(&parse); 1121 1122 switch (tok->Token.Type) { 1123 case TGSI_TOKEN_TYPE_INSTRUCTION: 1124 ret = nv50_program_tx_insn(pc, tok); 1125 if (ret == FALSE) 1126 goto out_err; 1127 break; 1128 default: 1129 break; 1130 } 1131 } 1132 1133 p->immd_nr = pc->immd_nr * 4; 1134 p->immd = pc->immd_buf; 1135 1136out_err: 1137 tgsi_parse_free(&parse); 1138 1139out_cleanup: 1140 return ret; 1141} 1142 1143static void 1144nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1145{ 1146 int i; 1147 1148 if (nv50_program_tx(p) == FALSE) 1149 assert(0); 1150 /* *not* sufficient, it's fine if last inst is long and 1151 * NOT immd - otherwise it's fucked fucked fucked */ 1152 p->insns[p->insns_nr - 1] |= 0x00000001; 1153 1154 if (p->type == NV50_PROG_VERTEX) { 1155 for (i = 0; i < p->insns_nr; i++) 1156 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1157 } else { 1158 for (i = 0; i < p->insns_nr; i++) 1159 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1160 } 1161 1162 p->translated = TRUE; 1163} 1164 1165static void 1166nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1167{ 1168 int i; 1169 1170 for (i = 0; i < p->immd_nr; i++) { 1171 BEGIN_RING(tesla, 0x0f00, 2); 1172 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 1173 OUT_RING (fui(p->immd[i])); 1174 } 1175} 1176 1177static void 1178nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1179{ 1180 struct pipe_winsys *ws = nv50->pipe.winsys; 1181 void *map; 1182 1183 if (!p->buffer) 1184 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1185 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1186 memcpy(map, p->insns, p->insns_nr * 4); 1187 ws->buffer_unmap(ws, p->buffer); 1188} 1189 1190void 1191nv50_vertprog_validate(struct nv50_context *nv50) 1192{ 1193 struct nouveau_grobj *tesla = 
nv50->screen->tesla; 1194 struct nv50_program *p = nv50->vertprog; 1195 struct nouveau_stateobj *so; 1196 1197 if (!p->translated) { 1198 nv50_program_validate(nv50, p); 1199 if (!p->translated) 1200 assert(0); 1201 } 1202 1203 nv50_program_validate_data(nv50, p); 1204 nv50_program_validate_code(nv50, p); 1205 1206 so = so_new(11, 2); 1207 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1208 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1209 NOUVEAU_BO_HIGH, 0, 0); 1210 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1211 NOUVEAU_BO_LOW, 0, 0); 1212 so_method(so, tesla, 0x1650, 2); 1213 so_data (so, p->cfg.vp.attr[0]); 1214 so_data (so, p->cfg.vp.attr[1]); 1215 so_method(so, tesla, 0x16ac, 2); 1216 so_data (so, 8); 1217 so_data (so, p->cfg.high_temp); 1218 so_method(so, tesla, 0x140c, 1); 1219 so_data (so, 0); /* program start offset */ 1220 so_emit(nv50->screen->nvws, so); 1221 so_ref(NULL, &so); 1222} 1223 1224void 1225nv50_fragprog_validate(struct nv50_context *nv50) 1226{ 1227 struct nouveau_grobj *tesla = nv50->screen->tesla; 1228 struct nv50_program *p = nv50->fragprog; 1229 struct nouveau_stateobj *so; 1230 1231 if (!p->translated) { 1232 nv50_program_validate(nv50, p); 1233 if (!p->translated) 1234 assert(0); 1235 } 1236 1237 nv50_program_validate_data(nv50, p); 1238 nv50_program_validate_code(nv50, p); 1239 1240 so = so_new(7, 2); 1241 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1242 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1243 NOUVEAU_BO_HIGH, 0, 0); 1244 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1245 NOUVEAU_BO_LOW, 0, 0); 1246 so_method(so, tesla, 0x198c, 1); 1247 so_data (so, p->cfg.high_temp); 1248 so_method(so, tesla, 0x1414, 1); 1249 so_data (so, 0); /* program start offset */ 1250 so_emit(nv50->screen->nvws, so); 1251 so_ref(NULL, &so); 1252} 1253 1254void 1255nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1256{ 1257 struct pipe_winsys *ws = 
nv50->pipe.winsys; 1258 1259 if (p->insns_nr) { 1260 if (p->insns) 1261 FREE(p->insns); 1262 p->insns_nr = 0; 1263 } 1264 1265 if (p->buffer) 1266 pipe_buffer_reference(ws, &p->buffer, NULL); 1267 1268 p->translated = 0; 1269} 1270 1271