nv50_program.c revision faa1c02546db00f69c66db18076b5b0ac86d7138
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL 16 * LIT - other buggery 17 * 18 * MSB - Like MAD, but MUL+SUB 19 * - Fuck it off, introduce a way to negate args for ops that 20 * support it. 21 * 22 * Look into inlining IMMD for ops other than MOV (make it general?) 23 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 24 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 25 * 26 * Verify half-insns work where expected - and force disable them where they 27 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 28 */ 29struct nv50_reg { 30 enum { 31 P_TEMP, 32 P_ATTR, 33 P_RESULT, 34 P_CONST, 35 P_IMMD 36 } type; 37 int index; 38 39 int hw; 40 int neg; 41}; 42 43struct nv50_pc { 44 struct nv50_program *p; 45 46 /* hw resources */ 47 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 48 49 /* tgsi resources */ 50 struct nv50_reg *temp; 51 int temp_nr; 52 struct nv50_reg *attr; 53 int attr_nr; 54 struct nv50_reg *result; 55 int result_nr; 56 struct nv50_reg *param; 57 int param_nr; 58 struct nv50_reg *immd; 59 float *immd_buf; 60 int immd_nr; 61 62 struct nv50_reg *temp_temp[8]; 63 unsigned temp_temp_nr; 64}; 65 66static void 67alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 68{ 69 int i; 70 71 if (reg->type != P_TEMP) 72 return; 73 74 if (reg->hw >= 0) { 75 /*XXX: do this here too to catch FP temp-as-attr usage.. 76 * not clean, but works */ 77 if (pc->p->cfg.high_temp < (reg->hw + 1)) 78 pc->p->cfg.high_temp = reg->hw + 1; 79 return; 80 } 81 82 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 83 if (!(pc->r_temp[i])) { 84 pc->r_temp[i] = reg; 85 reg->hw = i; 86 if (pc->p->cfg.high_temp < (i + 1)) 87 pc->p->cfg.high_temp = i + 1; 88 return; 89 } 90 } 91 92 assert(0); 93} 94 95static struct nv50_reg * 96alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 97{ 98 struct nv50_reg *r; 99 int i; 100 101 if (dst && dst->type == P_TEMP && dst->hw == -1) 102 return dst; 103 104 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 105 if (!pc->r_temp[i]) { 106 r = CALLOC_STRUCT(nv50_reg); 107 r->type = P_TEMP; 108 r->index = -1; 109 r->hw = i; 110 pc->r_temp[i] = r; 111 return r; 112 } 113 } 114 115 assert(0); 116 return NULL; 117} 118 119static void 120free_temp(struct nv50_pc *pc, struct nv50_reg *r) 121{ 122 if (r->index == -1) { 123 FREE(pc->r_temp[r->hw]); 124 pc->r_temp[r->hw] = NULL; 125 } 126} 127 128static struct nv50_reg * 129temp_temp(struct nv50_pc *pc) 130{ 131 if (pc->temp_temp_nr >= 8) 132 assert(0); 133 134 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 135 return pc->temp_temp[pc->temp_temp_nr++]; 136} 137 138static void 139kill_temp_temp(struct nv50_pc *pc) 140{ 141 int i; 142 143 for (i = 0; i < pc->temp_temp_nr; i++) 144 free_temp(pc, pc->temp_temp[i]); 145 pc->temp_temp_nr = 0; 146} 147 148static int 149ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 150{ 151 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 152 sizeof(float)); 153 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 154 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 155 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 156 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 157 158 return pc->immd_nr++; 159} 160 161static struct nv50_reg * 162alloc_immd(struct nv50_pc *pc, float f) 163{ 164 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 165 unsigned hw; 166 167 hw = ctor_immd(pc, f, 0, 0, 0); 168 r->type = P_IMMD; 169 r->hw = hw; 170 r->index = -1; 171 return r; 172} 173 174static struct nv50_reg * 175tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 176{ 177 switch (dst->DstRegister.File) { 178 case TGSI_FILE_TEMPORARY: 179 return &pc->temp[dst->DstRegister.Index * 4 + c]; 180 case TGSI_FILE_OUTPUT: 181 return &pc->result[dst->DstRegister.Index * 4 + c]; 182 case TGSI_FILE_NULL: 183 return NULL; 184 default: 185 break; 186 } 187 188 return NULL; 189} 190 191static struct nv50_reg * 192tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 193{ 194 struct nv50_reg *r = NULL; 195 unsigned c; 196 197 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 198 switch (c) { 199 case TGSI_EXTSWIZZLE_X: 200 case TGSI_EXTSWIZZLE_Y: 201 case TGSI_EXTSWIZZLE_Z: 202 case TGSI_EXTSWIZZLE_W: 203 switch (src->SrcRegister.File) { 204 case TGSI_FILE_INPUT: 205 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 206 break; 207 case TGSI_FILE_TEMPORARY: 208 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 209 break; 210 case TGSI_FILE_CONSTANT: 211 r = &pc->param[src->SrcRegister.Index * 4 + c]; 212 break; 213 case TGSI_FILE_IMMEDIATE: 214 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 215 break; 216 default: 217 assert(0); 218 break; 219 } 220 break; 221 case TGSI_EXTSWIZZLE_ZERO: 222 r = alloc_immd(pc, 0.0); 223 break; 224 case TGSI_EXTSWIZZLE_ONE: 225 r = alloc_immd(pc, 1.0); 226 break; 227 default: 228 assert(0); 229 break; 230 } 231 232 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 233 case TGSI_UTIL_SIGN_KEEP: 234 break; 235 default: 236 assert(0); 237 break; 238 } 239 240 return r; 241} 242 243static void 244emit(struct nv50_pc *pc, unsigned *inst) 245{ 246 struct nv50_program *p = pc->p; 247 248 if (inst[0] & 1) { 249 p->insns_nr += 2; 250 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 251 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 252 } else { 253 p->insns_nr += 1; 254 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 255 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 256 } 257} 258 259static INLINE void set_long(struct nv50_pc *, unsigned *); 260 261static boolean 262is_long(unsigned *inst) 263{ 264 if (inst[0] & 1) 265 return TRUE; 266 return FALSE; 267} 268 269static boolean 270is_immd(unsigned *inst) 271{ 272 if (is_long(inst) && (inst[1] & 3) == 3) 273 return TRUE; 274 return FALSE; 275} 276 277static INLINE void 278set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 279{ 280 set_long(pc, inst); 281 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 282 inst[1] |= (pred << 7) | (idx << 12); 283} 284 285static INLINE void 286set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 287{ 288 set_long(pc, inst); 289 inst[1] &= ~((0x3 << 4) | (1 << 6)); 290 inst[1] |= (idx << 4) | (on << 6); 291} 292 293static INLINE void 294set_long(struct nv50_pc *pc, unsigned *inst) 295{ 296 if (is_long(inst)) 297 return; 298 299 inst[0] |= 1; 300 set_pred(pc, 0xf, 0, inst); 301 set_pred_wr(pc, 0, 0, inst); 302} 303 304static INLINE void 305set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 306{ 307 if (dst->type == P_RESULT) { 308 set_long(pc, inst); 309 inst[1] |= 0x00000008; 310 } 311 312 alloc_reg(pc, dst); 313 inst[0] |= (dst->hw << 2); 314} 315 316static INLINE void 317set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 318{ 319 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 320 321 set_long(pc, inst); 322 /*XXX: can't be predicated - bits overlap.. catch cases where both 323 * are required and avoid them. */ 324 set_pred(pc, 0, 0, inst); 325 set_pred_wr(pc, 0, 0, inst); 326 327 inst[1] |= 0x00000002 | 0x00000001; 328 inst[0] |= (val & 0x3f) << 16; 329 inst[1] |= (val >> 6) << 2; 330} 331 332static void 333emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 334 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 335{ 336 unsigned inst[2] = { 0, 0 }; 337 338 inst[0] |= 0x80000000; 339 set_dst(pc, dst, inst); 340 alloc_reg(pc, iv); 341 inst[0] |= (iv->hw << 9); 342 alloc_reg(pc, src); 343 inst[0] |= (src->hw << 16); 344 if (noperspective) 345 inst[0] |= (1 << 25); 346 347 emit(pc, inst); 348} 349 350static void 351set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 352{ 353 set_long(pc, inst); 354 if (src->type == P_IMMD) { 355 inst[1] |= (NV50_CB_PMISC << 22); 356 } else { 357 if (pc->p->type == NV50_PROG_VERTEX) 358 inst[1] |= (NV50_CB_PVP << 22); 359 else 360 inst[1] |= (NV50_CB_PFP << 22); 361 } 362} 363 364static void 365emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 366{ 367 unsigned inst[2] = { 0, 0 }; 368 369 inst[0] |= 0x10000000; 370 371 set_dst(pc, dst, inst); 372 373 if (dst->type != P_RESULT && src->type == P_IMMD) { 374 set_immd(pc, src, inst); 375 /*XXX: 32-bit, but steals part of "half" reg space - need to 376 * catch and handle this case if/when we do half-regs 377 */ 378 inst[0] |= 0x00008000; 379 } else 380 if (src->type == P_IMMD || src->type == P_CONST) { 381 set_long(pc, inst); 382 set_cseg(pc, src, inst); 383 inst[0] |= (src->hw << 9); 384 inst[1] |= 0x20000000; /* src0 const? */ 385 } else { 386 if (src->type == P_ATTR) { 387 set_long(pc, inst); 388 inst[1] |= 0x00200000; 389 } 390 391 alloc_reg(pc, src); 392 inst[0] |= (src->hw << 9); 393 } 394 395 /* We really should support "half" instructions here at some point, 396 * but I don't feel confident enough about them yet. 397 */ 398 set_long(pc, inst); 399 if (is_long(inst) && !is_immd(inst)) { 400 inst[1] |= 0x04000000; /* 32-bit */ 401 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 402 } 403 404 emit(pc, inst); 405} 406 407static boolean 408check_swap_src_0_1(struct nv50_pc *pc, 409 struct nv50_reg **s0, struct nv50_reg **s1) 410{ 411 struct nv50_reg *src0 = *s0, *src1 = *s1; 412 413 if (src0->type == P_CONST) { 414 if (src1->type != P_CONST) { 415 *s0 = src1; 416 *s1 = src0; 417 return TRUE; 418 } 419 } else 420 if (src1->type == P_ATTR) { 421 if (src0->type != P_ATTR) { 422 *s0 = src1; 423 *s1 = src0; 424 return TRUE; 425 } 426 } 427 428 return FALSE; 429} 430 431static void 432set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 433{ 434 if (src->type == P_ATTR) { 435 set_long(pc, inst); 436 inst[1] |= 0x00200000; 437 } else 438 if (src->type == P_CONST || src->type == P_IMMD) { 439 struct nv50_reg *temp = temp_temp(pc); 440 441 emit_mov(pc, temp, src); 442 src = temp; 443 } 444 445 alloc_reg(pc, src); 446 inst[0] |= (src->hw << 9); 447} 448 449static void 450set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 451{ 452 if (src->type == P_ATTR) { 453 struct nv50_reg *temp = temp_temp(pc); 454 455 emit_mov(pc, temp, src); 456 src = temp; 457 } else 458 if (src->type == P_CONST || src->type == P_IMMD) { 459 set_cseg(pc, src, inst); 460 inst[0] |= 0x00800000; 461 } 462 463 alloc_reg(pc, src); 464 inst[0] |= (src->hw << 16); 465} 466 467static void 468set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 469{ 470 set_long(pc, inst); 471 472 if (src->type == P_ATTR) { 473 struct nv50_reg *temp = temp_temp(pc); 474 475 emit_mov(pc, temp, src); 476 src = temp; 477 } else 478 if (src->type == P_CONST || src->type == P_IMMD) { 479 set_cseg(pc, src, inst); 480 inst[0] |= 0x01000000; 481 } 482 483 alloc_reg(pc, src); 484 inst[1] |= (src->hw << 14); 485} 486 487static void 488emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 489 struct nv50_reg *src1) 490{ 491 unsigned inst[2] = { 0, 0 }; 492 493 inst[0] |= 0xc0000000; 494 set_long(pc, inst); 495 496 check_swap_src_0_1(pc, &src0, &src1); 497 set_dst(pc, dst, inst); 498 set_src_0(pc, src0, inst); 499 set_src_1(pc, src1, inst); 500 501 emit(pc, inst); 502} 503 504static void 505emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 506 struct nv50_reg *src0, struct nv50_reg *src1) 507{ 508 unsigned inst[2] = { 0, 0 }; 509 510 inst[0] |= 0xb0000000; 511 512 check_swap_src_0_1(pc, &src0, &src1); 513 set_dst(pc, dst, inst); 514 set_src_0(pc, src0, inst); 515 if (is_long(inst)) 516 set_src_2(pc, src1, inst); 517 else 518 set_src_1(pc, src1, inst); 519 520 emit(pc, inst); 521} 522 523static void 524emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 525 struct nv50_reg *src0, struct nv50_reg *src1) 526{ 527 unsigned inst[2] = { 0, 0 }; 528 529 set_long(pc, inst); 530 inst[0] |= 0xb0000000; 531 inst[1] |= (sub << 29); 532 533 check_swap_src_0_1(pc, &src0, &src1); 534 set_dst(pc, dst, inst); 535 set_src_0(pc, src0, inst); 536 set_src_1(pc, src1, inst); 537 538 emit(pc, inst); 539} 540 541static void 542emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 543 struct nv50_reg *src1) 544{ 545 unsigned inst[2] = { 0, 0 }; 546 547 inst[0] |= 0xb0000000; 548 549 set_long(pc, inst); 550 if (check_swap_src_0_1(pc, &src0, &src1)) 551 inst[1] |= 0x04000000; 552 else 553 inst[1] |= 0x08000000; 554 555 set_dst(pc, dst, inst); 556 set_src_0(pc, src0, inst); 557 set_src_2(pc, src1, inst); 558 559 emit(pc, inst); 560} 561 562static void 563emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 564 struct nv50_reg *src1, struct nv50_reg *src2) 565{ 566 unsigned inst[2] = { 0, 0 }; 567 568 inst[0] |= 0xe0000000; 569 570 check_swap_src_0_1(pc, &src0, &src1); 571 set_dst(pc, dst, inst); 572 set_src_0(pc, src0, inst); 573 set_src_1(pc, src1, inst); 574 set_src_2(pc, src2, inst); 575 576 emit(pc, inst); 577} 578 579static void 580emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 581 struct nv50_reg *src1, struct nv50_reg *src2) 582{ 583 unsigned inst[2] = { 0, 0 }; 584 585 inst[0] |= 0xe0000000; 586 set_long(pc, inst); 587 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 588 589 check_swap_src_0_1(pc, &src0, &src1); 590 set_dst(pc, dst, inst); 591 set_src_0(pc, src0, inst); 592 set_src_1(pc, src1, inst); 593 set_src_2(pc, src2, inst); 594 595 emit(pc, inst); 596} 597 598static void 599emit_flop(struct nv50_pc *pc, unsigned sub, 600 struct nv50_reg *dst, struct nv50_reg *src) 601{ 602 unsigned inst[2] = { 0, 0 }; 603 604 inst[0] |= 0x90000000; 605 if (sub) { 606 set_long(pc, inst); 607 inst[1] |= (sub << 29); 608 } 609 610 set_dst(pc, dst, inst); 611 set_src_0(pc, src, inst); 612 613 emit(pc, inst); 614} 615 616static void 617emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 618{ 619 unsigned inst[2] = { 0, 0 }; 620 621 inst[0] |= 0xb0000000; 622 623 set_dst(pc, dst, inst); 624 set_src_0(pc, src, inst); 625 set_long(pc, inst); 626 inst[1] |= (6 << 29) | 0x00004000; 627 628 emit(pc, inst); 629} 630 631/*XXX: inaccurate results.. why? */ 632#define ALLOW_SET_SWAP 0 633 634static void 635emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 636 struct nv50_reg *src0, struct nv50_reg *src1) 637{ 638 unsigned inst[2] = { 0, 0 }; 639#if ALLOW_SET_SWAP 640 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; 641#endif 642 struct nv50_reg *rdst; 643 644#if ALLOW_SET_SWAP 645 assert(c_op <= 7); 646 if (check_swap_src_0_1(pc, &src0, &src1)) 647 c_op = inv_cop[c_op]; 648#endif 649 650 rdst = dst; 651 if (dst->type != P_TEMP) 652 dst = alloc_temp(pc, NULL); 653 654 /* set.u32 */ 655 set_long(pc, inst); 656 inst[0] |= 0xb0000000; 657 inst[1] |= (3 << 29); 658 inst[1] |= (c_op << 14); 659 /*XXX: breaks things, .u32 by default? 660 * decuda will disasm as .u16 and use .lo/.hi regs, but this 661 * doesn't seem to match what the hw actually does. 662 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 663 */ 664 set_dst(pc, dst, inst); 665 set_src_0(pc, src0, inst); 666 set_src_1(pc, src1, inst); 667 emit(pc, inst); 668 669 /* cvt.f32.u32 */ 670 inst[0] = 0xa0000001; 671 inst[1] = 0x64014780; 672 set_dst(pc, rdst, inst); 673 set_src_0(pc, dst, inst); 674 emit(pc, inst); 675 676 if (dst != rdst) 677 free_temp(pc, dst); 678} 679 680static void 681emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 682{ 683 unsigned inst[2] = { 0, 0 }; 684 685 inst[0] = 0xa0000000; /* cvt */ 686 set_long(pc, inst); 687 inst[1] |= (6 << 29); /* cvt */ 688 inst[1] |= 0x08000000; /* integer mode */ 689 inst[1] |= 0x04000000; /* 32 bit */ 690 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 691 inst[1] |= (1 << 14); /* src .f32 */ 692 set_dst(pc, dst, inst); 693 set_src_0(pc, src, inst); 694 695 emit(pc, inst); 696} 697 698static void 699emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 700 struct nv50_reg *v, struct nv50_reg *e) 701{ 702 struct nv50_reg *temp = alloc_temp(pc, NULL); 703 704 emit_flop(pc, 3, temp, v); 705 emit_mul(pc, temp, temp, e); 706 emit_preex2(pc, temp, temp); 707 emit_flop(pc, 6, dst, temp); 708 709 free_temp(pc, temp); 710} 711 712static boolean 713nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 714{ 715 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 716 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 717 unsigned mask, sat; 718 int i, c; 719 720 NOUVEAU_ERR("insn %p\n", tok); 721 722 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 723 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 724 725 for (c = 0; c < 4; c++) { 726 if (mask & (1 << c)) 727 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 728 else 729 dst[c] = NULL; 730 } 731 732 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 733 for (c = 0; c < 4; c++) 734 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 735 } 736 737 if (sat) { 738 for (c = 0; c < 4; c++) { 739 rdst[c] = dst[c]; 740 dst[c] = temp_temp(pc); 741 } 742 } 743 744 switch (inst->Instruction.Opcode) { 745 case TGSI_OPCODE_ABS: 746 for (c = 0; c < 4; c++) { 747 unsigned inst[2] = { 0, 0 }; 748 749 inst[0] = 0xa0000000; /* cvt */ 750 set_long(pc, inst); 751 inst[1] |= (6 << 29); /* cvt */ 752 inst[1] |= 0x04000000; /* 32 bit */ 753 inst[1] |= (1 << 14); /* src .f32 */ 754 inst[1] |= ((1 << 6) << 14); /* .abs */ 755 set_dst(pc, dst[c], inst); 756 set_src_0(pc, src[0][c], inst); 757 emit(pc, inst); 758 } 759 break; 760 case TGSI_OPCODE_ADD: 761 for (c = 0; c < 4; c++) { 762 if (!(mask & (1 << c))) 763 continue; 764 emit_add(pc, dst[c], src[0][c], src[1][c]); 765 } 766 break; 767 case TGSI_OPCODE_COS: 768 for (c = 0; c < 4; c++) { 769 if (!(mask & (1 << c))) 770 continue; 771 emit_flop(pc, 5, dst[c], src[0][c]); 772 } 773 break; 774 case TGSI_OPCODE_DP3: 775 temp = alloc_temp(pc, NULL); 776 emit_mul(pc, temp, src[0][0], src[1][0]); 777 emit_mad(pc, temp, src[0][1], src[1][1], temp); 778 emit_mad(pc, temp, src[0][2], src[1][2], temp); 779 for (c = 0; c < 4; c++) { 780 if (!(mask & (1 << c))) 781 continue; 782 emit_mov(pc, dst[c], temp); 783 } 784 free_temp(pc, temp); 785 break; 786 case TGSI_OPCODE_DP4: 787 temp = alloc_temp(pc, NULL); 788 emit_mul(pc, temp, src[0][0], src[1][0]); 789 emit_mad(pc, temp, src[0][1], src[1][1], temp); 790 emit_mad(pc, temp, src[0][2], src[1][2], temp); 791 emit_mad(pc, temp, src[0][3], src[1][3], temp); 792 for (c = 0; c < 4; c++) { 793 if (!(mask & (1 << c))) 794 continue; 795 emit_mov(pc, dst[c], temp); 796 } 797 free_temp(pc, temp); 798 break; 799 case TGSI_OPCODE_DPH: 800 temp = alloc_temp(pc, NULL); 801 emit_mul(pc, temp, src[0][0], src[1][0]); 802 emit_mad(pc, temp, src[0][1], src[1][1], temp); 803 emit_mad(pc, temp, src[0][2], src[1][2], temp); 804 emit_add(pc, temp, src[1][3], temp); 805 for (c = 0; c < 4; c++) { 806 if (!(mask & (1 << c))) 807 continue; 808 emit_mov(pc, dst[c], temp); 809 } 810 free_temp(pc, temp); 811 break; 812 case TGSI_OPCODE_DST: 813 { 814 struct nv50_reg *one = alloc_immd(pc, 1.0); 815 emit_mov(pc, dst[0], one); 816 emit_mul(pc, dst[1], src[0][1], src[1][1]); 817 emit_mov(pc, dst[2], src[0][2]); 818 emit_mov(pc, dst[3], src[1][3]); 819 FREE(one); 820 } 821 break; 822 case TGSI_OPCODE_EX2: 823 temp = alloc_temp(pc, NULL); 824 for (c = 0; c < 4; c++) { 825 if (!(mask & (1 << c))) 826 continue; 827 emit_preex2(pc, temp, src[0][c]); 828 emit_flop(pc, 6, dst[c], temp); 829 } 830 free_temp(pc, temp); 831 break; 832 case TGSI_OPCODE_FLR: 833 for (c = 0; c < 4; c++) { 834 if (!(mask & (1 << c))) 835 continue; 836 emit_flr(pc, dst[c], src[0][c]); 837 } 838 break; 839 case TGSI_OPCODE_FRC: 840 temp = alloc_temp(pc, NULL); 841 for (c = 0; c < 4; c++) { 842 if (!(mask & (1 << c))) 843 continue; 844 emit_flr(pc, temp, src[0][c]); 845 emit_sub(pc, dst[c], src[0][c], temp); 846 } 847 free_temp(pc, temp); 848 break; 849 case TGSI_OPCODE_LG2: 850 for (c = 0; c < 4; c++) { 851 if (!(mask & (1 << c))) 852 continue; 853 emit_flop(pc, 3, dst[c], src[0][c]); 854 } 855 break; 856 case TGSI_OPCODE_MAD: 857 for (c = 0; c < 4; c++) { 858 if (!(mask & (1 << c))) 859 continue; 860 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 861 } 862 break; 863 case TGSI_OPCODE_MAX: 864 for (c = 0; c < 4; c++) { 865 if (!(mask & (1 << c))) 866 continue; 867 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 868 } 869 break; 870 case TGSI_OPCODE_MIN: 871 for (c = 0; c < 4; c++) { 872 if (!(mask & (1 << c))) 873 continue; 874 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 875 } 876 break; 877 case TGSI_OPCODE_MOV: 878 for (c = 0; c < 4; c++) { 879 if (!(mask & (1 << c))) 880 continue; 881 emit_mov(pc, dst[c], src[0][c]); 882 } 883 break; 884 case TGSI_OPCODE_MUL: 885 for (c = 0; c < 4; c++) { 886 if (!(mask & (1 << c))) 887 continue; 888 emit_mul(pc, dst[c], src[0][c], src[1][c]); 889 } 890 break; 891 case TGSI_OPCODE_POW: 892 temp = alloc_temp(pc, NULL); 893 emit_pow(pc, temp, src[0][0], src[1][0]); 894 for (c = 0; c < 4; c++) { 895 if (!(mask & (1 << c))) 896 continue; 897 emit_mov(pc, dst[c], temp); 898 } 899 free_temp(pc, temp); 900 break; 901 case TGSI_OPCODE_RCP: 902 for (c = 0; c < 4; c++) { 903 if (!(mask & (1 << c))) 904 continue; 905 emit_flop(pc, 0, dst[c], src[0][c]); 906 } 907 break; 908 case TGSI_OPCODE_RSQ: 909 for (c = 0; c < 4; c++) { 910 if (!(mask & (1 << c))) 911 continue; 912 emit_flop(pc, 2, dst[c], src[0][c]); 913 } 914 break; 915 case TGSI_OPCODE_SGE: 916 for (c = 0; c < 4; c++) { 917 if (!(mask & (1 << c))) 918 continue; 919 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 920 } 921 break; 922 case TGSI_OPCODE_SIN: 923 for (c = 0; c < 4; c++) { 924 if (!(mask & (1 << c))) 925 continue; 926 emit_flop(pc, 4, dst[c], src[0][c]); 927 } 928 break; 929 case TGSI_OPCODE_SLT: 930 for (c = 0; c < 4; c++) { 931 if (!(mask & (1 << c))) 932 continue; 933 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 934 } 935 break; 936 case TGSI_OPCODE_SUB: 937 for (c = 0; c < 4; c++) { 938 if (!(mask & (1 << c))) 939 continue; 940 emit_sub(pc, dst[c], src[0][c], src[1][c]); 941 } 942 break; 943 case TGSI_OPCODE_XPD: 944 temp = alloc_temp(pc, NULL); 945 emit_mul(pc, temp, src[0][2], src[1][1]); 946 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 947 emit_mul(pc, temp, src[0][0], src[1][2]); 948 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 949 emit_mul(pc, temp, src[0][1], src[1][0]); 950 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 951 free_temp(pc, temp); 952 break; 953 case TGSI_OPCODE_END: 954 break; 955 default: 956 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 957 return FALSE; 958 } 959 960 if (sat) { 961 for (c = 0; c < 4; c++) { 962 unsigned inst[2] = { 0, 0 }; 963 964 if (!(mask & (1 << c))) 965 continue; 966 967 inst[0] = 0xa0000000; /* cvt */ 968 set_long(pc, inst); 969 inst[1] |= (6 << 29); /* cvt */ 970 inst[1] |= 0x04000000; /* 32 bit */ 971 inst[1] |= (1 << 14); /* src .f32 */ 972 inst[1] |= ((1 << 5) << 14); /* .sat */ 973 set_dst(pc, rdst[c], inst); 974 set_src_0(pc, dst[c], inst); 975 emit(pc, inst); 976 } 977 } 978 979 kill_temp_temp(pc); 980 return TRUE; 981} 982 983static boolean 984nv50_program_tx_prep(struct nv50_pc *pc) 985{ 986 struct tgsi_parse_context p; 987 boolean ret = FALSE; 988 unsigned i, c; 989 990 tgsi_parse_init(&p, pc->p->pipe.tokens); 991 while (!tgsi_parse_end_of_tokens(&p)) { 992 const union tgsi_full_token *tok = &p.FullToken; 993 994 tgsi_parse_token(&p); 995 switch (tok->Token.Type) { 996 case TGSI_TOKEN_TYPE_IMMEDIATE: 997 { 998 const struct tgsi_full_immediate *imm = 999 &p.FullToken.FullImmediate; 1000 1001 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1002 imm->u.ImmediateFloat32[1].Float, 1003 imm->u.ImmediateFloat32[2].Float, 1004 imm->u.ImmediateFloat32[3].Float); 1005 } 1006 break; 1007 case TGSI_TOKEN_TYPE_DECLARATION: 1008 { 1009 const struct tgsi_full_declaration *d; 1010 unsigned last; 1011 1012 d = &p.FullToken.FullDeclaration; 1013 last = d->u.DeclarationRange.Last; 1014 1015 switch (d->Declaration.File) { 1016 case TGSI_FILE_TEMPORARY: 1017 if (pc->temp_nr < (last + 1)) 1018 pc->temp_nr = last + 1; 1019 break; 1020 case TGSI_FILE_OUTPUT: 1021 if (pc->result_nr < (last + 1)) 1022 pc->result_nr = last + 1; 1023 break; 1024 case TGSI_FILE_INPUT: 1025 if (pc->attr_nr < (last + 1)) 1026 pc->attr_nr = last + 1; 1027 break; 1028 case TGSI_FILE_CONSTANT: 1029 if (pc->param_nr < (last + 1)) 1030 pc->param_nr = last + 1; 1031 break; 1032 default: 1033 NOUVEAU_ERR("bad decl file %d\n", 1034 d->Declaration.File); 1035 goto out_err; 1036 } 1037 } 1038 break; 1039 case TGSI_TOKEN_TYPE_INSTRUCTION: 1040 break; 1041 default: 1042 break; 1043 } 1044 } 1045 1046 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 1047 if (pc->temp_nr) { 1048 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1049 if (!pc->temp) 1050 goto out_err; 1051 1052 for (i = 0; i < pc->temp_nr; i++) { 1053 for (c = 0; c < 4; c++) { 1054 pc->temp[i*4+c].type = P_TEMP; 1055 pc->temp[i*4+c].hw = -1; 1056 pc->temp[i*4+c].index = i; 1057 } 1058 } 1059 } 1060 1061 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1062 if (pc->attr_nr) { 1063 struct nv50_reg *iv = NULL, *tmp = NULL; 1064 int aid = 0; 1065 1066 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1067 if (!pc->attr) 1068 goto out_err; 1069 1070 if (pc->p->type == NV50_PROG_FRAGMENT) { 1071 iv = alloc_temp(pc, NULL); 1072 aid++; 1073 } 1074 1075 for (i = 0; i < pc->attr_nr; i++) { 1076 struct nv50_reg *a = &pc->attr[i*4]; 1077 1078 for (c = 0; c < 4; c++) { 1079 if (pc->p->type == NV50_PROG_FRAGMENT) { 1080 struct nv50_reg *at = 1081 alloc_temp(pc, NULL); 1082 pc->attr[i*4+c].type = at->type; 1083 pc->attr[i*4+c].hw = at->hw; 1084 pc->attr[i*4+c].index = at->index; 1085 } else { 1086 pc->p->cfg.vp.attr[aid/32] |= 1087 (1 << (aid % 32)); 1088 pc->attr[i*4+c].type = P_ATTR; 1089 pc->attr[i*4+c].hw = aid++; 1090 pc->attr[i*4+c].index = i; 1091 } 1092 } 1093 1094 if (pc->p->type != NV50_PROG_FRAGMENT) 1095 continue; 1096 1097 emit_interp(pc, iv, iv, iv, FALSE); 1098 tmp = alloc_temp(pc, NULL); 1099 { 1100 unsigned inst[2] = { 0, 0 }; 1101 inst[0] = 0x90000000; 1102 inst[0] |= (tmp->hw << 2); 1103 emit(pc, inst); 1104 } 1105 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1106 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1107 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1108 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1109 free_temp(pc, tmp); 1110 } 1111 1112 if (iv) 1113 free_temp(pc, iv); 1114 } 1115 1116 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1117 if (pc->result_nr) { 1118 int rid = 0; 1119 1120 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1121 if (!pc->result) 1122 goto out_err; 1123 1124 for (i = 0; i < pc->result_nr; i++) { 1125 for (c = 0; c < 4; c++) { 1126 if (pc->p->type == NV50_PROG_FRAGMENT) 1127 pc->result[i*4+c].type = P_TEMP; 1128 else 1129 pc->result[i*4+c].type = P_RESULT; 1130 pc->result[i*4+c].hw = rid++; 1131 pc->result[i*4+c].index = i; 1132 } 1133 } 1134 } 1135 1136 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1137 if (pc->param_nr) { 1138 int rid = 0; 1139 1140 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1141 if (!pc->param) 1142 goto out_err; 1143 1144 for (i = 0; i < pc->param_nr; i++) { 1145 for (c = 0; c < 4; c++) { 1146 pc->param[i*4+c].type = P_CONST; 1147 pc->param[i*4+c].hw = rid++; 1148 pc->param[i*4+c].index = i; 1149 } 1150 } 1151 } 1152 1153 if (pc->immd_nr) { 1154 int rid = 0; 1155 1156 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1157 if (!pc->immd) 1158 goto out_err; 1159 1160 for (i = 0; i < pc->immd_nr; i++) { 1161 for (c = 0; c < 4; c++) { 1162 pc->immd[i*4+c].type = P_IMMD; 1163 pc->immd[i*4+c].hw = rid++; 1164 pc->immd[i*4+c].index = i; 1165 } 1166 } 1167 } 1168 1169 ret = TRUE; 1170out_err: 1171 tgsi_parse_free(&p); 1172 return ret; 1173} 1174 1175static boolean 1176nv50_program_tx(struct nv50_program *p) 1177{ 1178 struct tgsi_parse_context parse; 1179 struct nv50_pc *pc; 1180 boolean ret; 1181 1182 pc = CALLOC_STRUCT(nv50_pc); 1183 if (!pc) 1184 return FALSE; 1185 pc->p = p; 1186 pc->p->cfg.high_temp = 4; 1187 1188 ret = nv50_program_tx_prep(pc); 1189 if (ret == FALSE) 1190 goto out_cleanup; 1191 1192 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1193 while (!tgsi_parse_end_of_tokens(&parse)) { 1194 const union tgsi_full_token *tok = &parse.FullToken; 1195 1196 tgsi_parse_token(&parse); 1197 1198 switch (tok->Token.Type) { 1199 case TGSI_TOKEN_TYPE_INSTRUCTION: 1200 ret = nv50_program_tx_insn(pc, tok); 1201 if (ret == FALSE) 1202 goto out_err; 1203 break; 1204 default: 1205 break; 1206 } 1207 } 1208 1209 p->immd_nr = pc->immd_nr * 4; 1210 p->immd = pc->immd_buf; 1211 1212out_err: 1213 tgsi_parse_free(&parse); 1214 1215out_cleanup: 1216 return ret; 1217} 1218 1219static void 1220nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1221{ 1222 int i; 1223 1224 if (nv50_program_tx(p) == FALSE) 1225 assert(0); 1226 /* *not* sufficient, it's fine if last inst is long and 1227 * NOT immd - otherwise it's fucked fucked fucked */ 1228 p->insns[p->insns_nr - 1] |= 0x00000001; 1229 1230 if (p->type == NV50_PROG_VERTEX) { 1231 for (i = 0; i < p->insns_nr; i++) 1232 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1233 } else { 1234 for (i = 0; i < p->insns_nr; i++) 1235 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1236 } 1237 1238 p->translated = TRUE; 1239} 1240 1241static void 1242nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1243{ 1244 int i; 1245 1246 for (i = 0; i < p->immd_nr; i++) { 1247 BEGIN_RING(tesla, 0x0f00, 2); 1248 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 1249 OUT_RING (fui(p->immd[i])); 1250 } 1251} 1252 1253static void 1254nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1255{ 1256 struct pipe_winsys *ws = nv50->pipe.winsys; 1257 void *map; 1258 1259 if (!p->buffer) 1260 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1261 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1262 memcpy(map, p->insns, p->insns_nr * 4); 1263 ws->buffer_unmap(ws, p->buffer); 1264} 1265 1266void 1267nv50_vertprog_validate(struct nv50_context *nv50) 1268{ 1269 struct nouveau_grobj *tesla = nv50->screen->tesla; 1270 struct nv50_program *p = nv50->vertprog; 1271 struct nouveau_stateobj *so; 1272 1273 if (!p->translated) { 1274 nv50_program_validate(nv50, p); 1275 if (!p->translated) 1276 assert(0); 1277 } 1278 1279 nv50_program_validate_data(nv50, p); 1280 nv50_program_validate_code(nv50, p); 1281 1282 so = so_new(11, 2); 1283 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1284 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1285 NOUVEAU_BO_HIGH, 0, 0); 1286 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1287 NOUVEAU_BO_LOW, 0, 0); 1288 so_method(so, tesla, 0x1650, 2); 1289 so_data (so, p->cfg.vp.attr[0]); 1290 so_data (so, p->cfg.vp.attr[1]); 1291 so_method(so, tesla, 0x16ac, 2); 1292 so_data (so, 8); 1293 so_data (so, p->cfg.high_temp); 1294 so_method(so, tesla, 0x140c, 1); 1295 so_data (so, 0); /* program start offset */ 1296 so_emit(nv50->screen->nvws, so); 1297 so_ref(NULL, &so); 1298} 1299 1300void 1301nv50_fragprog_validate(struct nv50_context *nv50) 1302{ 1303 struct nouveau_grobj *tesla = nv50->screen->tesla; 1304 struct nv50_program *p = nv50->fragprog; 1305 struct nouveau_stateobj *so; 1306 1307 if (!p->translated) { 1308 nv50_program_validate(nv50, p); 1309 if (!p->translated) 1310 assert(0); 1311 } 1312 1313 nv50_program_validate_data(nv50, p); 1314 nv50_program_validate_code(nv50, p); 1315 1316 so = so_new(7, 2); 1317 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1318 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1319 NOUVEAU_BO_HIGH, 0, 0); 1320 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1321 NOUVEAU_BO_LOW, 0, 0); 1322 so_method(so, tesla, 0x198c, 1); 1323 so_data (so, p->cfg.high_temp); 1324 so_method(so, tesla, 0x1414, 1); 1325 so_data (so, 0); /* program start offset */ 1326 so_emit(nv50->screen->nvws, so); 1327 so_ref(NULL, &so); 1328} 1329 1330void 1331nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1332{ 1333 struct pipe_winsys *ws = nv50->pipe.winsys; 1334 1335 if (p->insns_nr) { 1336 if (p->insns) 1337 FREE(p->insns); 1338 p->insns_nr = 0; 1339 } 1340 1341 if (p->buffer) 1342 pipe_buffer_reference(ws, &p->buffer, NULL); 1343 1344 p->translated = 0; 1345} 1346 1347