nv50_program.c revision ea4b09cbcbd9db82648ab30f18c0f46a66ab9f69
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL 16 * LIT - other buggery 17 * 18 * MSB - Like MAD, but MUL+SUB 19 * - Fuck it off, introduce a way to negate args for ops that 20 * support it. 21 * 22 * Look into inlining IMMD for ops other than MOV (make it general?) 23 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 24 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 25 * 26 * Verify half-insns work where expected - and force disable them where they 27 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 28 */ 29struct nv50_reg { 30 enum { 31 P_TEMP, 32 P_ATTR, 33 P_RESULT, 34 P_CONST, 35 P_IMMD 36 } type; 37 int index; 38 39 int hw; 40 int neg; 41}; 42 43struct nv50_pc { 44 struct nv50_program *p; 45 46 /* hw resources */ 47 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 48 49 /* tgsi resources */ 50 struct nv50_reg *temp; 51 int temp_nr; 52 struct nv50_reg *attr; 53 int attr_nr; 54 struct nv50_reg *result; 55 int result_nr; 56 struct nv50_reg *param; 57 int param_nr; 58 struct nv50_reg *immd; 59 float *immd_buf; 60 int immd_nr; 61 62 struct nv50_reg *temp_temp[8]; 63 unsigned temp_temp_nr; 64}; 65 66static void 67alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 68{ 69 int i; 70 71 if (reg->type != P_TEMP) 72 return; 73 74 if (reg->hw >= 0) { 75 /*XXX: do this here too to catch FP temp-as-attr usage.. 76 * not clean, but works */ 77 if (pc->p->cfg.high_temp < (reg->hw + 1)) 78 pc->p->cfg.high_temp = reg->hw + 1; 79 return; 80 } 81 82 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 83 if (!(pc->r_temp[i])) { 84 pc->r_temp[i] = reg; 85 reg->hw = i; 86 if (pc->p->cfg.high_temp < (i + 1)) 87 pc->p->cfg.high_temp = i + 1; 88 return; 89 } 90 } 91 92 assert(0); 93} 94 95static struct nv50_reg * 96alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 97{ 98 struct nv50_reg *r; 99 int i; 100 101 if (dst && dst->type == P_TEMP && dst->hw == -1) 102 return dst; 103 104 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 105 if (!pc->r_temp[i]) { 106 r = CALLOC_STRUCT(nv50_reg); 107 r->type = P_TEMP; 108 r->index = -1; 109 r->hw = i; 110 pc->r_temp[i] = r; 111 return r; 112 } 113 } 114 115 assert(0); 116 return NULL; 117} 118 119static void 120free_temp(struct nv50_pc *pc, struct nv50_reg *r) 121{ 122 if (r->index == -1) { 123 FREE(pc->r_temp[r->hw]); 124 pc->r_temp[r->hw] = NULL; 125 } 126} 127 128static struct nv50_reg * 129temp_temp(struct nv50_pc *pc) 130{ 131 if (pc->temp_temp_nr >= 8) 132 assert(0); 133 134 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 135 return pc->temp_temp[pc->temp_temp_nr++]; 136} 137 138static void 139kill_temp_temp(struct nv50_pc *pc) 140{ 141 int i; 142 143 for (i = 0; i < pc->temp_temp_nr; i++) 144 free_temp(pc, pc->temp_temp[i]); 145 pc->temp_temp_nr = 0; 146} 147 148static int 149ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 150{ 151 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 152 sizeof(float)); 153 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 154 pc->immd_buf[(pc->immd_nr * 4) + 1] = x; 155 pc->immd_buf[(pc->immd_nr * 4) + 2] = x; 156 pc->immd_buf[(pc->immd_nr * 4) + 3] = x; 157 158 return pc->immd_nr++; 159} 160 161static struct nv50_reg * 162alloc_immd(struct nv50_pc *pc, float f) 163{ 164 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 165 unsigned hw; 166 167 hw = ctor_immd(pc, f, 0, 0, 0); 168 r->type = P_IMMD; 169 r->hw = hw; 170 r->index = -1; 171 return r; 172} 173 174static struct nv50_reg * 175tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 176{ 177 switch (dst->DstRegister.File) { 178 case TGSI_FILE_TEMPORARY: 179 return &pc->temp[dst->DstRegister.Index * 4 + c]; 180 case TGSI_FILE_OUTPUT: 181 return &pc->result[dst->DstRegister.Index * 4 + c]; 182 case TGSI_FILE_NULL: 183 return NULL; 184 default: 185 break; 186 } 187 188 return NULL; 189} 190 191static struct nv50_reg * 192tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 193{ 194 struct nv50_reg *r = NULL; 195 unsigned c; 196 197 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 198 switch (c) { 199 case TGSI_EXTSWIZZLE_X: 200 case TGSI_EXTSWIZZLE_Y: 201 case TGSI_EXTSWIZZLE_Z: 202 case TGSI_EXTSWIZZLE_W: 203 switch (src->SrcRegister.File) { 204 case TGSI_FILE_INPUT: 205 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 206 break; 207 case TGSI_FILE_TEMPORARY: 208 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 209 break; 210 case TGSI_FILE_CONSTANT: 211 r = &pc->param[src->SrcRegister.Index * 4 + c]; 212 break; 213 case TGSI_FILE_IMMEDIATE: 214 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 215 break; 216 default: 217 assert(0); 218 break; 219 } 220 break; 221 case TGSI_EXTSWIZZLE_ZERO: 222 r = alloc_immd(pc, 0.0); 223 break; 224 case TGSI_EXTSWIZZLE_ONE: 225 r = alloc_immd(pc, 1.0); 226 break; 227 default: 228 assert(0); 229 break; 230 } 231 232 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 233 case TGSI_UTIL_SIGN_KEEP: 234 break; 235 default: 236 assert(0); 237 break; 238 } 239 240 return r; 241} 242 243static void 244emit(struct nv50_pc *pc, unsigned *inst) 245{ 246 struct nv50_program *p = pc->p; 247 248 if (inst[0] & 1) { 249 p->insns_nr += 2; 250 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 251 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 252 } else { 253 p->insns_nr += 1; 254 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 255 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 256 } 257} 258 259static INLINE void set_long(struct nv50_pc *, unsigned *); 260 261static boolean 262is_long(unsigned *inst) 263{ 264 if (inst[0] & 1) 265 return TRUE; 266 return FALSE; 267} 268 269static boolean 270is_immd(unsigned *inst) 271{ 272 if (is_long(inst) && (inst[1] & 3) == 3) 273 return TRUE; 274 return FALSE; 275} 276 277static INLINE void 278set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 279{ 280 set_long(pc, inst); 281 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 282 inst[1] |= (pred << 7) | (idx << 12); 283} 284 285static INLINE void 286set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 287{ 288 set_long(pc, inst); 289 inst[1] &= ~((0x3 << 4) | (1 << 6)); 290 inst[1] |= (idx << 4) | (on << 6); 291} 292 293static INLINE void 294set_long(struct nv50_pc *pc, unsigned *inst) 295{ 296 if (is_long(inst)) 297 return; 298 299 inst[0] |= 1; 300 set_pred(pc, 0xf, 0, inst); 301 set_pred_wr(pc, 0, 0, inst); 302} 303 304static INLINE void 305set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 306{ 307 if (dst->type == P_RESULT) { 308 set_long(pc, inst); 309 inst[1] |= 0x00000008; 310 } 311 312 alloc_reg(pc, dst); 313 inst[0] |= (dst->hw << 2); 314} 315 316static INLINE void 317set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 318{ 319 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 320 321 set_long(pc, inst); 322 /*XXX: can't be predicated - bits overlap.. catch cases where both 323 * are required and avoid them. */ 324 set_pred(pc, 0, 0, inst); 325 set_pred_wr(pc, 0, 0, inst); 326 327 inst[1] |= 0x00000002 | 0x00000001; 328 inst[0] |= (val & 0x3f) << 16; 329 inst[1] |= (val >> 6) << 2; 330} 331 332static void 333emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 334 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 335{ 336 unsigned inst[2] = { 0, 0 }; 337 338 inst[0] |= 0x80000000; 339 set_dst(pc, dst, inst); 340 alloc_reg(pc, iv); 341 inst[0] |= (iv->hw << 9); 342 alloc_reg(pc, src); 343 inst[0] |= (src->hw << 16); 344 if (noperspective) 345 inst[0] |= (1 << 25); 346 347 emit(pc, inst); 348} 349 350static void 351set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 352{ 353 set_long(pc, inst); 354 if (src->type == P_IMMD) { 355 inst[1] |= (NV50_CB_PMISC << 22); 356 } else { 357 if (pc->p->type == NV50_PROG_VERTEX) 358 inst[1] |= (NV50_CB_PVP << 22); 359 else 360 inst[1] |= (NV50_CB_PFP << 22); 361 } 362} 363 364static void 365emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 366{ 367 unsigned inst[2] = { 0, 0 }; 368 369 inst[0] |= 0x10000000; 370 371 set_dst(pc, dst, inst); 372 373 if (dst->type != P_RESULT && src->type == P_IMMD) { 374 set_immd(pc, src, inst); 375 /*XXX: 32-bit, but steals part of "half" reg space - need to 376 * catch and handle this case if/when we do half-regs 377 */ 378 inst[0] |= 0x00008000; 379 } else 380 if (src->type == P_IMMD || src->type == P_CONST) { 381 set_long(pc, inst); 382 set_cseg(pc, src, inst); 383 inst[0] |= (src->hw << 9); 384 inst[1] |= 0x20000000; /* src0 const? */ 385 } else { 386 if (src->type == P_ATTR) { 387 set_long(pc, inst); 388 inst[1] |= 0x00200000; 389 } 390 391 alloc_reg(pc, src); 392 inst[0] |= (src->hw << 9); 393 } 394 395 /* We really should support "half" instructions here at some point, 396 * but I don't feel confident enough about them yet. 397 */ 398 set_long(pc, inst); 399 if (is_long(inst) && !is_immd(inst)) { 400 inst[1] |= 0x04000000; /* 32-bit */ 401 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 402 } 403 404 emit(pc, inst); 405} 406 407static boolean 408check_swap_src_0_1(struct nv50_pc *pc, 409 struct nv50_reg **s0, struct nv50_reg **s1) 410{ 411 struct nv50_reg *src0 = *s0, *src1 = *s1; 412 413 if (src0->type == P_CONST) { 414 if (src1->type != P_CONST) { 415 *s0 = src1; 416 *s1 = src0; 417 return TRUE; 418 } 419 } else 420 if (src1->type == P_ATTR) { 421 if (src0->type != P_ATTR) { 422 *s0 = src1; 423 *s1 = src0; 424 return TRUE; 425 } 426 } 427 428 return FALSE; 429} 430 431static void 432set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 433{ 434 if (src->type == P_ATTR) { 435 set_long(pc, inst); 436 inst[1] |= 0x00200000; 437 } else 438 if (src->type == P_CONST || src->type == P_IMMD) { 439 struct nv50_reg *temp = temp_temp(pc); 440 441 emit_mov(pc, temp, src); 442 src = temp; 443 } 444 445 alloc_reg(pc, src); 446 inst[0] |= (src->hw << 9); 447} 448 449static void 450set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 451{ 452 if (src->type == P_ATTR) { 453 struct nv50_reg *temp = temp_temp(pc); 454 455 emit_mov(pc, temp, src); 456 src = temp; 457 } else 458 if (src->type == P_CONST || src->type == P_IMMD) { 459 set_cseg(pc, src, inst); 460 inst[0] |= 0x00800000; 461 } 462 463 alloc_reg(pc, src); 464 inst[0] |= (src->hw << 16); 465} 466 467static void 468set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 469{ 470 set_long(pc, inst); 471 472 if (src->type == P_ATTR) { 473 struct nv50_reg *temp = temp_temp(pc); 474 475 emit_mov(pc, temp, src); 476 src = temp; 477 } else 478 if (src->type == P_CONST || src->type == P_IMMD) { 479 set_cseg(pc, src, inst); 480 inst[0] |= 0x01000000; 481 } 482 483 alloc_reg(pc, src); 484 inst[1] |= (src->hw << 14); 485} 486 487static void 488emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 489 struct nv50_reg *src1) 490{ 491 unsigned inst[2] = { 0, 0 }; 492 493 inst[0] |= 0xc0000000; 494 set_long(pc, inst); 495 496 check_swap_src_0_1(pc, &src0, &src1); 497 set_dst(pc, dst, inst); 498 set_src_0(pc, src0, inst); 499 set_src_1(pc, src1, inst); 500 501 emit(pc, inst); 502} 503 504static void 505emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 506 struct nv50_reg *src0, struct nv50_reg *src1) 507{ 508 unsigned inst[2] = { 0, 0 }; 509 510 inst[0] |= 0xb0000000; 511 512 check_swap_src_0_1(pc, &src0, &src1); 513 set_dst(pc, dst, inst); 514 set_src_0(pc, src0, inst); 515 if (is_long(inst)) 516 set_src_2(pc, src1, inst); 517 else 518 set_src_1(pc, src1, inst); 519 520 emit(pc, inst); 521} 522 523static void 524emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 525 struct nv50_reg *src0, struct nv50_reg *src1) 526{ 527 unsigned inst[2] = { 0, 0 }; 528 529 set_long(pc, inst); 530 inst[0] |= 0xb0000000; 531 inst[1] |= (sub << 29); 532 533 check_swap_src_0_1(pc, &src0, &src1); 534 set_dst(pc, dst, inst); 535 set_src_0(pc, src0, inst); 536 set_src_1(pc, src1, inst); 537 538 emit(pc, inst); 539} 540 541static void 542emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 543 struct nv50_reg *src1) 544{ 545 unsigned inst[2] = { 0, 0 }; 546 547 inst[0] |= 0xb0000000; 548 549 set_long(pc, inst); 550 if (check_swap_src_0_1(pc, &src0, &src1)) 551 inst[1] |= 0x04000000; 552 else 553 inst[1] |= 0x08000000; 554 555 set_dst(pc, dst, inst); 556 set_src_0(pc, src0, inst); 557 set_src_2(pc, src1, inst); 558 559 emit(pc, inst); 560} 561 562static void 563emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 564 struct nv50_reg *src1, struct nv50_reg *src2) 565{ 566 unsigned inst[2] = { 0, 0 }; 567 568 inst[0] |= 0xe0000000; 569 570 check_swap_src_0_1(pc, &src0, &src1); 571 set_dst(pc, dst, inst); 572 set_src_0(pc, src0, inst); 573 set_src_1(pc, src1, inst); 574 set_src_2(pc, src2, inst); 575 576 emit(pc, inst); 577} 578 579static void 580emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 581 struct nv50_reg *src1, struct nv50_reg *src2) 582{ 583 unsigned inst[2] = { 0, 0 }; 584 585 inst[0] |= 0xe0000000; 586 set_long(pc, inst); 587 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 588 589 check_swap_src_0_1(pc, &src0, &src1); 590 set_dst(pc, dst, inst); 591 set_src_0(pc, src0, inst); 592 set_src_1(pc, src1, inst); 593 set_src_2(pc, src2, inst); 594 595 emit(pc, inst); 596} 597 598static void 599emit_flop(struct nv50_pc *pc, unsigned sub, 600 struct nv50_reg *dst, struct nv50_reg *src) 601{ 602 unsigned inst[2] = { 0, 0 }; 603 604 inst[0] |= 0x90000000; 605 if (sub) { 606 set_long(pc, inst); 607 inst[1] |= (sub << 29); 608 } 609 610 set_dst(pc, dst, inst); 611 set_src_0(pc, src, inst); 612 613 emit(pc, inst); 614} 615 616static void 617emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 618{ 619 unsigned inst[2] = { 0, 0 }; 620 621 inst[0] |= 0xb0000000; 622 623 set_dst(pc, dst, inst); 624 set_src_0(pc, src, inst); 625 set_long(pc, inst); 626 inst[1] |= (6 << 29) | 0x00004000; 627 628 emit(pc, inst); 629} 630/*XXX: inaccurate results.. why? */ 631#define ALLOW_SET_SWAP 0 632 633static void 634emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 635 struct nv50_reg *src0, struct nv50_reg *src1) 636{ 637 unsigned inst[2] = { 0, 0 }; 638#if ALLOW_SET_SWAP 639 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; 640#endif 641 struct nv50_reg *rdst; 642 643#if ALLOW_SET_SWAP 644 assert(c_op <= 7); 645 if (check_swap_src_0_1(pc, &src0, &src1)) 646 c_op = inv_cop[c_op]; 647#endif 648 649 rdst = dst; 650 if (dst->type != P_TEMP) 651 dst = alloc_temp(pc, NULL); 652 653 /* set.u32 */ 654 set_long(pc, inst); 655 inst[0] |= 0xb0000000; 656 inst[1] |= (3 << 29); 657 inst[1] |= (c_op << 14); 658 /*XXX: breaks things, .u32 by default? 659 * decuda will disasm as .u16 and use .lo/.hi regs, but this 660 * doesn't seem to match what the hw actually does. 661 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 662 */ 663 set_dst(pc, dst, inst); 664 set_src_0(pc, src0, inst); 665 set_src_1(pc, src1, inst); 666 emit(pc, inst); 667 668 /* cvt.f32.u32 */ 669 inst[0] = 0xa0000001; 670 inst[1] = 0x64014780; 671 set_dst(pc, rdst, inst); 672 set_src_0(pc, dst, inst); 673 emit(pc, inst); 674 675 if (dst != rdst) 676 free_temp(pc, dst); 677} 678 679static void 680emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 681{ 682 unsigned inst[2] = { 0, 0 }; 683 684 inst[0] = 0xa0000000; /* cvt */ 685 set_long(pc, inst); 686 inst[1] |= (6 << 29); /* cvt */ 687 inst[1] |= 0x08000000; /* integer mode */ 688 inst[1] |= 0x04000000; /* 32 bit */ 689 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 690 inst[1] |= (1 << 14); /* src .f32 */ 691 set_dst(pc, dst, inst); 692 set_src_0(pc, src, inst); 693 694 emit(pc, inst); 695} 696 697static boolean 698nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 699{ 700 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 701 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 702 unsigned mask, sat; 703 int i, c; 704 705 NOUVEAU_ERR("insn %p\n", tok); 706 707 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 708 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 709 710 for (c = 0; c < 4; c++) { 711 if (mask & (1 << c)) 712 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 713 else 714 dst[c] = NULL; 715 } 716 717 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 718 for (c = 0; c < 4; c++) 719 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 720 } 721 722 if (sat) { 723 for (c = 0; c < 4; c++) { 724 rdst[c] = dst[c]; 725 dst[c] = temp_temp(pc); 726 } 727 } 728 729 switch (inst->Instruction.Opcode) { 730 case TGSI_OPCODE_ABS: 731 for (c = 0; c < 4; c++) { 732 unsigned inst[2] = { 0, 0 }; 733 734 inst[0] = 0xa0000000; /* cvt */ 735 set_long(pc, inst); 736 inst[1] |= (6 << 29); /* cvt */ 737 inst[1] |= 0x04000000; /* 32 bit */ 738 inst[1] |= (1 << 14); /* src .f32 */ 739 inst[1] |= ((1 << 6) << 14); /* .abs */ 740 set_dst(pc, dst[c], inst); 741 set_src_0(pc, src[0][c], inst); 742 emit(pc, inst); 743 } 744 break; 745 case TGSI_OPCODE_ADD: 746 for (c = 0; c < 4; c++) { 747 if (!(mask & (1 << c))) 748 continue; 749 emit_add(pc, dst[c], src[0][c], src[1][c]); 750 } 751 break; 752 case TGSI_OPCODE_COS: 753 for (c = 0; c < 4; c++) { 754 if (!(mask & (1 << c))) 755 continue; 756 emit_flop(pc, 5, dst[c], src[0][c]); 757 } 758 break; 759 case TGSI_OPCODE_DP3: 760 temp = alloc_temp(pc, NULL); 761 emit_mul(pc, temp, src[0][0], src[1][0]); 762 emit_mad(pc, temp, src[0][1], src[1][1], temp); 763 emit_mad(pc, temp, src[0][2], src[1][2], temp); 764 for (c = 0; c < 4; c++) { 765 if (!(mask & (1 << c))) 766 continue; 767 emit_mov(pc, dst[c], temp); 768 } 769 free_temp(pc, temp); 770 break; 771 case TGSI_OPCODE_DP4: 772 temp = alloc_temp(pc, NULL); 773 emit_mul(pc, temp, src[0][0], src[1][0]); 774 emit_mad(pc, temp, src[0][1], src[1][1], temp); 775 emit_mad(pc, temp, src[0][2], src[1][2], temp); 776 emit_mad(pc, temp, src[0][3], src[1][3], temp); 777 for (c = 0; c < 4; c++) { 778 if (!(mask & (1 << c))) 779 continue; 780 emit_mov(pc, dst[c], temp); 781 } 782 free_temp(pc, temp); 783 break; 784 case TGSI_OPCODE_DPH: 785 temp = alloc_temp(pc, NULL); 786 emit_mul(pc, temp, src[0][0], src[1][0]); 787 emit_mad(pc, temp, src[0][1], src[1][1], temp); 788 emit_mad(pc, temp, src[0][2], src[1][2], temp); 789 emit_add(pc, temp, src[1][3], temp); 790 for (c = 0; c < 4; c++) { 791 if (!(mask & (1 << c))) 792 continue; 793 emit_mov(pc, dst[c], temp); 794 } 795 free_temp(pc, temp); 796 break; 797 case TGSI_OPCODE_DST: 798 { 799 struct nv50_reg *one = alloc_immd(pc, 1.0); 800 emit_mov(pc, dst[0], one); 801 emit_mul(pc, dst[1], src[0][1], src[1][1]); 802 emit_mov(pc, dst[2], src[0][2]); 803 emit_mov(pc, dst[3], src[1][3]); 804 FREE(one); 805 } 806 break; 807 case TGSI_OPCODE_EX2: 808 temp = alloc_temp(pc, NULL); 809 for (c = 0; c < 4; c++) { 810 if (!(mask & (1 << c))) 811 continue; 812 emit_preex2(pc, temp, src[0][c]); 813 emit_flop(pc, 6, dst[c], temp); 814 } 815 free_temp(pc, temp); 816 break; 817 case TGSI_OPCODE_FLR: 818 for (c = 0; c < 4; c++) { 819 if (!(mask & (1 << c))) 820 continue; 821 emit_flr(pc, dst[c], src[0][c]); 822 } 823 break; 824 case TGSI_OPCODE_FRC: 825 temp = alloc_temp(pc, NULL); 826 for (c = 0; c < 4; c++) { 827 if (!(mask & (1 << c))) 828 continue; 829 emit_flr(pc, temp, src[0][c]); 830 emit_sub(pc, dst[c], src[0][c], temp); 831 } 832 free_temp(pc, temp); 833 break; 834 case TGSI_OPCODE_LG2: 835 for (c = 0; c < 4; c++) { 836 if (!(mask & (1 << c))) 837 continue; 838 emit_flop(pc, 3, dst[c], src[0][c]); 839 } 840 break; 841 case TGSI_OPCODE_MAD: 842 for (c = 0; c < 4; c++) { 843 if (!(mask & (1 << c))) 844 continue; 845 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 846 } 847 break; 848 case TGSI_OPCODE_MAX: 849 for (c = 0; c < 4; c++) { 850 if (!(mask & (1 << c))) 851 continue; 852 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 853 } 854 break; 855 case TGSI_OPCODE_MIN: 856 for (c = 0; c < 4; c++) { 857 if (!(mask & (1 << c))) 858 continue; 859 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 860 } 861 break; 862 case TGSI_OPCODE_MOV: 863 for (c = 0; c < 4; c++) { 864 if (!(mask & (1 << c))) 865 continue; 866 emit_mov(pc, dst[c], src[0][c]); 867 } 868 break; 869 case TGSI_OPCODE_MUL: 870 for (c = 0; c < 4; c++) { 871 if (!(mask & (1 << c))) 872 continue; 873 emit_mul(pc, dst[c], src[0][c], src[1][c]); 874 } 875 break; 876 case TGSI_OPCODE_POW: 877 temp = alloc_temp(pc, NULL); 878 emit_flop(pc, 3, temp, src[0][0]); 879 emit_mul(pc, temp, temp, src[1][0]); 880 emit_preex2(pc, temp, temp); 881 emit_flop(pc, 6, temp, temp); 882 for (c = 0; c < 4; c++) { 883 if (!(mask & (1 << c))) 884 continue; 885 emit_mov(pc, dst[c], temp); 886 } 887 free_temp(pc, temp); 888 break; 889 case TGSI_OPCODE_RCP: 890 for (c = 0; c < 4; c++) { 891 if (!(mask & (1 << c))) 892 continue; 893 emit_flop(pc, 0, dst[c], src[0][c]); 894 } 895 break; 896 case TGSI_OPCODE_RSQ: 897 for (c = 0; c < 4; c++) { 898 if (!(mask & (1 << c))) 899 continue; 900 emit_flop(pc, 2, dst[c], src[0][c]); 901 } 902 break; 903 case TGSI_OPCODE_SGE: 904 for (c = 0; c < 4; c++) { 905 if (!(mask & (1 << c))) 906 continue; 907 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 908 } 909 break; 910 case TGSI_OPCODE_SIN: 911 for (c = 0; c < 4; c++) { 912 if (!(mask & (1 << c))) 913 continue; 914 emit_flop(pc, 4, dst[c], src[0][c]); 915 } 916 break; 917 case TGSI_OPCODE_SLT: 918 for (c = 0; c < 4; c++) { 919 if (!(mask & (1 << c))) 920 continue; 921 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 922 } 923 break; 924 case TGSI_OPCODE_SUB: 925 for (c = 0; c < 4; c++) { 926 if (!(mask & (1 << c))) 927 continue; 928 emit_sub(pc, dst[c], src[0][c], src[1][c]); 929 } 930 break; 931 case TGSI_OPCODE_XPD: 932 temp = alloc_temp(pc, NULL); 933 emit_mul(pc, temp, src[0][2], src[1][1]); 934 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 935 emit_mul(pc, temp, src[0][0], src[1][2]); 936 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 937 emit_mul(pc, temp, src[0][1], src[1][0]); 938 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 939 free_temp(pc, temp); 940 break; 941 case TGSI_OPCODE_END: 942 break; 943 default: 944 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 945 return FALSE; 946 } 947 948 if (sat) { 949 for (c = 0; c < 4; c++) { 950 unsigned inst[2] = { 0, 0 }; 951 952 if (!(mask & (1 << c))) 953 continue; 954 955 inst[0] = 0xa0000000; /* cvt */ 956 set_long(pc, inst); 957 inst[1] |= (6 << 29); /* cvt */ 958 inst[1] |= 0x04000000; /* 32 bit */ 959 inst[1] |= (1 << 14); /* src .f32 */ 960 inst[1] |= ((1 << 5) << 14); /* .sat */ 961 set_dst(pc, rdst[c], inst); 962 set_src_0(pc, dst[c], inst); 963 emit(pc, inst); 964 } 965 } 966 967 kill_temp_temp(pc); 968 return TRUE; 969} 970 971static boolean 972nv50_program_tx_prep(struct nv50_pc *pc) 973{ 974 struct tgsi_parse_context p; 975 boolean ret = FALSE; 976 unsigned i, c; 977 978 tgsi_parse_init(&p, pc->p->pipe.tokens); 979 while (!tgsi_parse_end_of_tokens(&p)) { 980 const union tgsi_full_token *tok = &p.FullToken; 981 982 tgsi_parse_token(&p); 983 switch (tok->Token.Type) { 984 case TGSI_TOKEN_TYPE_IMMEDIATE: 985 { 986 const struct tgsi_full_immediate *imm = 987 &p.FullToken.FullImmediate; 988 989 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 990 imm->u.ImmediateFloat32[1].Float, 991 imm->u.ImmediateFloat32[2].Float, 992 imm->u.ImmediateFloat32[3].Float); 993 } 994 break; 995 case TGSI_TOKEN_TYPE_DECLARATION: 996 { 997 const struct tgsi_full_declaration *d; 998 unsigned last; 999 1000 d = &p.FullToken.FullDeclaration; 1001 last = d->u.DeclarationRange.Last; 1002 1003 switch (d->Declaration.File) { 1004 case TGSI_FILE_TEMPORARY: 1005 if (pc->temp_nr < (last + 1)) 1006 pc->temp_nr = last + 1; 1007 break; 1008 case TGSI_FILE_OUTPUT: 1009 if (pc->result_nr < (last + 1)) 1010 pc->result_nr = last + 1; 1011 break; 1012 case TGSI_FILE_INPUT: 1013 if (pc->attr_nr < (last + 1)) 1014 pc->attr_nr = last + 1; 1015 break; 1016 case TGSI_FILE_CONSTANT: 1017 if (pc->param_nr < (last + 1)) 1018 pc->param_nr = last + 1; 1019 break; 1020 default: 1021 NOUVEAU_ERR("bad decl file %d\n", 1022 d->Declaration.File); 1023 goto out_err; 1024 } 1025 } 1026 break; 1027 case TGSI_TOKEN_TYPE_INSTRUCTION: 1028 break; 1029 default: 1030 break; 1031 } 1032 } 1033 1034 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 1035 if (pc->temp_nr) { 1036 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1037 if (!pc->temp) 1038 goto out_err; 1039 1040 for (i = 0; i < pc->temp_nr; i++) { 1041 for (c = 0; c < 4; c++) { 1042 pc->temp[i*4+c].type = P_TEMP; 1043 pc->temp[i*4+c].hw = -1; 1044 pc->temp[i*4+c].index = i; 1045 } 1046 } 1047 } 1048 1049 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1050 if (pc->attr_nr) { 1051 struct nv50_reg *iv = NULL, *tmp = NULL; 1052 int aid = 0; 1053 1054 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1055 if (!pc->attr) 1056 goto out_err; 1057 1058 if (pc->p->type == NV50_PROG_FRAGMENT) { 1059 iv = alloc_temp(pc, NULL); 1060 aid++; 1061 } 1062 1063 for (i = 0; i < pc->attr_nr; i++) { 1064 struct nv50_reg *a = &pc->attr[i*4]; 1065 1066 for (c = 0; c < 4; c++) { 1067 if (pc->p->type == NV50_PROG_FRAGMENT) { 1068 struct nv50_reg *at = 1069 alloc_temp(pc, NULL); 1070 pc->attr[i*4+c].type = at->type; 1071 pc->attr[i*4+c].hw = at->hw; 1072 pc->attr[i*4+c].index = at->index; 1073 } else { 1074 pc->p->cfg.vp.attr[aid/32] |= 1075 (1 << (aid % 32)); 1076 pc->attr[i*4+c].type = P_ATTR; 1077 pc->attr[i*4+c].hw = aid++; 1078 pc->attr[i*4+c].index = i; 1079 } 1080 } 1081 1082 if (pc->p->type != NV50_PROG_FRAGMENT) 1083 continue; 1084 1085 emit_interp(pc, iv, iv, iv, FALSE); 1086 tmp = alloc_temp(pc, NULL); 1087 { 1088 unsigned inst[2] = { 0, 0 }; 1089 inst[0] = 0x90000000; 1090 inst[0] |= (tmp->hw << 2); 1091 emit(pc, inst); 1092 } 1093 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1094 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1095 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1096 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1097 free_temp(pc, tmp); 1098 } 1099 1100 if (iv) 1101 free_temp(pc, iv); 1102 } 1103 1104 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1105 if (pc->result_nr) { 1106 int rid = 0; 1107 1108 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1109 if (!pc->result) 1110 goto out_err; 1111 1112 for (i = 0; i < pc->result_nr; i++) { 1113 for (c = 0; c < 4; c++) { 1114 if (pc->p->type == NV50_PROG_FRAGMENT) 1115 pc->result[i*4+c].type = P_TEMP; 1116 else 1117 pc->result[i*4+c].type = P_RESULT; 1118 pc->result[i*4+c].hw = rid++; 1119 pc->result[i*4+c].index = i; 1120 } 1121 } 1122 } 1123 1124 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1125 if (pc->param_nr) { 1126 int rid = 0; 1127 1128 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1129 if (!pc->param) 1130 goto out_err; 1131 1132 for (i = 0; i < pc->param_nr; i++) { 1133 for (c = 0; c < 4; c++) { 1134 pc->param[i*4+c].type = P_CONST; 1135 pc->param[i*4+c].hw = rid++; 1136 pc->param[i*4+c].index = i; 1137 } 1138 } 1139 } 1140 1141 if (pc->immd_nr) { 1142 int rid = 0; 1143 1144 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1145 if (!pc->immd) 1146 goto out_err; 1147 1148 for (i = 0; i < pc->immd_nr; i++) { 1149 for (c = 0; c < 4; c++) { 1150 pc->immd[i*4+c].type = P_IMMD; 1151 pc->immd[i*4+c].hw = rid++; 1152 pc->immd[i*4+c].index = i; 1153 } 1154 } 1155 } 1156 1157 ret = TRUE; 1158out_err: 1159 tgsi_parse_free(&p); 1160 return ret; 1161} 1162 1163static boolean 1164nv50_program_tx(struct nv50_program *p) 1165{ 1166 struct tgsi_parse_context parse; 1167 struct nv50_pc *pc; 1168 boolean ret; 1169 1170 pc = CALLOC_STRUCT(nv50_pc); 1171 if (!pc) 1172 return FALSE; 1173 pc->p = p; 1174 pc->p->cfg.high_temp = 4; 1175 1176 ret = nv50_program_tx_prep(pc); 1177 if (ret == FALSE) 1178 goto out_cleanup; 1179 1180 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1181 while (!tgsi_parse_end_of_tokens(&parse)) { 1182 const union tgsi_full_token *tok = &parse.FullToken; 1183 1184 tgsi_parse_token(&parse); 1185 1186 switch (tok->Token.Type) { 1187 case TGSI_TOKEN_TYPE_INSTRUCTION: 1188 ret = nv50_program_tx_insn(pc, tok); 1189 if (ret == FALSE) 1190 goto out_err; 1191 break; 1192 default: 1193 break; 1194 } 1195 } 1196 1197 p->immd_nr = pc->immd_nr * 4; 1198 p->immd = pc->immd_buf; 1199 1200out_err: 1201 tgsi_parse_free(&parse); 1202 1203out_cleanup: 1204 return ret; 1205} 1206 1207static void 1208nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1209{ 1210 int i; 1211 1212 if (nv50_program_tx(p) == FALSE) 1213 assert(0); 1214 /* *not* sufficient, it's fine if last inst is long and 1215 * NOT immd - otherwise it's fucked fucked fucked */ 1216 p->insns[p->insns_nr - 1] |= 0x00000001; 1217 1218 if (p->type == NV50_PROG_VERTEX) { 1219 for (i = 0; i < p->insns_nr; i++) 1220 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1221 } else { 1222 for (i = 0; i < p->insns_nr; i++) 1223 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1224 } 1225 1226 p->translated = TRUE; 1227} 1228 1229static void 1230nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1231{ 1232 int i; 1233 1234 for (i = 0; i < p->immd_nr; i++) { 1235 BEGIN_RING(tesla, 0x0f00, 2); 1236 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 1237 OUT_RING (fui(p->immd[i])); 1238 } 1239} 1240 1241static void 1242nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1243{ 1244 struct pipe_winsys *ws = nv50->pipe.winsys; 1245 void *map; 1246 1247 if (!p->buffer) 1248 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1249 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1250 memcpy(map, p->insns, p->insns_nr * 4); 1251 ws->buffer_unmap(ws, p->buffer); 1252} 1253 1254void 1255nv50_vertprog_validate(struct nv50_context *nv50) 1256{ 1257 struct nouveau_grobj *tesla = nv50->screen->tesla; 1258 struct nv50_program *p = nv50->vertprog; 1259 struct nouveau_stateobj *so; 1260 1261 if (!p->translated) { 1262 nv50_program_validate(nv50, p); 1263 if (!p->translated) 1264 assert(0); 1265 } 1266 1267 nv50_program_validate_data(nv50, p); 1268 nv50_program_validate_code(nv50, p); 1269 1270 so = so_new(11, 2); 1271 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1272 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1273 NOUVEAU_BO_HIGH, 0, 0); 1274 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1275 NOUVEAU_BO_LOW, 0, 0); 1276 so_method(so, tesla, 0x1650, 2); 1277 so_data (so, p->cfg.vp.attr[0]); 1278 so_data (so, p->cfg.vp.attr[1]); 1279 so_method(so, tesla, 0x16ac, 2); 1280 so_data (so, 8); 1281 so_data (so, p->cfg.high_temp); 1282 so_method(so, tesla, 0x140c, 1); 1283 so_data (so, 0); /* program start offset */ 1284 so_emit(nv50->screen->nvws, so); 1285 so_ref(NULL, &so); 1286} 1287 1288void 1289nv50_fragprog_validate(struct nv50_context *nv50) 1290{ 1291 struct nouveau_grobj *tesla = nv50->screen->tesla; 1292 struct nv50_program *p = nv50->fragprog; 1293 struct nouveau_stateobj *so; 1294 1295 if (!p->translated) { 1296 nv50_program_validate(nv50, p); 1297 if (!p->translated) 1298 assert(0); 1299 } 1300 1301 nv50_program_validate_data(nv50, p); 1302 nv50_program_validate_code(nv50, p); 1303 1304 so = so_new(7, 2); 1305 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1306 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1307 NOUVEAU_BO_HIGH, 0, 0); 1308 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1309 NOUVEAU_BO_LOW, 0, 0); 1310 so_method(so, tesla, 0x198c, 1); 1311 so_data (so, p->cfg.high_temp); 1312 so_method(so, tesla, 0x1414, 1); 1313 so_data (so, 0); /* program start offset */ 1314 so_emit(nv50->screen->nvws, so); 1315 so_ref(NULL, &so); 1316} 1317 1318void 1319nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1320{ 1321 struct pipe_winsys *ws = nv50->pipe.winsys; 1322 1323 if (p->insns_nr) { 1324 if (p->insns) 1325 FREE(p->insns); 1326 p->insns_nr = 0; 1327 } 1328 1329 if (p->buffer) 1330 pipe_buffer_reference(ws, &p->buffer, NULL); 1331 1332 p->translated = 0; 1333} 1334 1335