nv50_program.c revision 9a37a56c8ab8c64bdadb1e1e807f885d6a5e3121
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL - gallium craps itself on progs/vp/arl.txt 16 * 17 * MSB - Like MAD, but MUL+SUB 18 * - Fuck it off, introduce a way to negate args for ops that 19 * support it. 20 * 21 * Look into inlining IMMD for ops other than MOV (make it general?) 22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 24 * 25 * Hmmm.. what happens if we have src1+src2 both consts.. ouch ! 26 * 27 * Verify half-insns work where expected - and force disable them where they 28 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 29 */ 30struct nv50_reg { 31 enum { 32 P_TEMP, 33 P_ATTR, 34 P_RESULT, 35 P_CONST, 36 P_IMMD 37 } type; 38 int index; 39 40 int hw; 41 int neg; 42}; 43 44struct nv50_pc { 45 struct nv50_program *p; 46 47 /* hw resources */ 48 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 49 50 /* tgsi resources */ 51 struct nv50_reg *temp; 52 int temp_nr; 53 struct nv50_reg *attr; 54 int attr_nr; 55 struct nv50_reg *result; 56 int result_nr; 57 struct nv50_reg *param; 58 int param_nr; 59 struct nv50_reg *immd; 60 float *immd_buf; 61 int immd_nr; 62 63 struct nv50_reg *temp_temp[8]; 64 unsigned temp_temp_nr; 65}; 66 67static void 68alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 69{ 70 int i; 71 72 if (reg->type != P_TEMP) 73 return; 74 75 if (reg->hw >= 0) { 76 /*XXX: do this here too to catch FP temp-as-attr usage.. 77 * not clean, but works */ 78 if (pc->p->cfg.high_temp < (reg->hw + 1)) 79 pc->p->cfg.high_temp = reg->hw + 1; 80 return; 81 } 82 83 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 84 if (!(pc->r_temp[i])) { 85 pc->r_temp[i] = reg; 86 reg->hw = i; 87 if (pc->p->cfg.high_temp < (i + 1)) 88 pc->p->cfg.high_temp = i + 1; 89 return; 90 } 91 } 92 93 assert(0); 94} 95 96static struct nv50_reg * 97alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 98{ 99 struct nv50_reg *r; 100 int i; 101 102 if (dst && dst->type == P_TEMP && dst->hw == -1) 103 return dst; 104 105 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 106 if (!pc->r_temp[i]) { 107 r = CALLOC_STRUCT(nv50_reg); 108 r->type = P_TEMP; 109 r->index = -1; 110 r->hw = i; 111 pc->r_temp[i] = r; 112 return r; 113 } 114 } 115 116 assert(0); 117 return NULL; 118} 119 120static void 121free_temp(struct nv50_pc *pc, struct nv50_reg *r) 122{ 123 if (r->index == -1) { 124 FREE(pc->r_temp[r->hw]); 125 pc->r_temp[r->hw] = NULL; 126 } 127} 128 129static struct nv50_reg * 130temp_temp(struct nv50_pc *pc) 131{ 132 if (pc->temp_temp_nr >= 8) 133 assert(0); 134 135 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 136 return pc->temp_temp[pc->temp_temp_nr++]; 137} 138 139static void 140kill_temp_temp(struct nv50_pc *pc) 141{ 142 int i; 143 144 for (i = 0; i < pc->temp_temp_nr; i++) 145 free_temp(pc, pc->temp_temp[i]); 146 pc->temp_temp_nr = 0; 147} 148 149static int 150ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 151{ 152 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 153 sizeof(float)); 154 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 155 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 156 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 157 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 158 159 return pc->immd_nr++; 160} 161 162static struct nv50_reg * 163alloc_immd(struct nv50_pc *pc, float f) 164{ 165 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 166 unsigned hw; 167 168 hw = ctor_immd(pc, f, 0, 0, 0) * 4; 169 r->type = P_IMMD; 170 r->hw = hw; 171 r->index = -1; 172 return r; 173} 174 175static void 176emit(struct nv50_pc *pc, unsigned *inst) 177{ 178 struct nv50_program *p = pc->p; 179 180 if (inst[0] & 1) { 181 p->insns_nr += 2; 182 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 183 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 184 } else { 185 p->insns_nr += 1; 186 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 187 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 188 } 189} 190 191static INLINE void set_long(struct nv50_pc *, unsigned *); 192 193static boolean 194is_long(unsigned *inst) 195{ 196 if (inst[0] & 1) 197 return TRUE; 198 return FALSE; 199} 200 201static boolean 202is_immd(unsigned *inst) 203{ 204 if (is_long(inst) && (inst[1] & 3) == 3) 205 return TRUE; 206 return FALSE; 207} 208 209static INLINE void 210set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 211{ 212 set_long(pc, inst); 213 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 214 inst[1] |= (pred << 7) | (idx << 12); 215} 216 217static INLINE void 218set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 219{ 220 set_long(pc, inst); 221 inst[1] &= ~((0x3 << 4) | (1 << 6)); 222 inst[1] |= (idx << 4) | (on << 6); 223} 224 225static INLINE void 226set_long(struct nv50_pc *pc, unsigned *inst) 227{ 228 if (is_long(inst)) 229 return; 230 231 inst[0] |= 1; 232 set_pred(pc, 0xf, 0, inst); 233 set_pred_wr(pc, 0, 0, inst); 234} 235 236static INLINE void 237set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 238{ 239 if (dst->type == P_RESULT) { 240 set_long(pc, inst); 241 inst[1] |= 0x00000008; 242 } 243 244 alloc_reg(pc, dst); 245 inst[0] |= (dst->hw << 2); 246} 247 248static INLINE void 249set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 250{ 251 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 252 253 set_long(pc, inst); 254 /*XXX: can't be predicated - bits overlap.. catch cases where both 255 * are required and avoid them. */ 256 set_pred(pc, 0, 0, inst); 257 set_pred_wr(pc, 0, 0, inst); 258 259 inst[1] |= 0x00000002 | 0x00000001; 260 inst[0] |= (val & 0x3f) << 16; 261 inst[1] |= (val >> 6) << 2; 262} 263 264static void 265emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 266 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 267{ 268 unsigned inst[2] = { 0, 0 }; 269 270 inst[0] |= 0x80000000; 271 set_dst(pc, dst, inst); 272 alloc_reg(pc, iv); 273 inst[0] |= (iv->hw << 9); 274 alloc_reg(pc, src); 275 inst[0] |= (src->hw << 16); 276 if (noperspective) 277 inst[0] |= (1 << 25); 278 279 emit(pc, inst); 280} 281 282static void 283set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 284{ 285 set_long(pc, inst); 286 if (src->type == P_IMMD) { 287 inst[1] |= (NV50_CB_PMISC << 22); 288 } else { 289 if (pc->p->type == NV50_PROG_VERTEX) 290 inst[1] |= (NV50_CB_PVP << 22); 291 else 292 inst[1] |= (NV50_CB_PFP << 22); 293 } 294} 295 296static void 297emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 298{ 299 unsigned inst[2] = { 0, 0 }; 300 301 inst[0] |= 0x10000000; 302 303 set_dst(pc, dst, inst); 304 305 if (dst->type != P_RESULT && src->type == P_IMMD) { 306 set_immd(pc, src, inst); 307 /*XXX: 32-bit, but steals part of "half" reg space - need to 308 * catch and handle this case if/when we do half-regs 309 */ 310 inst[0] |= 0x00008000; 311 } else 312 if (src->type == P_IMMD || src->type == P_CONST) { 313 set_long(pc, inst); 314 set_cseg(pc, src, inst); 315 inst[0] |= (src->hw << 9); 316 inst[1] |= 0x20000000; /* src0 const? */ 317 } else { 318 if (src->type == P_ATTR) { 319 set_long(pc, inst); 320 inst[1] |= 0x00200000; 321 } 322 323 alloc_reg(pc, src); 324 inst[0] |= (src->hw << 9); 325 } 326 327 /* We really should support "half" instructions here at some point, 328 * but I don't feel confident enough about them yet. 329 */ 330 set_long(pc, inst); 331 if (is_long(inst) && !is_immd(inst)) { 332 inst[1] |= 0x04000000; /* 32-bit */ 333 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 334 } 335 336 emit(pc, inst); 337} 338 339static boolean 340check_swap_src_0_1(struct nv50_pc *pc, 341 struct nv50_reg **s0, struct nv50_reg **s1) 342{ 343 struct nv50_reg *src0 = *s0, *src1 = *s1; 344 345 if (src0->type == P_CONST) { 346 if (src1->type != P_CONST) { 347 *s0 = src1; 348 *s1 = src0; 349 return TRUE; 350 } 351 } else 352 if (src1->type == P_ATTR) { 353 if (src0->type != P_ATTR) { 354 *s0 = src1; 355 *s1 = src0; 356 return TRUE; 357 } 358 } 359 360 return FALSE; 361} 362 363static void 364set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 365{ 366 if (src->type == P_ATTR) { 367 set_long(pc, inst); 368 inst[1] |= 0x00200000; 369 } else 370 if (src->type == P_CONST || src->type == P_IMMD) { 371 struct nv50_reg *temp = temp_temp(pc); 372 373 emit_mov(pc, temp, src); 374 src = temp; 375 } 376 377 alloc_reg(pc, src); 378 inst[0] |= (src->hw << 9); 379} 380 381static void 382set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 383{ 384 if (src->type == P_ATTR) { 385 struct nv50_reg *temp = temp_temp(pc); 386 387 emit_mov(pc, temp, src); 388 src = temp; 389 } else 390 if (src->type == P_CONST || src->type == P_IMMD) { 391 set_cseg(pc, src, inst); 392 inst[0] |= 0x00800000; 393 } 394 395 alloc_reg(pc, src); 396 inst[0] |= (src->hw << 16); 397} 398 399static void 400set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 401{ 402 set_long(pc, inst); 403 404 if (src->type == P_ATTR) { 405 struct nv50_reg *temp = temp_temp(pc); 406 407 emit_mov(pc, temp, src); 408 src = temp; 409 } else 410 if (src->type == P_CONST || src->type == P_IMMD) { 411 set_cseg(pc, src, inst); 412 inst[0] |= 0x01000000; 413 } 414 415 alloc_reg(pc, src); 416 inst[1] |= (src->hw << 14); 417} 418 419static void 420emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 421 struct nv50_reg *src1) 422{ 423 unsigned inst[2] = { 0, 0 }; 424 425 inst[0] |= 0xc0000000; 426 set_long(pc, inst); 427 428 check_swap_src_0_1(pc, &src0, &src1); 429 set_dst(pc, dst, inst); 430 set_src_0(pc, src0, inst); 431 set_src_1(pc, src1, inst); 432 433 emit(pc, inst); 434} 435 436static void 437emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 438 struct nv50_reg *src0, struct nv50_reg *src1) 439{ 440 unsigned inst[2] = { 0, 0 }; 441 442 inst[0] |= 0xb0000000; 443 444 check_swap_src_0_1(pc, &src0, &src1); 445 set_dst(pc, dst, inst); 446 set_src_0(pc, src0, inst); 447 if (is_long(inst)) 448 set_src_2(pc, src1, inst); 449 else 450 set_src_1(pc, src1, inst); 451 452 emit(pc, inst); 453} 454 455static void 456emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 457 struct nv50_reg *src0, struct nv50_reg *src1) 458{ 459 unsigned inst[2] = { 0, 0 }; 460 461 set_long(pc, inst); 462 inst[0] |= 0xb0000000; 463 inst[1] |= (sub << 29); 464 465 check_swap_src_0_1(pc, &src0, &src1); 466 set_dst(pc, dst, inst); 467 set_src_0(pc, src0, inst); 468 set_src_1(pc, src1, inst); 469 470 emit(pc, inst); 471} 472 473static void 474emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 475 struct nv50_reg *src1) 476{ 477 unsigned inst[2] = { 0, 0 }; 478 479 inst[0] |= 0xb0000000; 480 481 set_long(pc, inst); 482 if (check_swap_src_0_1(pc, &src0, &src1)) 483 inst[1] |= 0x04000000; 484 else 485 inst[1] |= 0x08000000; 486 487 set_dst(pc, dst, inst); 488 set_src_0(pc, src0, inst); 489 set_src_2(pc, src1, inst); 490 491 emit(pc, inst); 492} 493 494static void 495emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 496 struct nv50_reg *src1, struct nv50_reg *src2) 497{ 498 unsigned inst[2] = { 0, 0 }; 499 500 inst[0] |= 0xe0000000; 501 502 check_swap_src_0_1(pc, &src0, &src1); 503 set_dst(pc, dst, inst); 504 set_src_0(pc, src0, inst); 505 set_src_1(pc, src1, inst); 506 set_src_2(pc, src2, inst); 507 508 emit(pc, inst); 509} 510 511static void 512emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 513 struct nv50_reg *src1, struct nv50_reg *src2) 514{ 515 unsigned inst[2] = { 0, 0 }; 516 517 inst[0] |= 0xe0000000; 518 set_long(pc, inst); 519 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 520 521 check_swap_src_0_1(pc, &src0, &src1); 522 set_dst(pc, dst, inst); 523 set_src_0(pc, src0, inst); 524 set_src_1(pc, src1, inst); 525 set_src_2(pc, src2, inst); 526 527 emit(pc, inst); 528} 529 530static void 531emit_flop(struct nv50_pc *pc, unsigned sub, 532 struct nv50_reg *dst, struct nv50_reg *src) 533{ 534 unsigned inst[2] = { 0, 0 }; 535 536 inst[0] |= 0x90000000; 537 if (sub) { 538 set_long(pc, inst); 539 inst[1] |= (sub << 29); 540 } 541 542 set_dst(pc, dst, inst); 543 set_src_0(pc, src, inst); 544 545 emit(pc, inst); 546} 547 548static void 549emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 550{ 551 unsigned inst[2] = { 0, 0 }; 552 553 inst[0] |= 0xb0000000; 554 555 set_dst(pc, dst, inst); 556 set_src_0(pc, src, inst); 557 set_long(pc, inst); 558 inst[1] |= (6 << 29) | 0x00004000; 559 560 emit(pc, inst); 561} 562 563/*XXX: inaccurate results.. why? */ 564#define ALLOW_SET_SWAP 0 565 566static void 567emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 568 struct nv50_reg *src0, struct nv50_reg *src1) 569{ 570 unsigned inst[2] = { 0, 0 }; 571#if ALLOW_SET_SWAP 572 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; 573#endif 574 struct nv50_reg *rdst; 575 576#if ALLOW_SET_SWAP 577 assert(c_op <= 7); 578 if (check_swap_src_0_1(pc, &src0, &src1)) 579 c_op = inv_cop[c_op]; 580#endif 581 582 rdst = dst; 583 if (dst->type != P_TEMP) 584 dst = alloc_temp(pc, NULL); 585 586 /* set.u32 */ 587 set_long(pc, inst); 588 inst[0] |= 0xb0000000; 589 inst[1] |= (3 << 29); 590 inst[1] |= (c_op << 14); 591 /*XXX: breaks things, .u32 by default? 592 * decuda will disasm as .u16 and use .lo/.hi regs, but this 593 * doesn't seem to match what the hw actually does. 594 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 595 */ 596 set_dst(pc, dst, inst); 597 set_src_0(pc, src0, inst); 598 set_src_1(pc, src1, inst); 599 emit(pc, inst); 600 601 /* cvt.f32.u32 */ 602 inst[0] = 0xa0000001; 603 inst[1] = 0x64014780; 604 set_dst(pc, rdst, inst); 605 set_src_0(pc, dst, inst); 606 emit(pc, inst); 607 608 if (dst != rdst) 609 free_temp(pc, dst); 610} 611 612static void 613emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 614{ 615 unsigned inst[2] = { 0, 0 }; 616 617 inst[0] = 0xa0000000; /* cvt */ 618 set_long(pc, inst); 619 inst[1] |= (6 << 29); /* cvt */ 620 inst[1] |= 0x08000000; /* integer mode */ 621 inst[1] |= 0x04000000; /* 32 bit */ 622 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 623 inst[1] |= (1 << 14); /* src .f32 */ 624 set_dst(pc, dst, inst); 625 set_src_0(pc, src, inst); 626 627 emit(pc, inst); 628} 629 630static void 631emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 632 struct nv50_reg *v, struct nv50_reg *e) 633{ 634 struct nv50_reg *temp = alloc_temp(pc, NULL); 635 636 emit_flop(pc, 3, temp, v); 637 emit_mul(pc, temp, temp, e); 638 emit_preex2(pc, temp, temp); 639 emit_flop(pc, 6, dst, temp); 640 641 free_temp(pc, temp); 642} 643 644static void 645emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 646{ 647 unsigned inst[2] = { 0, 0 }; 648 649 inst[0] = 0xa0000000; /* cvt */ 650 set_long(pc, inst); 651 inst[1] |= (6 << 29); /* cvt */ 652 inst[1] |= 0x04000000; /* 32 bit */ 653 inst[1] |= (1 << 14); /* src .f32 */ 654 inst[1] |= ((1 << 6) << 14); /* .abs */ 655 set_dst(pc, dst, inst); 656 set_src_0(pc, src, inst); 657 658 emit(pc, inst); 659} 660 661static void 662emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) 663{ 664 struct nv50_reg *one = alloc_immd(pc, 1.0); 665 struct nv50_reg *zero = alloc_immd(pc, 0.0); 666 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 667 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 668 struct nv50_reg *tmp[4]; 669 670 emit_mov(pc, dst[0], one); 671 emit_mov(pc, dst[3], one); 672 673 tmp[0] = temp_temp(pc); 674 emit_minmax(pc, 4, dst[1], src[0], zero); 675 set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); 676 677 tmp[1] = temp_temp(pc); 678 emit_minmax(pc, 4, tmp[1], src[1], zero); 679 680 tmp[3] = temp_temp(pc); 681 emit_minmax(pc, 4, tmp[3], src[3], neg128); 682 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 683 684 emit_pow(pc, dst[2], tmp[1], tmp[3]); 685 emit_mov(pc, dst[2], zero); 686 set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); 687} 688 689static struct nv50_reg * 690tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 691{ 692 switch (dst->DstRegister.File) { 693 case TGSI_FILE_TEMPORARY: 694 return &pc->temp[dst->DstRegister.Index * 4 + c]; 695 case TGSI_FILE_OUTPUT: 696 return &pc->result[dst->DstRegister.Index * 4 + c]; 697 case TGSI_FILE_NULL: 698 return NULL; 699 default: 700 break; 701 } 702 703 return NULL; 704} 705 706static struct nv50_reg * 707tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 708{ 709 struct nv50_reg *r = NULL; 710 struct nv50_reg *temp; 711 unsigned c; 712 713 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 714 switch (c) { 715 case TGSI_EXTSWIZZLE_X: 716 case TGSI_EXTSWIZZLE_Y: 717 case TGSI_EXTSWIZZLE_Z: 718 case TGSI_EXTSWIZZLE_W: 719 switch (src->SrcRegister.File) { 720 case TGSI_FILE_INPUT: 721 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 722 break; 723 case TGSI_FILE_TEMPORARY: 724 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 725 break; 726 case TGSI_FILE_CONSTANT: 727 r = &pc->param[src->SrcRegister.Index * 4 + c]; 728 break; 729 case TGSI_FILE_IMMEDIATE: 730 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 731 break; 732 default: 733 assert(0); 734 break; 735 } 736 break; 737 case TGSI_EXTSWIZZLE_ZERO: 738 r = alloc_immd(pc, 0.0); 739 break; 740 case TGSI_EXTSWIZZLE_ONE: 741 r = alloc_immd(pc, 1.0); 742 break; 743 default: 744 assert(0); 745 break; 746 } 747 748 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 749 case TGSI_UTIL_SIGN_KEEP: 750 break; 751 case TGSI_UTIL_SIGN_CLEAR: 752 temp = temp_temp(pc); 753 emit_abs(pc, temp, r); 754 r = temp; 755 break; 756 default: 757 assert(0); 758 break; 759 } 760 761 return r; 762} 763 764static boolean 765nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 766{ 767 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 768 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 769 unsigned mask, sat; 770 int i, c; 771 772 NOUVEAU_ERR("insn %p\n", tok); 773 774 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 775 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 776 777 for (c = 0; c < 4; c++) { 778 if (mask & (1 << c)) 779 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 780 else 781 dst[c] = NULL; 782 } 783 784 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 785 for (c = 0; c < 4; c++) 786 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 787 } 788 789 if (sat) { 790 for (c = 0; c < 4; c++) { 791 rdst[c] = dst[c]; 792 dst[c] = temp_temp(pc); 793 } 794 } 795 796 switch (inst->Instruction.Opcode) { 797 case TGSI_OPCODE_ABS: 798 for (c = 0; c < 4; c++) { 799 if (!(mask & (1 << c))) 800 continue; 801 emit_abs(pc, dst[c], src[0][c]); 802 } 803 break; 804 case TGSI_OPCODE_ADD: 805 for (c = 0; c < 4; c++) { 806 if (!(mask & (1 << c))) 807 continue; 808 emit_add(pc, dst[c], src[0][c], src[1][c]); 809 } 810 break; 811 case TGSI_OPCODE_COS: 812 for (c = 0; c < 4; c++) { 813 if (!(mask & (1 << c))) 814 continue; 815 emit_flop(pc, 5, dst[c], src[0][c]); 816 } 817 break; 818 case TGSI_OPCODE_DP3: 819 temp = alloc_temp(pc, NULL); 820 emit_mul(pc, temp, src[0][0], src[1][0]); 821 emit_mad(pc, temp, src[0][1], src[1][1], temp); 822 emit_mad(pc, temp, src[0][2], src[1][2], temp); 823 for (c = 0; c < 4; c++) { 824 if (!(mask & (1 << c))) 825 continue; 826 emit_mov(pc, dst[c], temp); 827 } 828 free_temp(pc, temp); 829 break; 830 case TGSI_OPCODE_DP4: 831 temp = alloc_temp(pc, NULL); 832 emit_mul(pc, temp, src[0][0], src[1][0]); 833 emit_mad(pc, temp, src[0][1], src[1][1], temp); 834 emit_mad(pc, temp, src[0][2], src[1][2], temp); 835 emit_mad(pc, temp, src[0][3], src[1][3], temp); 836 for (c = 0; c < 4; c++) { 837 if (!(mask & (1 << c))) 838 continue; 839 emit_mov(pc, dst[c], temp); 840 } 841 free_temp(pc, temp); 842 break; 843 case TGSI_OPCODE_DPH: 844 temp = alloc_temp(pc, NULL); 845 emit_mul(pc, temp, src[0][0], src[1][0]); 846 emit_mad(pc, temp, src[0][1], src[1][1], temp); 847 emit_mad(pc, temp, src[0][2], src[1][2], temp); 848 emit_add(pc, temp, src[1][3], temp); 849 for (c = 0; c < 4; c++) { 850 if (!(mask & (1 << c))) 851 continue; 852 emit_mov(pc, dst[c], temp); 853 } 854 free_temp(pc, temp); 855 break; 856 case TGSI_OPCODE_DST: 857 { 858 struct nv50_reg *one = alloc_immd(pc, 1.0); 859 if (mask & (1 << 0)) 860 emit_mov(pc, dst[0], one); 861 if (mask & (1 << 1)) 862 emit_mul(pc, dst[1], src[0][1], src[1][1]); 863 if (mask & (1 << 2)) 864 emit_mov(pc, dst[2], src[0][2]); 865 if (mask & (1 << 3)) 866 emit_mov(pc, dst[3], src[1][3]); 867 FREE(one); 868 } 869 break; 870 case TGSI_OPCODE_EX2: 871 temp = alloc_temp(pc, NULL); 872 for (c = 0; c < 4; c++) { 873 if (!(mask & (1 << c))) 874 continue; 875 emit_preex2(pc, temp, src[0][c]); 876 emit_flop(pc, 6, dst[c], temp); 877 } 878 free_temp(pc, temp); 879 break; 880 case TGSI_OPCODE_FLR: 881 for (c = 0; c < 4; c++) { 882 if (!(mask & (1 << c))) 883 continue; 884 emit_flr(pc, dst[c], src[0][c]); 885 } 886 break; 887 case TGSI_OPCODE_FRC: 888 temp = alloc_temp(pc, NULL); 889 for (c = 0; c < 4; c++) { 890 if (!(mask & (1 << c))) 891 continue; 892 emit_flr(pc, temp, src[0][c]); 893 emit_sub(pc, dst[c], src[0][c], temp); 894 } 895 free_temp(pc, temp); 896 break; 897 case TGSI_OPCODE_LIT: 898 /*XXX: writemask */ 899 emit_lit(pc, &dst[0], &src[0][0]); 900 break; 901 case TGSI_OPCODE_LG2: 902 for (c = 0; c < 4; c++) { 903 if (!(mask & (1 << c))) 904 continue; 905 emit_flop(pc, 3, dst[c], src[0][c]); 906 } 907 break; 908 case TGSI_OPCODE_MAD: 909 for (c = 0; c < 4; c++) { 910 if (!(mask & (1 << c))) 911 continue; 912 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 913 } 914 break; 915 case TGSI_OPCODE_MAX: 916 for (c = 0; c < 4; c++) { 917 if (!(mask & (1 << c))) 918 continue; 919 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 920 } 921 break; 922 case TGSI_OPCODE_MIN: 923 for (c = 0; c < 4; c++) { 924 if (!(mask & (1 << c))) 925 continue; 926 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 927 } 928 break; 929 case TGSI_OPCODE_MOV: 930 for (c = 0; c < 4; c++) { 931 if (!(mask & (1 << c))) 932 continue; 933 emit_mov(pc, dst[c], src[0][c]); 934 } 935 break; 936 case TGSI_OPCODE_MUL: 937 for (c = 0; c < 4; c++) { 938 if (!(mask & (1 << c))) 939 continue; 940 emit_mul(pc, dst[c], src[0][c], src[1][c]); 941 } 942 break; 943 case TGSI_OPCODE_POW: 944 temp = alloc_temp(pc, NULL); 945 emit_pow(pc, temp, src[0][0], src[1][0]); 946 for (c = 0; c < 4; c++) { 947 if (!(mask & (1 << c))) 948 continue; 949 emit_mov(pc, dst[c], temp); 950 } 951 free_temp(pc, temp); 952 break; 953 case TGSI_OPCODE_RCP: 954 for (c = 0; c < 4; c++) { 955 if (!(mask & (1 << c))) 956 continue; 957 emit_flop(pc, 0, dst[c], src[0][c]); 958 } 959 break; 960 case TGSI_OPCODE_RSQ: 961 for (c = 0; c < 4; c++) { 962 if (!(mask & (1 << c))) 963 continue; 964 emit_flop(pc, 2, dst[c], src[0][c]); 965 } 966 break; 967 case TGSI_OPCODE_SGE: 968 for (c = 0; c < 4; c++) { 969 if (!(mask & (1 << c))) 970 continue; 971 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 972 } 973 break; 974 case TGSI_OPCODE_SIN: 975 for (c = 0; c < 4; c++) { 976 if (!(mask & (1 << c))) 977 continue; 978 emit_flop(pc, 4, dst[c], src[0][c]); 979 } 980 break; 981 case TGSI_OPCODE_SLT: 982 for (c = 0; c < 4; c++) { 983 if (!(mask & (1 << c))) 984 continue; 985 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 986 } 987 break; 988 case TGSI_OPCODE_SUB: 989 for (c = 0; c < 4; c++) { 990 if (!(mask & (1 << c))) 991 continue; 992 emit_sub(pc, dst[c], src[0][c], src[1][c]); 993 } 994 break; 995 case TGSI_OPCODE_XPD: 996 temp = alloc_temp(pc, NULL); 997 if (mask & (1 << 0)) { 998 emit_mul(pc, temp, src[0][2], src[1][1]); 999 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 1000 } 1001 if (mask & (1 << 1)) { 1002 emit_mul(pc, temp, src[0][0], src[1][2]); 1003 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 1004 } 1005 if (mask & (1 << 2)) { 1006 emit_mul(pc, temp, src[0][1], src[1][0]); 1007 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 1008 } 1009 free_temp(pc, temp); 1010 break; 1011 case TGSI_OPCODE_END: 1012 break; 1013 default: 1014 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 1015 return FALSE; 1016 } 1017 1018 if (sat) { 1019 for (c = 0; c < 4; c++) { 1020 unsigned inst[2] = { 0, 0 }; 1021 1022 if (!(mask & (1 << c))) 1023 continue; 1024 1025 inst[0] = 0xa0000000; /* cvt */ 1026 set_long(pc, inst); 1027 inst[1] |= (6 << 29); /* cvt */ 1028 inst[1] |= 0x04000000; /* 32 bit */ 1029 inst[1] |= (1 << 14); /* src .f32 */ 1030 inst[1] |= ((1 << 5) << 14); /* .sat */ 1031 set_dst(pc, rdst[c], inst); 1032 set_src_0(pc, dst[c], inst); 1033 emit(pc, inst); 1034 } 1035 } 1036 1037 kill_temp_temp(pc); 1038 return TRUE; 1039} 1040 1041static boolean 1042nv50_program_tx_prep(struct nv50_pc *pc) 1043{ 1044 struct tgsi_parse_context p; 1045 boolean ret = FALSE; 1046 unsigned i, c; 1047 1048 tgsi_parse_init(&p, pc->p->pipe.tokens); 1049 while (!tgsi_parse_end_of_tokens(&p)) { 1050 const union tgsi_full_token *tok = &p.FullToken; 1051 1052 tgsi_parse_token(&p); 1053 switch (tok->Token.Type) { 1054 case TGSI_TOKEN_TYPE_IMMEDIATE: 1055 { 1056 const struct tgsi_full_immediate *imm = 1057 &p.FullToken.FullImmediate; 1058 1059 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1060 imm->u.ImmediateFloat32[1].Float, 1061 imm->u.ImmediateFloat32[2].Float, 1062 imm->u.ImmediateFloat32[3].Float); 1063 } 1064 break; 1065 case TGSI_TOKEN_TYPE_DECLARATION: 1066 { 1067 const struct tgsi_full_declaration *d; 1068 unsigned last; 1069 1070 d = &p.FullToken.FullDeclaration; 1071 last = d->u.DeclarationRange.Last; 1072 1073 switch (d->Declaration.File) { 1074 case TGSI_FILE_TEMPORARY: 1075 if (pc->temp_nr < (last + 1)) 1076 pc->temp_nr = last + 1; 1077 break; 1078 case TGSI_FILE_OUTPUT: 1079 if (pc->result_nr < (last + 1)) 1080 pc->result_nr = last + 1; 1081 break; 1082 case TGSI_FILE_INPUT: 1083 if (pc->attr_nr < (last + 1)) 1084 pc->attr_nr = last + 1; 1085 break; 1086 case TGSI_FILE_CONSTANT: 1087 if (pc->param_nr < (last + 1)) 1088 pc->param_nr = last + 1; 1089 break; 1090 default: 1091 NOUVEAU_ERR("bad decl file %d\n", 1092 d->Declaration.File); 1093 goto out_err; 1094 } 1095 } 1096 break; 1097 case TGSI_TOKEN_TYPE_INSTRUCTION: 1098 break; 1099 default: 1100 break; 1101 } 1102 } 1103 1104 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 1105 if (pc->temp_nr) { 1106 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1107 if (!pc->temp) 1108 goto out_err; 1109 1110 for (i = 0; i < pc->temp_nr; i++) { 1111 for (c = 0; c < 4; c++) { 1112 pc->temp[i*4+c].type = P_TEMP; 1113 pc->temp[i*4+c].hw = -1; 1114 pc->temp[i*4+c].index = i; 1115 } 1116 } 1117 } 1118 1119 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1120 if (pc->attr_nr) { 1121 struct nv50_reg *iv = NULL, *tmp = NULL; 1122 int aid = 0; 1123 1124 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1125 if (!pc->attr) 1126 goto out_err; 1127 1128 if (pc->p->type == NV50_PROG_FRAGMENT) { 1129 iv = alloc_temp(pc, NULL); 1130 aid++; 1131 } 1132 1133 for (i = 0; i < pc->attr_nr; i++) { 1134 struct nv50_reg *a = &pc->attr[i*4]; 1135 1136 for (c = 0; c < 4; c++) { 1137 if (pc->p->type == NV50_PROG_FRAGMENT) { 1138 struct nv50_reg *at = 1139 alloc_temp(pc, NULL); 1140 pc->attr[i*4+c].type = at->type; 1141 pc->attr[i*4+c].hw = at->hw; 1142 pc->attr[i*4+c].index = at->index; 1143 } else { 1144 pc->p->cfg.vp.attr[aid/32] |= 1145 (1 << (aid % 32)); 1146 pc->attr[i*4+c].type = P_ATTR; 1147 pc->attr[i*4+c].hw = aid++; 1148 pc->attr[i*4+c].index = i; 1149 } 1150 } 1151 1152 if (pc->p->type != NV50_PROG_FRAGMENT) 1153 continue; 1154 1155 emit_interp(pc, iv, iv, iv, FALSE); 1156 tmp = alloc_temp(pc, NULL); 1157 { 1158 unsigned inst[2] = { 0, 0 }; 1159 inst[0] = 0x90000000; 1160 inst[0] |= (tmp->hw << 2); 1161 emit(pc, inst); 1162 } 1163 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1164 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1165 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1166 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1167 free_temp(pc, tmp); 1168 } 1169 1170 if (iv) 1171 free_temp(pc, iv); 1172 } 1173 1174 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1175 if (pc->result_nr) { 1176 int rid = 0; 1177 1178 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1179 if (!pc->result) 1180 goto out_err; 1181 1182 for (i = 0; i < pc->result_nr; i++) { 1183 for (c = 0; c < 4; c++) { 1184 if (pc->p->type == NV50_PROG_FRAGMENT) 1185 pc->result[i*4+c].type = P_TEMP; 1186 else 1187 pc->result[i*4+c].type = P_RESULT; 1188 pc->result[i*4+c].hw = rid++; 1189 pc->result[i*4+c].index = i; 1190 } 1191 } 1192 } 1193 1194 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1195 if (pc->param_nr) { 1196 int rid = 0; 1197 1198 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1199 if (!pc->param) 1200 goto out_err; 1201 1202 for (i = 0; i < pc->param_nr; i++) { 1203 for (c = 0; c < 4; c++) { 1204 pc->param[i*4+c].type = P_CONST; 1205 pc->param[i*4+c].hw = rid++; 1206 pc->param[i*4+c].index = i; 1207 } 1208 } 1209 } 1210 1211 if (pc->immd_nr) { 1212 int rid = 0; 1213 1214 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1215 if (!pc->immd) 1216 goto out_err; 1217 1218 for (i = 0; i < pc->immd_nr; i++) { 1219 for (c = 0; c < 4; c++) { 1220 pc->immd[i*4+c].type = P_IMMD; 1221 pc->immd[i*4+c].hw = rid++; 1222 pc->immd[i*4+c].index = i; 1223 } 1224 } 1225 } 1226 1227 ret = TRUE; 1228out_err: 1229 tgsi_parse_free(&p); 1230 return ret; 1231} 1232 1233static boolean 1234nv50_program_tx(struct nv50_program *p) 1235{ 1236 struct tgsi_parse_context parse; 1237 struct nv50_pc *pc; 1238 boolean ret; 1239 1240 pc = CALLOC_STRUCT(nv50_pc); 1241 if (!pc) 1242 return FALSE; 1243 pc->p = p; 1244 pc->p->cfg.high_temp = 4; 1245 1246 ret = nv50_program_tx_prep(pc); 1247 if (ret == FALSE) 1248 goto out_cleanup; 1249 1250 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1251 while (!tgsi_parse_end_of_tokens(&parse)) { 1252 const union tgsi_full_token *tok = &parse.FullToken; 1253 1254 tgsi_parse_token(&parse); 1255 1256 switch (tok->Token.Type) { 1257 case TGSI_TOKEN_TYPE_INSTRUCTION: 1258 ret = nv50_program_tx_insn(pc, tok); 1259 if (ret == FALSE) 1260 goto out_err; 1261 break; 1262 default: 1263 break; 1264 } 1265 } 1266 1267 p->immd_nr = pc->immd_nr * 4; 1268 p->immd = pc->immd_buf; 1269 1270out_err: 1271 tgsi_parse_free(&parse); 1272 1273out_cleanup: 1274 return ret; 1275} 1276 1277static void 1278nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1279{ 1280 int i; 1281 1282 if (nv50_program_tx(p) == FALSE) 1283 assert(0); 1284 /* *not* sufficient, it's fine if last inst is long and 1285 * NOT immd - otherwise it's fucked fucked fucked */ 1286 p->insns[p->insns_nr - 1] |= 0x00000001; 1287 1288 if (p->type == NV50_PROG_VERTEX) { 1289 for (i = 0; i < p->insns_nr; i++) 1290 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1291 } else { 1292 for (i = 0; i < p->insns_nr; i++) 1293 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1294 } 1295 1296 p->translated = TRUE; 1297} 1298 1299static void 1300nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1301{ 1302 int i; 1303 1304 for (i = 0; i < p->immd_nr; i++) { 1305 BEGIN_RING(tesla, 0x0f00, 2); 1306 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 1307 OUT_RING (fui(p->immd[i])); 1308 } 1309} 1310 1311static void 1312nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1313{ 1314 struct pipe_winsys *ws = nv50->pipe.winsys; 1315 void *map; 1316 1317 if (!p->buffer) 1318 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1319 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1320 memcpy(map, p->insns, p->insns_nr * 4); 1321 ws->buffer_unmap(ws, p->buffer); 1322} 1323 1324void 1325nv50_vertprog_validate(struct nv50_context *nv50) 1326{ 1327 struct nouveau_grobj *tesla = nv50->screen->tesla; 1328 struct nv50_program *p = nv50->vertprog; 1329 struct nouveau_stateobj *so; 1330 1331 if (!p->translated) { 1332 nv50_program_validate(nv50, p); 1333 if (!p->translated) 1334 assert(0); 1335 } 1336 1337 nv50_program_validate_data(nv50, p); 1338 nv50_program_validate_code(nv50, p); 1339 1340 so = so_new(11, 2); 1341 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1342 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1343 NOUVEAU_BO_HIGH, 0, 0); 1344 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1345 NOUVEAU_BO_LOW, 0, 0); 1346 so_method(so, tesla, 0x1650, 2); 1347 so_data (so, p->cfg.vp.attr[0]); 1348 so_data (so, p->cfg.vp.attr[1]); 1349 so_method(so, tesla, 0x16ac, 2); 1350 so_data (so, 8); 1351 so_data (so, p->cfg.high_temp); 1352 so_method(so, tesla, 0x140c, 1); 1353 so_data (so, 0); /* program start offset */ 1354 so_emit(nv50->screen->nvws, so); 1355 so_ref(NULL, &so); 1356} 1357 1358void 1359nv50_fragprog_validate(struct nv50_context *nv50) 1360{ 1361 struct nouveau_grobj *tesla = nv50->screen->tesla; 1362 struct nv50_program *p = nv50->fragprog; 1363 struct nouveau_stateobj *so; 1364 1365 if (!p->translated) { 1366 nv50_program_validate(nv50, p); 1367 if (!p->translated) 1368 assert(0); 1369 } 1370 1371 nv50_program_validate_data(nv50, p); 1372 nv50_program_validate_code(nv50, p); 1373 1374 so = so_new(7, 2); 1375 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1376 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1377 NOUVEAU_BO_HIGH, 0, 0); 1378 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1379 NOUVEAU_BO_LOW, 0, 0); 1380 so_method(so, tesla, 0x198c, 1); 1381 so_data (so, p->cfg.high_temp); 1382 so_method(so, tesla, 0x1414, 1); 1383 so_data (so, 0); /* program start offset */ 1384 so_emit(nv50->screen->nvws, so); 1385 so_ref(NULL, &so); 1386} 1387 1388void 1389nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1390{ 1391 struct pipe_winsys *ws = nv50->pipe.winsys; 1392 1393 if (p->insns_nr) { 1394 if (p->insns) 1395 FREE(p->insns); 1396 p->insns_nr = 0; 1397 } 1398 1399 if (p->buffer) 1400 pipe_buffer_reference(ws, &p->buffer, NULL); 1401 1402 p->translated = 0; 1403} 1404 1405