nv50_program.c revision 619549a6377a58d54c9cf55f8863beed56b09566
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11 12#define NV50_SU_MAX_TEMP 64 13#define NV50_PROGRAM_DUMP 14 15/* ARL - gallium craps itself on progs/vp/arl.txt 16 * 17 * MSB - Like MAD, but MUL+SUB 18 * - Fuck it off, introduce a way to negate args for ops that 19 * support it. 20 * 21 * Look into inlining IMMD for ops other than MOV (make it general?) 22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 24 * 25 * Verify half-insns work where expected - and force disable them where they 26 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 27 * 28 * FUCK! watch dst==src vectors, can overwrite components that are needed. 29 * ie. SUB R0, R0.yzxw, R0 30 * 31 * MOV dst, -src 32 * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0) 33 * mov dst, tmp 34 * 35 * Things to check with renouveau: 36 * FP attr/result assignment - how? 37 * attrib 38 * - 0x16bc maps vp output onto fp hpos 39 * - 0x16c0 maps vp output onto fp col0 40 * result 41 * - colr always 0-3 42 * - depr always 4 43 * 0x16bc->0x16e8 --> some binding between vp/fp regs 44 * 0x16b8 --> VP output count 45 * 46 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 47 * "MOV rcol.x, fcol.y" = 0x00000004 48 * 0x19a8 --> as above but 0x00000100 and 0x00000000 49 * - 0x00100000 used when KIL used 50 * 0x196c --> as above but 0x00000011 and 0x00000000 51 * 52 * 0x1988 --> 0xXXNNNNNN 53 * - XX == FP high something 54 */ 55struct nv50_reg { 56 enum { 57 P_TEMP, 58 P_ATTR, 59 P_RESULT, 60 P_CONST, 61 P_IMMD 62 } type; 63 int index; 64 65 int hw; 66 int neg; 67}; 68 69struct nv50_pc { 70 struct nv50_program *p; 71 72 /* hw resources */ 73 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 74 75 /* tgsi resources */ 76 struct nv50_reg *temp; 77 int temp_nr; 78 struct nv50_reg *attr; 79 int attr_nr; 80 struct nv50_reg *result; 81 int result_nr; 82 struct nv50_reg *param; 83 int param_nr; 84 struct nv50_reg *immd; 85 float *immd_buf; 86 int immd_nr; 87 88 struct nv50_reg *temp_temp[16]; 89 unsigned temp_temp_nr; 90}; 91 92static void 93alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 94{ 95 int i; 96 97 if (reg->type != P_TEMP) 98 return; 99 100 if (reg->hw >= 0) { 101 /*XXX: do this here too to catch FP temp-as-attr usage.. 102 * not clean, but works */ 103 if (pc->p->cfg.high_temp < (reg->hw + 1)) 104 pc->p->cfg.high_temp = reg->hw + 1; 105 return; 106 } 107 108 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 109 if (!(pc->r_temp[i])) { 110 pc->r_temp[i] = reg; 111 reg->hw = i; 112 if (pc->p->cfg.high_temp < (i + 1)) 113 pc->p->cfg.high_temp = i + 1; 114 return; 115 } 116 } 117 118 assert(0); 119} 120 121static struct nv50_reg * 122alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 123{ 124 struct nv50_reg *r; 125 int i; 126 127 if (dst && dst->type == P_TEMP && dst->hw == -1) 128 return dst; 129 130 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 131 if (!pc->r_temp[i]) { 132 r = CALLOC_STRUCT(nv50_reg); 133 r->type = P_TEMP; 134 r->index = -1; 135 r->hw = i; 136 pc->r_temp[i] = r; 137 return r; 138 } 139 } 140 141 assert(0); 142 return NULL; 143} 144 145static void 146free_temp(struct nv50_pc *pc, struct nv50_reg *r) 147{ 148 if (r->index == -1) { 149 unsigned hw = r->hw; 150 151 FREE(pc->r_temp[hw]); 152 pc->r_temp[hw] = NULL; 153 } 154} 155 156static struct nv50_reg * 157temp_temp(struct nv50_pc *pc) 158{ 159 if (pc->temp_temp_nr >= 16) 160 assert(0); 161 162 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 163 return pc->temp_temp[pc->temp_temp_nr++]; 164} 165 166static void 167kill_temp_temp(struct nv50_pc *pc) 168{ 169 int i; 170 171 for (i = 0; i < pc->temp_temp_nr; i++) 172 free_temp(pc, pc->temp_temp[i]); 173 pc->temp_temp_nr = 0; 174} 175 176static int 177ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 178{ 179 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 180 sizeof(float)); 181 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 182 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 183 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 184 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 185 186 return pc->immd_nr++; 187} 188 189static struct nv50_reg * 190alloc_immd(struct nv50_pc *pc, float f) 191{ 192 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 193 unsigned hw; 194 195 hw = ctor_immd(pc, f, 0, 0, 0) * 4; 196 r->type = P_IMMD; 197 r->hw = hw; 198 r->index = -1; 199 return r; 200} 201 202static struct nv50_program_exec * 203exec(struct nv50_pc *pc) 204{ 205 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); 206 207 e->param.index = -1; 208 return e; 209} 210 211static void 212emit(struct nv50_pc *pc, struct nv50_program_exec *e) 213{ 214 struct nv50_program *p = pc->p; 215 216 if (p->exec_tail) 217 p->exec_tail->next = e; 218 if (!p->exec_head) 219 p->exec_head = e; 220 p->exec_tail = e; 221 p->exec_size += (e->inst[0] & 1) ? 2 : 1; 222} 223 224static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); 225 226static boolean 227is_long(struct nv50_program_exec *e) 228{ 229 if (e->inst[0] & 1) 230 return TRUE; 231 return FALSE; 232} 233 234static boolean 235is_immd(struct nv50_program_exec *e) 236{ 237 if (is_long(e) && (e->inst[1] & 3) == 3) 238 return TRUE; 239 return FALSE; 240} 241 242static INLINE void 243set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, 244 struct nv50_program_exec *e) 245{ 246 set_long(pc, e); 247 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 248 e->inst[1] |= (pred << 7) | (idx << 12); 249} 250 251static INLINE void 252set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, 253 struct nv50_program_exec *e) 254{ 255 set_long(pc, e); 256 e->inst[1] &= ~((0x3 << 4) | (1 << 6)); 257 e->inst[1] |= (idx << 4) | (on << 6); 258} 259 260static INLINE void 261set_long(struct nv50_pc *pc, struct nv50_program_exec *e) 262{ 263 if (is_long(e)) 264 return; 265 266 e->inst[0] |= 1; 267 set_pred(pc, 0xf, 0, e); 268 set_pred_wr(pc, 0, 0, e); 269} 270 271static INLINE void 272set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) 273{ 274 if (dst->type == P_RESULT) { 275 set_long(pc, e); 276 e->inst[1] |= 0x00000008; 277 } 278 279 alloc_reg(pc, dst); 280 e->inst[0] |= (dst->hw << 2); 281} 282 283static INLINE void 284set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) 285{ 286 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 287 288 set_long(pc, e); 289 /*XXX: can't be predicated - bits overlap.. catch cases where both 290 * are required and avoid them. */ 291 set_pred(pc, 0, 0, e); 292 set_pred_wr(pc, 0, 0, e); 293 294 e->inst[1] |= 0x00000002 | 0x00000001; 295 e->inst[0] |= (val & 0x3f) << 16; 296 e->inst[1] |= (val >> 6) << 2; 297} 298 299static void 300emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 301 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 302{ 303 struct nv50_program_exec *e = exec(pc); 304 305 e->inst[0] |= 0x80000000; 306 set_dst(pc, dst, e); 307 alloc_reg(pc, iv); 308 e->inst[0] |= (iv->hw << 9); 309 alloc_reg(pc, src); 310 e->inst[0] |= (src->hw << 16); 311 if (noperspective) 312 e->inst[0] |= (1 << 25); 313 314 emit(pc, e); 315} 316 317static void 318set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, 319 struct nv50_program_exec *e) 320{ 321 set_long(pc, e); 322#if 1 323 e->inst[1] |= (1 << 22); 324#else 325 if (src->type == P_IMMD) { 326 e->inst[1] |= (NV50_CB_PMISC << 22); 327 } else { 328 if (pc->p->type == PIPE_SHADER_VERTEX) 329 e->inst[1] |= (NV50_CB_PVP << 22); 330 else 331 e->inst[1] |= (NV50_CB_PFP << 22); 332 } 333#endif 334 335 e->param.index = src->hw; 336 e->param.shift = s; 337 e->param.mask = m << (s % 32); 338} 339 340static void 341emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 342{ 343 struct nv50_program_exec *e = exec(pc); 344 345 e->inst[0] |= 0x10000000; 346 347 set_dst(pc, dst, e); 348 349 if (dst->type != P_RESULT && src->type == P_IMMD) { 350 set_immd(pc, src, e); 351 /*XXX: 32-bit, but steals part of "half" reg space - need to 352 * catch and handle this case if/when we do half-regs 353 */ 354 e->inst[0] |= 0x00008000; 355 } else 356 if (src->type == P_IMMD || src->type == P_CONST) { 357 set_long(pc, e); 358 set_data(pc, src, 0x7f, 9, e); 359 e->inst[1] |= 0x20000000; /* src0 const? */ 360 } else { 361 if (src->type == P_ATTR) { 362 set_long(pc, e); 363 e->inst[1] |= 0x00200000; 364 } 365 366 alloc_reg(pc, src); 367 e->inst[0] |= (src->hw << 9); 368 } 369 370 /* We really should support "half" instructions here at some point, 371 * but I don't feel confident enough about them yet. 372 */ 373 set_long(pc, e); 374 if (is_long(e) && !is_immd(e)) { 375 e->inst[1] |= 0x04000000; /* 32-bit */ 376 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 377 } 378 379 emit(pc, e); 380} 381 382static boolean 383check_swap_src_0_1(struct nv50_pc *pc, 384 struct nv50_reg **s0, struct nv50_reg **s1) 385{ 386 struct nv50_reg *src0 = *s0, *src1 = *s1; 387 388 if (src0->type == P_CONST) { 389 if (src1->type != P_CONST) { 390 *s0 = src1; 391 *s1 = src0; 392 return TRUE; 393 } 394 } else 395 if (src1->type == P_ATTR) { 396 if (src0->type != P_ATTR) { 397 *s0 = src1; 398 *s1 = src0; 399 return TRUE; 400 } 401 } 402 403 return FALSE; 404} 405 406static void 407set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 408{ 409 if (src->type == P_ATTR) { 410 set_long(pc, e); 411 e->inst[1] |= 0x00200000; 412 } else 413 if (src->type == P_CONST || src->type == P_IMMD) { 414 struct nv50_reg *temp = temp_temp(pc); 415 416 emit_mov(pc, temp, src); 417 src = temp; 418 } 419 420 alloc_reg(pc, src); 421 e->inst[0] |= (src->hw << 9); 422} 423 424static void 425set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 426{ 427 if (src->type == P_ATTR) { 428 struct nv50_reg *temp = temp_temp(pc); 429 430 emit_mov(pc, temp, src); 431 src = temp; 432 } else 433 if (src->type == P_CONST || src->type == P_IMMD) { 434 assert(!(e->inst[0] & 0x00800000)); 435 if (e->inst[0] & 0x01000000) { 436 struct nv50_reg *temp = temp_temp(pc); 437 438 emit_mov(pc, temp, src); 439 src = temp; 440 } else { 441 set_data(pc, src, 0x7f, 16, e); 442 e->inst[0] |= 0x00800000; 443 } 444 } 445 446 alloc_reg(pc, src); 447 e->inst[0] |= (src->hw << 16); 448} 449 450static void 451set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 452{ 453 set_long(pc, e); 454 455 if (src->type == P_ATTR) { 456 struct nv50_reg *temp = temp_temp(pc); 457 458 emit_mov(pc, temp, src); 459 src = temp; 460 } else 461 if (src->type == P_CONST || src->type == P_IMMD) { 462 assert(!(e->inst[0] & 0x01000000)); 463 if (e->inst[0] & 0x00800000) { 464 struct nv50_reg *temp = temp_temp(pc); 465 466 emit_mov(pc, temp, src); 467 src = temp; 468 } else { 469 set_data(pc, src, 0x7f, 32+14, e); 470 e->inst[0] |= 0x01000000; 471 } 472 } 473 474 alloc_reg(pc, src); 475 e->inst[1] |= (src->hw << 14); 476} 477 478static void 479emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 480 struct nv50_reg *src1) 481{ 482 struct nv50_program_exec *e = exec(pc); 483 484 e->inst[0] |= 0xc0000000; 485 set_long(pc, e); 486 487 check_swap_src_0_1(pc, &src0, &src1); 488 set_dst(pc, dst, e); 489 set_src_0(pc, src0, e); 490 set_src_1(pc, src1, e); 491 492 emit(pc, e); 493} 494 495static void 496emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 497 struct nv50_reg *src0, struct nv50_reg *src1) 498{ 499 struct nv50_program_exec *e = exec(pc); 500 501 e->inst[0] |= 0xb0000000; 502 503 check_swap_src_0_1(pc, &src0, &src1); 504 set_dst(pc, dst, e); 505 set_src_0(pc, src0, e); 506 if (is_long(e)) 507 set_src_2(pc, src1, e); 508 else 509 set_src_1(pc, src1, e); 510 511 emit(pc, e); 512} 513 514static void 515emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 516 struct nv50_reg *src0, struct nv50_reg *src1) 517{ 518 struct nv50_program_exec *e = exec(pc); 519 520 set_long(pc, e); 521 e->inst[0] |= 0xb0000000; 522 e->inst[1] |= (sub << 29); 523 524 check_swap_src_0_1(pc, &src0, &src1); 525 set_dst(pc, dst, e); 526 set_src_0(pc, src0, e); 527 set_src_1(pc, src1, e); 528 529 emit(pc, e); 530} 531 532static void 533emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 534 struct nv50_reg *src1) 535{ 536 struct nv50_program_exec *e = exec(pc); 537 538 e->inst[0] |= 0xb0000000; 539 540 set_long(pc, e); 541 if (check_swap_src_0_1(pc, &src0, &src1)) 542 e->inst[1] |= 0x04000000; 543 else 544 e->inst[1] |= 0x08000000; 545 546 set_dst(pc, dst, e); 547 set_src_0(pc, src0, e); 548 set_src_2(pc, src1, e); 549 550 emit(pc, e); 551} 552 553static void 554emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 555 struct nv50_reg *src1, struct nv50_reg *src2) 556{ 557 struct nv50_program_exec *e = exec(pc); 558 559 e->inst[0] |= 0xe0000000; 560 561 check_swap_src_0_1(pc, &src0, &src1); 562 set_dst(pc, dst, e); 563 set_src_0(pc, src0, e); 564 set_src_1(pc, src1, e); 565 set_src_2(pc, src2, e); 566 567 emit(pc, e); 568} 569 570static void 571emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 572 struct nv50_reg *src1, struct nv50_reg *src2) 573{ 574 struct nv50_program_exec *e = exec(pc); 575 576 e->inst[0] |= 0xe0000000; 577 set_long(pc, e); 578 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 579 580 check_swap_src_0_1(pc, &src0, &src1); 581 set_dst(pc, dst, e); 582 set_src_0(pc, src0, e); 583 set_src_1(pc, src1, e); 584 set_src_2(pc, src2, e); 585 586 emit(pc, e); 587} 588 589static void 590emit_flop(struct nv50_pc *pc, unsigned sub, 591 struct nv50_reg *dst, struct nv50_reg *src) 592{ 593 struct nv50_program_exec *e = exec(pc); 594 595 e->inst[0] |= 0x90000000; 596 if (sub) { 597 set_long(pc, e); 598 e->inst[1] |= (sub << 29); 599 } 600 601 set_dst(pc, dst, e); 602 set_src_0(pc, src, e); 603 604 emit(pc, e); 605} 606 607static void 608emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 609{ 610 struct nv50_program_exec *e = exec(pc); 611 612 e->inst[0] |= 0xb0000000; 613 614 set_dst(pc, dst, e); 615 set_src_0(pc, src, e); 616 set_long(pc, e); 617 e->inst[1] |= (6 << 29) | 0x00004000; 618 619 emit(pc, e); 620} 621 622static void 623emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 624{ 625 struct nv50_program_exec *e = exec(pc); 626 627 e->inst[0] |= 0xb0000000; 628 629 set_dst(pc, dst, e); 630 set_src_0(pc, src, e); 631 set_long(pc, e); 632 e->inst[1] |= (6 << 29); 633 634 emit(pc, e); 635} 636 637static void 638emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 639 struct nv50_reg *src0, struct nv50_reg *src1) 640{ 641 struct nv50_program_exec *e = exec(pc); 642 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; 643 struct nv50_reg *rdst; 644 645 assert(c_op <= 7); 646 if (check_swap_src_0_1(pc, &src0, &src1)) 647 c_op = inv_cop[c_op]; 648 649 rdst = dst; 650 if (dst->type != P_TEMP) 651 dst = alloc_temp(pc, NULL); 652 653 /* set.u32 */ 654 set_long(pc, e); 655 e->inst[0] |= 0xb0000000; 656 e->inst[1] |= (3 << 29); 657 e->inst[1] |= (c_op << 14); 658 /*XXX: breaks things, .u32 by default? 659 * decuda will disasm as .u16 and use .lo/.hi regs, but this 660 * doesn't seem to match what the hw actually does. 661 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 662 */ 663 set_dst(pc, dst, e); 664 set_src_0(pc, src0, e); 665 set_src_1(pc, src1, e); 666 emit(pc, e); 667 668 /* cvt.f32.u32 */ 669 e = exec(pc); 670 e->inst[0] = 0xa0000001; 671 e->inst[1] = 0x64014780; 672 set_dst(pc, rdst, e); 673 set_src_0(pc, dst, e); 674 emit(pc, e); 675 676 if (dst != rdst) 677 free_temp(pc, dst); 678} 679 680static void 681emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 682{ 683 struct nv50_program_exec *e = exec(pc); 684 685 e->inst[0] = 0xa0000000; /* cvt */ 686 set_long(pc, e); 687 e->inst[1] |= (6 << 29); /* cvt */ 688 e->inst[1] |= 0x08000000; /* integer mode */ 689 e->inst[1] |= 0x04000000; /* 32 bit */ 690 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 691 e->inst[1] |= (1 << 14); /* src .f32 */ 692 set_dst(pc, dst, e); 693 set_src_0(pc, src, e); 694 695 emit(pc, e); 696} 697 698static void 699emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 700 struct nv50_reg *v, struct nv50_reg *e) 701{ 702 struct nv50_reg *temp = alloc_temp(pc, NULL); 703 704 emit_flop(pc, 3, temp, v); 705 emit_mul(pc, temp, temp, e); 706 emit_preex2(pc, temp, temp); 707 emit_flop(pc, 6, dst, temp); 708 709 free_temp(pc, temp); 710} 711 712static void 713emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 714{ 715 struct nv50_program_exec *e = exec(pc); 716 717 e->inst[0] = 0xa0000000; /* cvt */ 718 set_long(pc, e); 719 e->inst[1] |= (6 << 29); /* cvt */ 720 e->inst[1] |= 0x04000000; /* 32 bit */ 721 e->inst[1] |= (1 << 14); /* src .f32 */ 722 e->inst[1] |= ((1 << 6) << 14); /* .abs */ 723 set_dst(pc, dst, e); 724 set_src_0(pc, src, e); 725 726 emit(pc, e); 727} 728 729static void 730emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, 731 struct nv50_reg **src) 732{ 733 struct nv50_reg *one = alloc_immd(pc, 1.0); 734 struct nv50_reg *zero = alloc_immd(pc, 0.0); 735 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 736 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 737 struct nv50_reg *tmp[4]; 738 739 if (mask & (1 << 0)) 740 emit_mov(pc, dst[0], one); 741 742 if (mask & (1 << 3)) 743 emit_mov(pc, dst[3], one); 744 745 if (mask & (3 << 1)) { 746 if (mask & (1 << 1)) 747 tmp[0] = dst[1]; 748 else 749 tmp[0] = temp_temp(pc); 750 emit_minmax(pc, 4, tmp[0], src[0], zero); 751 } 752 753 if (mask & (1 << 2)) { 754 set_pred_wr(pc, 1, 0, pc->p->exec_tail); 755 756 tmp[1] = temp_temp(pc); 757 emit_minmax(pc, 4, tmp[1], src[1], zero); 758 759 tmp[3] = temp_temp(pc); 760 emit_minmax(pc, 4, tmp[3], src[3], neg128); 761 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 762 763 emit_pow(pc, dst[2], tmp[1], tmp[3]); 764 emit_mov(pc, dst[2], zero); 765 set_pred(pc, 3, 0, pc->p->exec_tail); 766 } 767} 768 769static void 770emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 771{ 772 struct nv50_program_exec *e = exec(pc); 773 774 set_long(pc, e); 775 e->inst[0] |= 0xa0000000; /* delta */ 776 e->inst[1] |= (7 << 29); /* delta */ 777 e->inst[1] |= 0x04000000; /* negate arg0? probably not */ 778 e->inst[1] |= (1 << 14); /* src .f32 */ 779 set_dst(pc, dst, e); 780 set_src_0(pc, src, e); 781 782 emit(pc, e); 783} 784 785static struct nv50_reg * 786tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 787{ 788 switch (dst->DstRegister.File) { 789 case TGSI_FILE_TEMPORARY: 790 return &pc->temp[dst->DstRegister.Index * 4 + c]; 791 case TGSI_FILE_OUTPUT: 792 return &pc->result[dst->DstRegister.Index * 4 + c]; 793 case TGSI_FILE_NULL: 794 return NULL; 795 default: 796 break; 797 } 798 799 return NULL; 800} 801 802static struct nv50_reg * 803tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 804{ 805 struct nv50_reg *r = NULL; 806 struct nv50_reg *temp; 807 unsigned c; 808 809 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 810 switch (c) { 811 case TGSI_EXTSWIZZLE_X: 812 case TGSI_EXTSWIZZLE_Y: 813 case TGSI_EXTSWIZZLE_Z: 814 case TGSI_EXTSWIZZLE_W: 815 switch (src->SrcRegister.File) { 816 case TGSI_FILE_INPUT: 817 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 818 break; 819 case TGSI_FILE_TEMPORARY: 820 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 821 break; 822 case TGSI_FILE_CONSTANT: 823 r = &pc->param[src->SrcRegister.Index * 4 + c]; 824 break; 825 case TGSI_FILE_IMMEDIATE: 826 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 827 break; 828 default: 829 assert(0); 830 break; 831 } 832 break; 833 case TGSI_EXTSWIZZLE_ZERO: 834 r = alloc_immd(pc, 0.0); 835 break; 836 case TGSI_EXTSWIZZLE_ONE: 837 r = alloc_immd(pc, 1.0); 838 break; 839 default: 840 assert(0); 841 break; 842 } 843 844 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 845 case TGSI_UTIL_SIGN_KEEP: 846 break; 847 case TGSI_UTIL_SIGN_CLEAR: 848 temp = temp_temp(pc); 849 emit_abs(pc, temp, r); 850 r = temp; 851 break; 852 case TGSI_UTIL_SIGN_TOGGLE: 853 temp = temp_temp(pc); 854 emit_neg(pc, temp, r); 855 r = temp; 856 break; 857 case TGSI_UTIL_SIGN_SET: 858 temp = temp_temp(pc); 859 emit_abs(pc, temp, r); 860 emit_neg(pc, temp, r); 861 r = temp; 862 break; 863 default: 864 assert(0); 865 break; 866 } 867 868 return r; 869} 870 871static boolean 872nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 873{ 874 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 875 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 876 unsigned mask, sat; 877 int i, c; 878 879 NOUVEAU_ERR("insn %p\n", tok); 880 881 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 882 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 883 884 for (c = 0; c < 4; c++) { 885 if (mask & (1 << c)) 886 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 887 else 888 dst[c] = NULL; 889 } 890 891 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 892 for (c = 0; c < 4; c++) 893 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 894 } 895 896 if (sat) { 897 for (c = 0; c < 4; c++) { 898 rdst[c] = dst[c]; 899 dst[c] = temp_temp(pc); 900 } 901 } 902 903 switch (inst->Instruction.Opcode) { 904 case TGSI_OPCODE_ABS: 905 for (c = 0; c < 4; c++) { 906 if (!(mask & (1 << c))) 907 continue; 908 emit_abs(pc, dst[c], src[0][c]); 909 } 910 break; 911 case TGSI_OPCODE_ADD: 912 for (c = 0; c < 4; c++) { 913 if (!(mask & (1 << c))) 914 continue; 915 emit_add(pc, dst[c], src[0][c], src[1][c]); 916 } 917 break; 918 case TGSI_OPCODE_COS: 919 temp = alloc_temp(pc, NULL); 920 emit_precossin(pc, temp, src[0][0]); 921 emit_flop(pc, 5, temp, temp); 922 for (c = 0; c < 4; c++) { 923 if (!(mask & (1 << c))) 924 continue; 925 emit_mov(pc, dst[c], temp); 926 } 927 break; 928 case TGSI_OPCODE_DP3: 929 temp = alloc_temp(pc, NULL); 930 emit_mul(pc, temp, src[0][0], src[1][0]); 931 emit_mad(pc, temp, src[0][1], src[1][1], temp); 932 emit_mad(pc, temp, src[0][2], src[1][2], temp); 933 for (c = 0; c < 4; c++) { 934 if (!(mask & (1 << c))) 935 continue; 936 emit_mov(pc, dst[c], temp); 937 } 938 free_temp(pc, temp); 939 break; 940 case TGSI_OPCODE_DP4: 941 temp = alloc_temp(pc, NULL); 942 emit_mul(pc, temp, src[0][0], src[1][0]); 943 emit_mad(pc, temp, src[0][1], src[1][1], temp); 944 emit_mad(pc, temp, src[0][2], src[1][2], temp); 945 emit_mad(pc, temp, src[0][3], src[1][3], temp); 946 for (c = 0; c < 4; c++) { 947 if (!(mask & (1 << c))) 948 continue; 949 emit_mov(pc, dst[c], temp); 950 } 951 free_temp(pc, temp); 952 break; 953 case TGSI_OPCODE_DPH: 954 temp = alloc_temp(pc, NULL); 955 emit_mul(pc, temp, src[0][0], src[1][0]); 956 emit_mad(pc, temp, src[0][1], src[1][1], temp); 957 emit_mad(pc, temp, src[0][2], src[1][2], temp); 958 emit_add(pc, temp, src[1][3], temp); 959 for (c = 0; c < 4; c++) { 960 if (!(mask & (1 << c))) 961 continue; 962 emit_mov(pc, dst[c], temp); 963 } 964 free_temp(pc, temp); 965 break; 966 case TGSI_OPCODE_DST: 967 { 968 struct nv50_reg *one = alloc_immd(pc, 1.0); 969 if (mask & (1 << 0)) 970 emit_mov(pc, dst[0], one); 971 if (mask & (1 << 1)) 972 emit_mul(pc, dst[1], src[0][1], src[1][1]); 973 if (mask & (1 << 2)) 974 emit_mov(pc, dst[2], src[0][2]); 975 if (mask & (1 << 3)) 976 emit_mov(pc, dst[3], src[1][3]); 977 FREE(one); 978 } 979 break; 980 case TGSI_OPCODE_EX2: 981 temp = alloc_temp(pc, NULL); 982 emit_preex2(pc, temp, src[0][0]); 983 emit_flop(pc, 6, temp, temp); 984 for (c = 0; c < 4; c++) { 985 if (!(mask & (1 << c))) 986 continue; 987 emit_mov(pc, dst[c], temp); 988 } 989 free_temp(pc, temp); 990 break; 991 case TGSI_OPCODE_FLR: 992 for (c = 0; c < 4; c++) { 993 if (!(mask & (1 << c))) 994 continue; 995 emit_flr(pc, dst[c], src[0][c]); 996 } 997 break; 998 case TGSI_OPCODE_FRC: 999 temp = alloc_temp(pc, NULL); 1000 for (c = 0; c < 4; c++) { 1001 if (!(mask & (1 << c))) 1002 continue; 1003 emit_flr(pc, temp, src[0][c]); 1004 emit_sub(pc, dst[c], src[0][c], temp); 1005 } 1006 free_temp(pc, temp); 1007 break; 1008 case TGSI_OPCODE_LIT: 1009 emit_lit(pc, &dst[0], mask, &src[0][0]); 1010 break; 1011 case TGSI_OPCODE_LG2: 1012 temp = alloc_temp(pc, NULL); 1013 emit_flop(pc, 3, temp, src[0][0]); 1014 for (c = 0; c < 4; c++) { 1015 if (!(mask & (1 << c))) 1016 continue; 1017 emit_mov(pc, dst[c], temp); 1018 } 1019 break; 1020 case TGSI_OPCODE_LRP: 1021 for (c = 0; c < 4; c++) { 1022 if (!(mask & (1 << c))) 1023 continue; 1024 /*XXX: we can do better than this */ 1025 temp = alloc_temp(pc, NULL); 1026 emit_neg(pc, temp, src[0][c]); 1027 emit_mad(pc, temp, temp, src[2][c], src[2][c]); 1028 emit_mad(pc, dst[c], src[0][c], src[1][c], temp); 1029 free_temp(pc, temp); 1030 } 1031 break; 1032 case TGSI_OPCODE_MAD: 1033 for (c = 0; c < 4; c++) { 1034 if (!(mask & (1 << c))) 1035 continue; 1036 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 1037 } 1038 break; 1039 case TGSI_OPCODE_MAX: 1040 for (c = 0; c < 4; c++) { 1041 if (!(mask & (1 << c))) 1042 continue; 1043 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 1044 } 1045 break; 1046 case TGSI_OPCODE_MIN: 1047 for (c = 0; c < 4; c++) { 1048 if (!(mask & (1 << c))) 1049 continue; 1050 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 1051 } 1052 break; 1053 case TGSI_OPCODE_MOV: 1054 for (c = 0; c < 4; c++) { 1055 if (!(mask & (1 << c))) 1056 continue; 1057 emit_mov(pc, dst[c], src[0][c]); 1058 } 1059 break; 1060 case TGSI_OPCODE_MUL: 1061 for (c = 0; c < 4; c++) { 1062 if (!(mask & (1 << c))) 1063 continue; 1064 emit_mul(pc, dst[c], src[0][c], src[1][c]); 1065 } 1066 break; 1067 case TGSI_OPCODE_POW: 1068 temp = alloc_temp(pc, NULL); 1069 emit_pow(pc, temp, src[0][0], src[1][0]); 1070 for (c = 0; c < 4; c++) { 1071 if (!(mask & (1 << c))) 1072 continue; 1073 emit_mov(pc, dst[c], temp); 1074 } 1075 free_temp(pc, temp); 1076 break; 1077 case TGSI_OPCODE_RCP: 1078 for (c = 0; c < 4; c++) { 1079 if (!(mask & (1 << c))) 1080 continue; 1081 emit_flop(pc, 0, dst[c], src[0][0]); 1082 } 1083 break; 1084 case TGSI_OPCODE_RSQ: 1085 for (c = 0; c < 4; c++) { 1086 if (!(mask & (1 << c))) 1087 continue; 1088 emit_flop(pc, 2, dst[c], src[0][0]); 1089 } 1090 break; 1091 case TGSI_OPCODE_SCS: 1092 temp = alloc_temp(pc, NULL); 1093 emit_precossin(pc, temp, src[0][0]); 1094 if (mask & (1 << 0)) 1095 emit_flop(pc, 5, dst[0], temp); 1096 if (mask & (1 << 1)) 1097 emit_flop(pc, 4, dst[1], temp); 1098 break; 1099 case TGSI_OPCODE_SGE: 1100 for (c = 0; c < 4; c++) { 1101 if (!(mask & (1 << c))) 1102 continue; 1103 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 1104 } 1105 break; 1106 case TGSI_OPCODE_SIN: 1107 temp = alloc_temp(pc, NULL); 1108 emit_precossin(pc, temp, src[0][0]); 1109 emit_flop(pc, 4, temp, temp); 1110 for (c = 0; c < 4; c++) { 1111 if (!(mask & (1 << c))) 1112 continue; 1113 emit_mov(pc, dst[c], temp); 1114 } 1115 break; 1116 case TGSI_OPCODE_SLT: 1117 for (c = 0; c < 4; c++) { 1118 if (!(mask & (1 << c))) 1119 continue; 1120 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 1121 } 1122 break; 1123 case TGSI_OPCODE_SUB: 1124 for (c = 0; c < 4; c++) { 1125 if (!(mask & (1 << c))) 1126 continue; 1127 emit_sub(pc, dst[c], src[0][c], src[1][c]); 1128 } 1129 break; 1130 case TGSI_OPCODE_XPD: 1131 temp = alloc_temp(pc, NULL); 1132 if (mask & (1 << 0)) { 1133 emit_mul(pc, temp, src[0][2], src[1][1]); 1134 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 1135 } 1136 if (mask & (1 << 1)) { 1137 emit_mul(pc, temp, src[0][0], src[1][2]); 1138 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 1139 } 1140 if (mask & (1 << 2)) { 1141 emit_mul(pc, temp, src[0][1], src[1][0]); 1142 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 1143 } 1144 free_temp(pc, temp); 1145 break; 1146 case TGSI_OPCODE_END: 1147 break; 1148 default: 1149 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 1150 return FALSE; 1151 } 1152 1153 if (sat) { 1154 for (c = 0; c < 4; c++) { 1155 struct nv50_program_exec *e; 1156 1157 if (!(mask & (1 << c))) 1158 continue; 1159 e = exec(pc); 1160 1161 e->inst[0] = 0xa0000000; /* cvt */ 1162 set_long(pc, e); 1163 e->inst[1] |= (6 << 29); /* cvt */ 1164 e->inst[1] |= 0x04000000; /* 32 bit */ 1165 e->inst[1] |= (1 << 14); /* src .f32 */ 1166 e->inst[1] |= ((1 << 5) << 14); /* .sat */ 1167 set_dst(pc, rdst[c], e); 1168 set_src_0(pc, dst[c], e); 1169 emit(pc, e); 1170 } 1171 } 1172 1173 kill_temp_temp(pc); 1174 return TRUE; 1175} 1176 1177static boolean 1178nv50_program_tx_prep(struct nv50_pc *pc) 1179{ 1180 struct tgsi_parse_context p; 1181 boolean ret = FALSE; 1182 unsigned i, c; 1183 1184 tgsi_parse_init(&p, pc->p->pipe.tokens); 1185 while (!tgsi_parse_end_of_tokens(&p)) { 1186 const union tgsi_full_token *tok = &p.FullToken; 1187 1188 tgsi_parse_token(&p); 1189 switch (tok->Token.Type) { 1190 case TGSI_TOKEN_TYPE_IMMEDIATE: 1191 { 1192 const struct tgsi_full_immediate *imm = 1193 &p.FullToken.FullImmediate; 1194 1195 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1196 imm->u.ImmediateFloat32[1].Float, 1197 imm->u.ImmediateFloat32[2].Float, 1198 imm->u.ImmediateFloat32[3].Float); 1199 } 1200 break; 1201 case TGSI_TOKEN_TYPE_DECLARATION: 1202 { 1203 const struct tgsi_full_declaration *d; 1204 unsigned last; 1205 1206 d = &p.FullToken.FullDeclaration; 1207 last = d->u.DeclarationRange.Last; 1208 1209 switch (d->Declaration.File) { 1210 case TGSI_FILE_TEMPORARY: 1211 if (pc->temp_nr < (last + 1)) 1212 pc->temp_nr = last + 1; 1213 break; 1214 case TGSI_FILE_OUTPUT: 1215 if (pc->result_nr < (last + 1)) 1216 pc->result_nr = last + 1; 1217 break; 1218 case TGSI_FILE_INPUT: 1219 if (pc->attr_nr < (last + 1)) 1220 pc->attr_nr = last + 1; 1221 break; 1222 case TGSI_FILE_CONSTANT: 1223 if (pc->param_nr < (last + 1)) 1224 pc->param_nr = last + 1; 1225 break; 1226 default: 1227 NOUVEAU_ERR("bad decl file %d\n", 1228 d->Declaration.File); 1229 goto out_err; 1230 } 1231 } 1232 break; 1233 case TGSI_TOKEN_TYPE_INSTRUCTION: 1234 break; 1235 default: 1236 break; 1237 } 1238 } 1239 1240 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 1241 if (pc->temp_nr) { 1242 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1243 if (!pc->temp) 1244 goto out_err; 1245 1246 for (i = 0; i < pc->temp_nr; i++) { 1247 for (c = 0; c < 4; c++) { 1248 pc->temp[i*4+c].type = P_TEMP; 1249 pc->temp[i*4+c].hw = -1; 1250 pc->temp[i*4+c].index = i; 1251 } 1252 } 1253 } 1254 1255 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1256 if (pc->attr_nr) { 1257 struct nv50_reg *iv = NULL, *tmp = NULL; 1258 int aid = 0; 1259 1260 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1261 if (!pc->attr) 1262 goto out_err; 1263 1264 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1265 iv = alloc_temp(pc, NULL); 1266 aid++; 1267 } 1268 1269 for (i = 0; i < pc->attr_nr; i++) { 1270 struct nv50_reg *a = &pc->attr[i*4]; 1271 1272 for (c = 0; c < 4; c++) { 1273 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1274 struct nv50_reg *at = 1275 alloc_temp(pc, NULL); 1276 pc->attr[i*4+c].type = at->type; 1277 pc->attr[i*4+c].hw = at->hw; 1278 pc->attr[i*4+c].index = at->index; 1279 } else { 1280 pc->p->cfg.vp.attr[aid/32] |= 1281 (1 << (aid % 32)); 1282 pc->attr[i*4+c].type = P_ATTR; 1283 pc->attr[i*4+c].hw = aid++; 1284 pc->attr[i*4+c].index = i; 1285 } 1286 } 1287 1288 if (pc->p->type != PIPE_SHADER_FRAGMENT) 1289 continue; 1290 1291 emit_interp(pc, iv, iv, iv, FALSE); 1292 tmp = alloc_temp(pc, NULL); 1293 emit_flop(pc, 0, tmp, iv); 1294 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1295 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1296 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1297 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1298 free_temp(pc, tmp); 1299 } 1300 1301 if (iv) 1302 free_temp(pc, iv); 1303 } 1304 1305 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1306 if (pc->result_nr) { 1307 int rid = 0; 1308 1309 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1310 if (!pc->result) 1311 goto out_err; 1312 1313 for (i = 0; i < pc->result_nr; i++) { 1314 for (c = 0; c < 4; c++) { 1315 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1316 pc->result[i*4+c].type = P_TEMP; 1317 pc->result[i*4+c].hw = -1; 1318 } else { 1319 pc->result[i*4+c].type = P_RESULT; 1320 pc->result[i*4+c].hw = rid++; 1321 } 1322 pc->result[i*4+c].index = i; 1323 } 1324 } 1325 } 1326 1327 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1328 if (pc->param_nr) { 1329 int rid = 0; 1330 1331 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1332 if (!pc->param) 1333 goto out_err; 1334 1335 for (i = 0; i < pc->param_nr; i++) { 1336 for (c = 0; c < 4; c++) { 1337 pc->param[i*4+c].type = P_CONST; 1338 pc->param[i*4+c].hw = rid++; 1339 pc->param[i*4+c].index = i; 1340 } 1341 } 1342 } 1343 1344 if (pc->immd_nr) { 1345 int rid = pc->param_nr * 4; 1346 1347 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1348 if (!pc->immd) 1349 goto out_err; 1350 1351 for (i = 0; i < pc->immd_nr; i++) { 1352 for (c = 0; c < 4; c++) { 1353 pc->immd[i*4+c].type = P_IMMD; 1354 pc->immd[i*4+c].hw = rid++; 1355 pc->immd[i*4+c].index = i; 1356 } 1357 } 1358 } 1359 1360 ret = TRUE; 1361out_err: 1362 tgsi_parse_free(&p); 1363 return ret; 1364} 1365 1366static boolean 1367nv50_program_tx(struct nv50_program *p) 1368{ 1369 struct tgsi_parse_context parse; 1370 struct nv50_pc *pc; 1371 boolean ret; 1372 1373 pc = CALLOC_STRUCT(nv50_pc); 1374 if (!pc) 1375 return FALSE; 1376 pc->p = p; 1377 pc->p->cfg.high_temp = 4; 1378 1379 ret = nv50_program_tx_prep(pc); 1380 if (ret == FALSE) 1381 goto out_cleanup; 1382 1383 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1384 while (!tgsi_parse_end_of_tokens(&parse)) { 1385 const union tgsi_full_token *tok = &parse.FullToken; 1386 1387 tgsi_parse_token(&parse); 1388 1389 switch (tok->Token.Type) { 1390 case TGSI_TOKEN_TYPE_INSTRUCTION: 1391 ret = nv50_program_tx_insn(pc, tok); 1392 if (ret == FALSE) 1393 goto out_err; 1394 break; 1395 default: 1396 break; 1397 } 1398 } 1399 1400 if (p->type == PIPE_SHADER_FRAGMENT) { 1401 struct nv50_reg out; 1402 1403 out.type = P_TEMP; 1404 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) 1405 emit_mov(pc, &out, &pc->result[out.hw]); 1406 } 1407 1408 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); 1409 pc->p->exec_tail->inst[1] |= 0x00000001; 1410 1411 p->param_nr = pc->param_nr * 4; 1412 p->immd_nr = pc->immd_nr * 4; 1413 p->immd = pc->immd_buf; 1414 1415out_err: 1416 tgsi_parse_free(&parse); 1417 1418out_cleanup: 1419 return ret; 1420} 1421 1422static void 1423nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1424{ 1425 if (nv50_program_tx(p) == FALSE) 1426 assert(0); 1427 p->translated = TRUE; 1428} 1429 1430static void 1431nv50_program_upload_data(struct nv50_context *nv50, float *map, 1432 unsigned start, unsigned count) 1433{ 1434 while (count) { 1435 unsigned nr = count > 2047 ? 2047 : count; 1436 1437 BEGIN_RING(tesla, 0x00000f00, 1); 1438 OUT_RING ((NV50_CB_PMISC << 0) | (start << 8)); 1439 BEGIN_RING(tesla, 0x40000f04, nr); 1440 OUT_RINGp (map, nr); 1441 1442 map += nr; 1443 start += nr; 1444 count -= nr; 1445 } 1446} 1447 1448static void 1449nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1450{ 1451 struct nouveau_winsys *nvws = nv50->screen->nvws; 1452 struct pipe_winsys *ws = nv50->pipe.winsys; 1453 unsigned nr = p->param_nr + p->immd_nr; 1454 1455 if (!p->data && nr) { 1456 struct nouveau_resource *heap = nv50->screen->vp_data_heap; 1457 1458 if (nvws->res_alloc(heap, nr, p, &p->data)) { 1459 while (heap->next && heap->size < nr) { 1460 struct nv50_program *evict = heap->next->priv; 1461 nvws->res_free(&evict->data); 1462 } 1463 1464 if (nvws->res_alloc(heap, nr, p, &p->data)) 1465 assert(0); 1466 } 1467 } 1468 1469 if (p->param_nr) { 1470 float *map = ws->buffer_map(ws, nv50->constbuf[p->type], 1471 PIPE_BUFFER_USAGE_CPU_READ); 1472 nv50_program_upload_data(nv50, map, p->data->start, 1473 p->param_nr); 1474 ws->buffer_unmap(ws, nv50->constbuf[p->type]); 1475 } 1476 1477 if (p->immd_nr) { 1478 nv50_program_upload_data(nv50, p->immd, 1479 p->data->start + p->param_nr, 1480 p->immd_nr); 1481 } 1482} 1483 1484static void 1485nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1486{ 1487 struct pipe_winsys *ws = nv50->pipe.winsys; 1488 struct nv50_program_exec *e; 1489 boolean upload = FALSE; 1490 unsigned *map; 1491 1492 if (!p->buffer) { 1493 p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); 1494 upload = TRUE; 1495 } 1496 1497 if (p->data && p->data->start != p->data_start) { 1498 for (e = p->exec_head; e; e = e->next) { 1499 unsigned ei, ci; 1500 1501 if (e->param.index < 0) 1502 continue; 1503 ei = e->param.shift >> 5; 1504 ci = e->param.index + p->data->start; 1505 1506 e->inst[ei] &= ~e->param.mask; 1507 e->inst[ei] |= (ci << e->param.shift); 1508 } 1509 1510 p->data_start = p->data->start; 1511 upload = TRUE; 1512 } 1513 1514 if (!upload) 1515 return FALSE; 1516 1517 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1518 for (e = p->exec_head; e; e = e->next) { 1519#ifdef NV50_PROGRAM_DUMP 1520 NOUVEAU_ERR("0x%08x\n", e->inst[0]); 1521#endif 1522 *(map++) = e->inst[0]; 1523 if (is_long(e)) { 1524#ifdef NV50_PROGRAM_DUMP 1525 NOUVEAU_ERR("0x%08x\n", e->inst[1]); 1526#endif 1527 *(map++) = e->inst[1]; 1528 } 1529 } 1530 ws->buffer_unmap(ws, p->buffer); 1531} 1532 1533void 1534nv50_vertprog_validate(struct nv50_context *nv50) 1535{ 1536 struct nouveau_grobj *tesla = nv50->screen->tesla; 1537 struct nv50_program *p = nv50->vertprog; 1538 struct nouveau_stateobj *so; 1539 1540 if (!p->translated) { 1541 nv50_program_validate(nv50, p); 1542 if (!p->translated) 1543 assert(0); 1544 } 1545 1546 nv50_program_validate_data(nv50, p); 1547 nv50_program_validate_code(nv50, p); 1548 1549 so = so_new(11, 2); 1550 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1551 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1552 NOUVEAU_BO_HIGH, 0, 0); 1553 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1554 NOUVEAU_BO_LOW, 0, 0); 1555 so_method(so, tesla, 0x1650, 2); 1556 so_data (so, p->cfg.vp.attr[0]); 1557 so_data (so, p->cfg.vp.attr[1]); 1558 so_method(so, tesla, 0x16ac, 2); 1559 so_data (so, 8); 1560 so_data (so, p->cfg.high_temp); 1561 so_method(so, tesla, 0x140c, 1); 1562 so_data (so, 0); /* program start offset */ 1563 so_emit(nv50->screen->nvws, so); 1564 so_ref(NULL, &so); 1565} 1566 1567void 1568nv50_fragprog_validate(struct nv50_context *nv50) 1569{ 1570 struct nouveau_grobj *tesla = nv50->screen->tesla; 1571 struct nv50_program *p = nv50->fragprog; 1572 struct nouveau_stateobj *so; 1573 1574 if (!p->translated) { 1575 nv50_program_validate(nv50, p); 1576 if (!p->translated) 1577 assert(0); 1578 } 1579 1580 nv50_program_validate_data(nv50, p); 1581 nv50_program_validate_code(nv50, p); 1582 1583 so = so_new(64, 2); 1584 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1585 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1586 NOUVEAU_BO_HIGH, 0, 0); 1587 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1588 NOUVEAU_BO_LOW, 0, 0); 1589 so_method(so, tesla, 0x1904, 4); 1590 so_data (so, 0x01040404); /* p: 0x01000404 */ 1591 so_data (so, 0x00000004); 1592 so_data (so, 0x00000000); 1593 so_data (so, 0x00000000); 1594 so_method(so, tesla, 0x16bc, 2); /*XXX: fixme */ 1595 so_data (so, 0x03020100); 1596 so_data (so, 0x07060504); 1597 so_method(so, tesla, 0x1988, 2); 1598 so_data (so, 0x08040404); /* p: 0x0f000401 */ 1599 so_data (so, p->cfg.high_temp); 1600 so_method(so, tesla, 0x1414, 1); 1601 so_data (so, 0); /* program start offset */ 1602 so_emit(nv50->screen->nvws, so); 1603 so_ref(NULL, &so); 1604} 1605 1606void 1607nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1608{ 1609 struct pipe_winsys *ws = nv50->pipe.winsys; 1610 1611 while (p->exec_head) { 1612 struct nv50_program_exec *e = p->exec_head; 1613 1614 p->exec_head = e->next; 1615 FREE(e); 1616 } 1617 p->exec_tail = NULL; 1618 p->exec_size = 0; 1619 1620 if (p->buffer) 1621 pipe_buffer_reference(ws, &p->buffer, NULL); 1622 1623 p->translated = 0; 1624} 1625 1626