nv50_program.c revision f579a99cc608eaba6f617c11ab0aec7f3e9ef953
1/* 2 * Copyright 2008 Ben Skeggs 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23#include "pipe/p_context.h" 24#include "pipe/p_defines.h" 25#include "pipe/p_state.h" 26#include "pipe/p_inlines.h" 27 28#include "pipe/p_shader_tokens.h" 29#include "tgsi/tgsi_parse.h" 30#include "tgsi/tgsi_util.h" 31 32#include "nv50_context.h" 33 34#define NV50_SU_MAX_TEMP 64 35//#define NV50_PROGRAM_DUMP 36 37/* ARL - gallium craps itself on progs/vp/arl.txt 38 * 39 * MSB - Like MAD, but MUL+SUB 40 * - Fuck it off, introduce a way to negate args for ops that 41 * support it. 42 * 43 * Look into inlining IMMD for ops other than MOV (make it general?) 44 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 45 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 46 * 47 * In ops such as ADD it's possible to construct a bad opcode in the !is_long() 48 * case, if the emit_src() causes the inst to suddenly become long. 49 * 50 * Verify half-insns work where expected - and force disable them where they 51 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 52 * 53 * FUCK! watch dst==src vectors, can overwrite components that are needed. 54 * ie. SUB R0, R0.yzxw, R0 55 * 56 * Things to check with renouveau: 57 * FP attr/result assignment - how? 58 * attrib 59 * - 0x16bc maps vp output onto fp hpos 60 * - 0x16c0 maps vp output onto fp col0 61 * result 62 * - colr always 0-3 63 * - depr always 4 64 * 0x16bc->0x16e8 --> some binding between vp/fp regs 65 * 0x16b8 --> VP output count 66 * 67 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 68 * "MOV rcol.x, fcol.y" = 0x00000004 69 * 0x19a8 --> as above but 0x00000100 and 0x00000000 70 * - 0x00100000 used when KIL used 71 * 0x196c --> as above but 0x00000011 and 0x00000000 72 * 73 * 0x1988 --> 0xXXNNNNNN 74 * - XX == FP high something 75 */ 76struct nv50_reg { 77 enum { 78 P_TEMP, 79 P_ATTR, 80 P_RESULT, 81 P_CONST, 82 P_IMMD 83 } type; 84 int index; 85 86 int hw; 87 int neg; 88 89 int rhw; /* result hw for FP outputs, or interpolant index */ 90 int acc; /* instruction where this reg is last read (first insn == 1) */ 91}; 92 93struct nv50_pc { 94 struct nv50_program *p; 95 96 /* hw resources */ 97 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 98 99 /* tgsi resources */ 100 struct nv50_reg *temp; 101 int temp_nr; 102 struct nv50_reg *attr; 103 int attr_nr; 104 struct nv50_reg *result; 105 int result_nr; 106 struct nv50_reg *param; 107 int param_nr; 108 struct nv50_reg *immd; 109 float *immd_buf; 110 int immd_nr; 111 112 struct nv50_reg *temp_temp[16]; 113 unsigned temp_temp_nr; 114 115 unsigned interp_mode[32]; 116 /* perspective interpolation registers */ 117 struct nv50_reg *iv_p; 118 struct nv50_reg *iv_c; 119 120 /* current instruction and total number of insns */ 121 unsigned insn_cur; 122 unsigned insn_nr; 123}; 124 125static void 126alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 127{ 128 int i = 0; 129 130 if (reg->type == P_RESULT) { 131 if (pc->p->cfg.high_result < (reg->hw + 1)) 132 pc->p->cfg.high_result = reg->hw + 1; 133 } 134 135 if (reg->type != P_TEMP) 136 return; 137 138 if (reg->hw >= 0) { 139 /*XXX: do this here too to catch FP temp-as-attr usage.. 140 * not clean, but works */ 141 if (pc->p->cfg.high_temp < (reg->hw + 1)) 142 pc->p->cfg.high_temp = reg->hw + 1; 143 return; 144 } 145 146 if (reg->rhw != -1) { 147 /* try to allocate temporary with index rhw first */ 148 if (!(pc->r_temp[reg->rhw])) { 149 pc->r_temp[reg->rhw] = reg; 150 reg->hw = reg->rhw; 151 if (pc->p->cfg.high_temp < (reg->rhw + 1)) 152 pc->p->cfg.high_temp = reg->rhw + 1; 153 return; 154 } 155 /* make sure we don't get things like $r0 needs to go 156 * in $r1 and $r1 in $r0 157 */ 158 i = pc->result_nr * 4; 159 } 160 161 for (; i < NV50_SU_MAX_TEMP; i++) { 162 if (!(pc->r_temp[i])) { 163 pc->r_temp[i] = reg; 164 reg->hw = i; 165 if (pc->p->cfg.high_temp < (i + 1)) 166 pc->p->cfg.high_temp = i + 1; 167 return; 168 } 169 } 170 171 assert(0); 172} 173 174static struct nv50_reg * 175alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 176{ 177 struct nv50_reg *r; 178 int i; 179 180 if (dst && dst->type == P_TEMP && dst->hw == -1) 181 return dst; 182 183 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 184 if (!pc->r_temp[i]) { 185 r = CALLOC_STRUCT(nv50_reg); 186 r->type = P_TEMP; 187 r->index = -1; 188 r->hw = i; 189 r->rhw = -1; 190 pc->r_temp[i] = r; 191 return r; 192 } 193 } 194 195 assert(0); 196 return NULL; 197} 198 199static void 200free_temp(struct nv50_pc *pc, struct nv50_reg *r) 201{ 202 if (r->index == -1) { 203 unsigned hw = r->hw; 204 205 FREE(pc->r_temp[hw]); 206 pc->r_temp[hw] = NULL; 207 } 208} 209 210static int 211alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) 212{ 213 int i; 214 215 if ((idx + 4) >= NV50_SU_MAX_TEMP) 216 return 1; 217 218 if (pc->r_temp[idx] || pc->r_temp[idx + 1] || 219 pc->r_temp[idx + 2] || pc->r_temp[idx + 3]) 220 return alloc_temp4(pc, dst, idx + 1); 221 222 for (i = 0; i < 4; i++) { 223 dst[i] = CALLOC_STRUCT(nv50_reg); 224 dst[i]->type = P_TEMP; 225 dst[i]->index = -1; 226 dst[i]->hw = idx + i; 227 pc->r_temp[idx + i] = dst[i]; 228 } 229 230 return 0; 231} 232 233static void 234free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) 235{ 236 int i; 237 238 for (i = 0; i < 4; i++) 239 free_temp(pc, reg[i]); 240} 241 242static struct nv50_reg * 243temp_temp(struct nv50_pc *pc) 244{ 245 if (pc->temp_temp_nr >= 16) 246 assert(0); 247 248 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 249 return pc->temp_temp[pc->temp_temp_nr++]; 250} 251 252static void 253kill_temp_temp(struct nv50_pc *pc) 254{ 255 int i; 256 257 for (i = 0; i < pc->temp_temp_nr; i++) 258 free_temp(pc, pc->temp_temp[i]); 259 pc->temp_temp_nr = 0; 260} 261 262static int 263ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 264{ 265 pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)), 266 (pc->immd_nr + 1) * 4 * sizeof(float)); 267 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 268 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 269 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 270 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 271 272 return pc->immd_nr++; 273} 274 275static struct nv50_reg * 276alloc_immd(struct nv50_pc *pc, float f) 277{ 278 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 279 unsigned hw; 280 281 for (hw = 0; hw < pc->immd_nr * 4; hw++) 282 if (pc->immd_buf[hw] == f) 283 break; 284 285 if (hw == pc->immd_nr * 4) 286 hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4; 287 288 r->type = P_IMMD; 289 r->hw = hw; 290 r->index = -1; 291 return r; 292} 293 294static struct nv50_program_exec * 295exec(struct nv50_pc *pc) 296{ 297 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); 298 299 e->param.index = -1; 300 return e; 301} 302 303static void 304emit(struct nv50_pc *pc, struct nv50_program_exec *e) 305{ 306 struct nv50_program *p = pc->p; 307 308 if (p->exec_tail) 309 p->exec_tail->next = e; 310 if (!p->exec_head) 311 p->exec_head = e; 312 p->exec_tail = e; 313 p->exec_size += (e->inst[0] & 1) ? 2 : 1; 314} 315 316static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); 317 318static boolean 319is_long(struct nv50_program_exec *e) 320{ 321 if (e->inst[0] & 1) 322 return TRUE; 323 return FALSE; 324} 325 326static boolean 327is_immd(struct nv50_program_exec *e) 328{ 329 if (is_long(e) && (e->inst[1] & 3) == 3) 330 return TRUE; 331 return FALSE; 332} 333 334static INLINE void 335set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, 336 struct nv50_program_exec *e) 337{ 338 set_long(pc, e); 339 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 340 e->inst[1] |= (pred << 7) | (idx << 12); 341} 342 343static INLINE void 344set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, 345 struct nv50_program_exec *e) 346{ 347 set_long(pc, e); 348 e->inst[1] &= ~((0x3 << 4) | (1 << 6)); 349 e->inst[1] |= (idx << 4) | (on << 6); 350} 351 352static INLINE void 353set_long(struct nv50_pc *pc, struct nv50_program_exec *e) 354{ 355 if (is_long(e)) 356 return; 357 358 e->inst[0] |= 1; 359 set_pred(pc, 0xf, 0, e); 360 set_pred_wr(pc, 0, 0, e); 361} 362 363static INLINE void 364set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) 365{ 366 if (dst->type == P_RESULT) { 367 set_long(pc, e); 368 e->inst[1] |= 0x00000008; 369 } 370 371 alloc_reg(pc, dst); 372 e->inst[0] |= (dst->hw << 2); 373} 374 375static INLINE void 376set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) 377{ 378 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 379 380 set_long(pc, e); 381 /*XXX: can't be predicated - bits overlap.. catch cases where both 382 * are required and avoid them. */ 383 set_pred(pc, 0, 0, e); 384 set_pred_wr(pc, 0, 0, e); 385 386 e->inst[1] |= 0x00000002 | 0x00000001; 387 e->inst[0] |= (val & 0x3f) << 16; 388 e->inst[1] |= (val >> 6) << 2; 389} 390 391 392#define INTERP_LINEAR 0 393#define INTERP_FLAT 1 394#define INTERP_PERSPECTIVE 2 395#define INTERP_CENTROID 4 396 397/* interpolant index has been stored in dst->rhw */ 398static void 399emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv, 400 unsigned mode) 401{ 402 assert(dst->rhw != -1); 403 struct nv50_program_exec *e = exec(pc); 404 405 e->inst[0] |= 0x80000000; 406 set_dst(pc, dst, e); 407 e->inst[0] |= (dst->rhw << 16); 408 409 if (mode & INTERP_FLAT) { 410 e->inst[0] |= (1 << 8); 411 } else { 412 if (mode & INTERP_PERSPECTIVE) { 413 e->inst[0] |= (1 << 25); 414 alloc_reg(pc, iv); 415 e->inst[0] |= (iv->hw << 9); 416 } 417 418 if (mode & INTERP_CENTROID) 419 e->inst[0] |= (1 << 24); 420 } 421 422 emit(pc, e); 423} 424 425static void 426set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, 427 struct nv50_program_exec *e) 428{ 429 set_long(pc, e); 430#if 1 431 e->inst[1] |= (1 << 22); 432#else 433 if (src->type == P_IMMD) { 434 e->inst[1] |= (NV50_CB_PMISC << 22); 435 } else { 436 if (pc->p->type == PIPE_SHADER_VERTEX) 437 e->inst[1] |= (NV50_CB_PVP << 22); 438 else 439 e->inst[1] |= (NV50_CB_PFP << 22); 440 } 441#endif 442 443 e->param.index = src->hw; 444 e->param.shift = s; 445 e->param.mask = m << (s % 32); 446} 447 448static void 449emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 450{ 451 struct nv50_program_exec *e = exec(pc); 452 453 e->inst[0] |= 0x10000000; 454 455 set_dst(pc, dst, e); 456 457 if (0 && dst->type != P_RESULT && src->type == P_IMMD) { 458 set_immd(pc, src, e); 459 /*XXX: 32-bit, but steals part of "half" reg space - need to 460 * catch and handle this case if/when we do half-regs 461 */ 462 e->inst[0] |= 0x00008000; 463 } else 464 if (src->type == P_IMMD || src->type == P_CONST) { 465 set_long(pc, e); 466 set_data(pc, src, 0x7f, 9, e); 467 e->inst[1] |= 0x20000000; /* src0 const? */ 468 } else { 469 if (src->type == P_ATTR) { 470 set_long(pc, e); 471 e->inst[1] |= 0x00200000; 472 } 473 474 alloc_reg(pc, src); 475 e->inst[0] |= (src->hw << 9); 476 } 477 478 /* We really should support "half" instructions here at some point, 479 * but I don't feel confident enough about them yet. 480 */ 481 set_long(pc, e); 482 if (is_long(e) && !is_immd(e)) { 483 e->inst[1] |= 0x04000000; /* 32-bit */ 484 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 485 } 486 487 emit(pc, e); 488} 489 490static INLINE void 491emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) 492{ 493 struct nv50_reg *imm = alloc_immd(pc, f); 494 emit_mov(pc, dst, imm); 495 FREE(imm); 496} 497 498static boolean 499check_swap_src_0_1(struct nv50_pc *pc, 500 struct nv50_reg **s0, struct nv50_reg **s1) 501{ 502 struct nv50_reg *src0 = *s0, *src1 = *s1; 503 504 if (src0->type == P_CONST) { 505 if (src1->type != P_CONST) { 506 *s0 = src1; 507 *s1 = src0; 508 return TRUE; 509 } 510 } else 511 if (src1->type == P_ATTR) { 512 if (src0->type != P_ATTR) { 513 *s0 = src1; 514 *s1 = src0; 515 return TRUE; 516 } 517 } 518 519 return FALSE; 520} 521 522static void 523set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 524{ 525 if (src->type == P_ATTR) { 526 set_long(pc, e); 527 e->inst[1] |= 0x00200000; 528 } else 529 if (src->type == P_CONST || src->type == P_IMMD) { 530 struct nv50_reg *temp = temp_temp(pc); 531 532 emit_mov(pc, temp, src); 533 src = temp; 534 } 535 536 alloc_reg(pc, src); 537 e->inst[0] |= (src->hw << 9); 538} 539 540static void 541set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 542{ 543 if (src->type == P_ATTR) { 544 struct nv50_reg *temp = temp_temp(pc); 545 546 emit_mov(pc, temp, src); 547 src = temp; 548 } else 549 if (src->type == P_CONST || src->type == P_IMMD) { 550 assert(!(e->inst[0] & 0x00800000)); 551 if (e->inst[0] & 0x01000000) { 552 struct nv50_reg *temp = temp_temp(pc); 553 554 emit_mov(pc, temp, src); 555 src = temp; 556 } else { 557 set_data(pc, src, 0x7f, 16, e); 558 e->inst[0] |= 0x00800000; 559 } 560 } 561 562 alloc_reg(pc, src); 563 e->inst[0] |= (src->hw << 16); 564} 565 566static void 567set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 568{ 569 set_long(pc, e); 570 571 if (src->type == P_ATTR) { 572 struct nv50_reg *temp = temp_temp(pc); 573 574 emit_mov(pc, temp, src); 575 src = temp; 576 } else 577 if (src->type == P_CONST || src->type == P_IMMD) { 578 assert(!(e->inst[0] & 0x01000000)); 579 if (e->inst[0] & 0x00800000) { 580 struct nv50_reg *temp = temp_temp(pc); 581 582 emit_mov(pc, temp, src); 583 src = temp; 584 } else { 585 set_data(pc, src, 0x7f, 32+14, e); 586 e->inst[0] |= 0x01000000; 587 } 588 } 589 590 alloc_reg(pc, src); 591 e->inst[1] |= (src->hw << 14); 592} 593 594static void 595emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 596 struct nv50_reg *src1) 597{ 598 struct nv50_program_exec *e = exec(pc); 599 600 e->inst[0] |= 0xc0000000; 601 set_long(pc, e); 602 603 check_swap_src_0_1(pc, &src0, &src1); 604 set_dst(pc, dst, e); 605 set_src_0(pc, src0, e); 606 set_src_1(pc, src1, e); 607 608 emit(pc, e); 609} 610 611static void 612emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 613 struct nv50_reg *src0, struct nv50_reg *src1) 614{ 615 struct nv50_program_exec *e = exec(pc); 616 617 e->inst[0] |= 0xb0000000; 618 619 check_swap_src_0_1(pc, &src0, &src1); 620 set_dst(pc, dst, e); 621 set_src_0(pc, src0, e); 622 if (is_long(e)) 623 set_src_2(pc, src1, e); 624 else 625 set_src_1(pc, src1, e); 626 627 emit(pc, e); 628} 629 630static void 631emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 632 struct nv50_reg *src0, struct nv50_reg *src1) 633{ 634 struct nv50_program_exec *e = exec(pc); 635 636 set_long(pc, e); 637 e->inst[0] |= 0xb0000000; 638 e->inst[1] |= (sub << 29); 639 640 check_swap_src_0_1(pc, &src0, &src1); 641 set_dst(pc, dst, e); 642 set_src_0(pc, src0, e); 643 set_src_1(pc, src1, e); 644 645 emit(pc, e); 646} 647 648static void 649emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 650 struct nv50_reg *src1) 651{ 652 struct nv50_program_exec *e = exec(pc); 653 654 e->inst[0] |= 0xb0000000; 655 656 set_long(pc, e); 657 if (check_swap_src_0_1(pc, &src0, &src1)) 658 e->inst[1] |= 0x04000000; 659 else 660 e->inst[1] |= 0x08000000; 661 662 set_dst(pc, dst, e); 663 set_src_0(pc, src0, e); 664 set_src_2(pc, src1, e); 665 666 emit(pc, e); 667} 668 669static void 670emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 671 struct nv50_reg *src1, struct nv50_reg *src2) 672{ 673 struct nv50_program_exec *e = exec(pc); 674 675 e->inst[0] |= 0xe0000000; 676 677 check_swap_src_0_1(pc, &src0, &src1); 678 set_dst(pc, dst, e); 679 set_src_0(pc, src0, e); 680 set_src_1(pc, src1, e); 681 set_src_2(pc, src2, e); 682 683 emit(pc, e); 684} 685 686static void 687emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 688 struct nv50_reg *src1, struct nv50_reg *src2) 689{ 690 struct nv50_program_exec *e = exec(pc); 691 692 e->inst[0] |= 0xe0000000; 693 set_long(pc, e); 694 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 695 696 check_swap_src_0_1(pc, &src0, &src1); 697 set_dst(pc, dst, e); 698 set_src_0(pc, src0, e); 699 set_src_1(pc, src1, e); 700 set_src_2(pc, src2, e); 701 702 emit(pc, e); 703} 704 705static void 706emit_flop(struct nv50_pc *pc, unsigned sub, 707 struct nv50_reg *dst, struct nv50_reg *src) 708{ 709 struct nv50_program_exec *e = exec(pc); 710 711 e->inst[0] |= 0x90000000; 712 if (sub) { 713 set_long(pc, e); 714 e->inst[1] |= (sub << 29); 715 } 716 717 set_dst(pc, dst, e); 718 set_src_0(pc, src, e); 719 720 emit(pc, e); 721} 722 723static void 724emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 725{ 726 struct nv50_program_exec *e = exec(pc); 727 728 e->inst[0] |= 0xb0000000; 729 730 set_dst(pc, dst, e); 731 set_src_0(pc, src, e); 732 set_long(pc, e); 733 e->inst[1] |= (6 << 29) | 0x00004000; 734 735 emit(pc, e); 736} 737 738static void 739emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 740{ 741 struct nv50_program_exec *e = exec(pc); 742 743 e->inst[0] |= 0xb0000000; 744 745 set_dst(pc, dst, e); 746 set_src_0(pc, src, e); 747 set_long(pc, e); 748 e->inst[1] |= (6 << 29); 749 750 emit(pc, e); 751} 752 753static void 754emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 755 struct nv50_reg *src0, struct nv50_reg *src1) 756{ 757 struct nv50_program_exec *e = exec(pc); 758 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; 759 struct nv50_reg *rdst; 760 761 assert(c_op <= 7); 762 if (check_swap_src_0_1(pc, &src0, &src1)) 763 c_op = inv_cop[c_op]; 764 765 rdst = dst; 766 if (dst->type != P_TEMP) 767 dst = alloc_temp(pc, NULL); 768 769 /* set.u32 */ 770 set_long(pc, e); 771 e->inst[0] |= 0xb0000000; 772 e->inst[1] |= (3 << 29); 773 e->inst[1] |= (c_op << 14); 774 /*XXX: breaks things, .u32 by default? 775 * decuda will disasm as .u16 and use .lo/.hi regs, but this 776 * doesn't seem to match what the hw actually does. 777 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 778 */ 779 set_dst(pc, dst, e); 780 set_src_0(pc, src0, e); 781 set_src_1(pc, src1, e); 782 emit(pc, e); 783 784 /* cvt.f32.u32 */ 785 e = exec(pc); 786 e->inst[0] = 0xa0000001; 787 e->inst[1] = 0x64014780; 788 set_dst(pc, rdst, e); 789 set_src_0(pc, dst, e); 790 emit(pc, e); 791 792 if (dst != rdst) 793 free_temp(pc, dst); 794} 795 796static void 797emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 798{ 799 struct nv50_program_exec *e = exec(pc); 800 801 e->inst[0] = 0xa0000000; /* cvt */ 802 set_long(pc, e); 803 e->inst[1] |= (6 << 29); /* cvt */ 804 e->inst[1] |= 0x08000000; /* integer mode */ 805 e->inst[1] |= 0x04000000; /* 32 bit */ 806 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 807 e->inst[1] |= (1 << 14); /* src .f32 */ 808 set_dst(pc, dst, e); 809 set_src_0(pc, src, e); 810 811 emit(pc, e); 812} 813 814static void 815emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 816 struct nv50_reg *v, struct nv50_reg *e) 817{ 818 struct nv50_reg *temp = alloc_temp(pc, NULL); 819 820 emit_flop(pc, 3, temp, v); 821 emit_mul(pc, temp, temp, e); 822 emit_preex2(pc, temp, temp); 823 emit_flop(pc, 6, dst, temp); 824 825 free_temp(pc, temp); 826} 827 828static void 829emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 830{ 831 struct nv50_program_exec *e = exec(pc); 832 833 e->inst[0] = 0xa0000000; /* cvt */ 834 set_long(pc, e); 835 e->inst[1] |= (6 << 29); /* cvt */ 836 e->inst[1] |= 0x04000000; /* 32 bit */ 837 e->inst[1] |= (1 << 14); /* src .f32 */ 838 e->inst[1] |= ((1 << 6) << 14); /* .abs */ 839 set_dst(pc, dst, e); 840 set_src_0(pc, src, e); 841 842 emit(pc, e); 843} 844 845static void 846emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, 847 struct nv50_reg **src) 848{ 849 struct nv50_reg *one = alloc_immd(pc, 1.0); 850 struct nv50_reg *zero = alloc_immd(pc, 0.0); 851 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 852 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 853 struct nv50_reg *tmp[4]; 854 855 if (mask & (1 << 0)) 856 emit_mov(pc, dst[0], one); 857 858 if (mask & (1 << 3)) 859 emit_mov(pc, dst[3], one); 860 861 if (mask & (3 << 1)) { 862 if (mask & (1 << 1)) 863 tmp[0] = dst[1]; 864 else 865 tmp[0] = temp_temp(pc); 866 emit_minmax(pc, 4, tmp[0], src[0], zero); 867 } 868 869 if (mask & (1 << 2)) { 870 set_pred_wr(pc, 1, 0, pc->p->exec_tail); 871 872 tmp[1] = temp_temp(pc); 873 emit_minmax(pc, 4, tmp[1], src[1], zero); 874 875 tmp[3] = temp_temp(pc); 876 emit_minmax(pc, 4, tmp[3], src[3], neg128); 877 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 878 879 emit_pow(pc, dst[2], tmp[1], tmp[3]); 880 emit_mov(pc, dst[2], zero); 881 set_pred(pc, 3, 0, pc->p->exec_tail); 882 } 883 884 FREE(pos128); 885 FREE(neg128); 886 FREE(zero); 887 FREE(one); 888} 889 890static void 891emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 892{ 893 struct nv50_program_exec *e = exec(pc); 894 895 set_long(pc, e); 896 e->inst[0] |= 0xa0000000; /* delta */ 897 e->inst[1] |= (7 << 29); /* delta */ 898 e->inst[1] |= 0x04000000; /* negate arg0? probably not */ 899 e->inst[1] |= (1 << 14); /* src .f32 */ 900 set_dst(pc, dst, e); 901 set_src_0(pc, src, e); 902 903 emit(pc, e); 904} 905 906static void 907emit_kil(struct nv50_pc *pc, struct nv50_reg *src) 908{ 909 struct nv50_program_exec *e; 910 const int r_pred = 1; 911 912 /* Sets predicate reg ? */ 913 e = exec(pc); 914 e->inst[0] = 0xa00001fd; 915 e->inst[1] = 0xc4014788; 916 set_src_0(pc, src, e); 917 set_pred_wr(pc, 1, r_pred, e); 918 emit(pc, e); 919 920 /* This is probably KILP */ 921 e = exec(pc); 922 e->inst[0] = 0x000001fe; 923 set_long(pc, e); 924 set_pred(pc, 1 /* LT? */, r_pred, e); 925 emit(pc, e); 926} 927 928static struct nv50_reg * 929tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 930{ 931 switch (dst->DstRegister.File) { 932 case TGSI_FILE_TEMPORARY: 933 return &pc->temp[dst->DstRegister.Index * 4 + c]; 934 case TGSI_FILE_OUTPUT: 935 return &pc->result[dst->DstRegister.Index * 4 + c]; 936 case TGSI_FILE_NULL: 937 return NULL; 938 default: 939 break; 940 } 941 942 return NULL; 943} 944 945static struct nv50_reg * 946tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 947{ 948 struct nv50_reg *r = NULL; 949 struct nv50_reg *temp; 950 unsigned sgn, c; 951 952 sgn = tgsi_util_get_full_src_register_sign_mode(src, chan); 953 954 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 955 switch (c) { 956 case TGSI_EXTSWIZZLE_X: 957 case TGSI_EXTSWIZZLE_Y: 958 case TGSI_EXTSWIZZLE_Z: 959 case TGSI_EXTSWIZZLE_W: 960 switch (src->SrcRegister.File) { 961 case TGSI_FILE_INPUT: 962 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 963 break; 964 case TGSI_FILE_TEMPORARY: 965 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 966 break; 967 case TGSI_FILE_CONSTANT: 968 r = &pc->param[src->SrcRegister.Index * 4 + c]; 969 break; 970 case TGSI_FILE_IMMEDIATE: 971 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 972 break; 973 case TGSI_FILE_SAMPLER: 974 break; 975 default: 976 assert(0); 977 break; 978 } 979 break; 980 case TGSI_EXTSWIZZLE_ZERO: 981 r = alloc_immd(pc, 0.0); 982 return r; 983 case TGSI_EXTSWIZZLE_ONE: 984 if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET) 985 return alloc_immd(pc, -1.0); 986 return alloc_immd(pc, 1.0); 987 default: 988 assert(0); 989 break; 990 } 991 992 switch (sgn) { 993 case TGSI_UTIL_SIGN_KEEP: 994 break; 995 case TGSI_UTIL_SIGN_CLEAR: 996 temp = temp_temp(pc); 997 emit_abs(pc, temp, r); 998 r = temp; 999 break; 1000 case TGSI_UTIL_SIGN_TOGGLE: 1001 temp = temp_temp(pc); 1002 emit_neg(pc, temp, r); 1003 r = temp; 1004 break; 1005 case TGSI_UTIL_SIGN_SET: 1006 temp = temp_temp(pc); 1007 emit_abs(pc, temp, r); 1008 emit_neg(pc, temp, temp); 1009 r = temp; 1010 break; 1011 default: 1012 assert(0); 1013 break; 1014 } 1015 1016 return r; 1017} 1018 1019static boolean 1020nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 1021{ 1022 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 1023 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 1024 unsigned mask, sat, unit; 1025 int i, c; 1026 1027 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 1028 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 1029 1030 for (c = 0; c < 4; c++) { 1031 if (mask & (1 << c)) 1032 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 1033 else 1034 dst[c] = NULL; 1035 rdst[c] = NULL; 1036 src[0][c] = NULL; 1037 src[1][c] = NULL; 1038 src[2][c] = NULL; 1039 } 1040 1041 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1042 const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; 1043 1044 if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) 1045 unit = fs->SrcRegister.Index; 1046 1047 for (c = 0; c < 4; c++) 1048 src[i][c] = tgsi_src(pc, c, fs); 1049 } 1050 1051 if (sat) { 1052 for (c = 0; c < 4; c++) { 1053 rdst[c] = dst[c]; 1054 dst[c] = temp_temp(pc); 1055 } 1056 } 1057 1058 switch (inst->Instruction.Opcode) { 1059 case TGSI_OPCODE_ABS: 1060 for (c = 0; c < 4; c++) { 1061 if (!(mask & (1 << c))) 1062 continue; 1063 emit_abs(pc, dst[c], src[0][c]); 1064 } 1065 break; 1066 case TGSI_OPCODE_ADD: 1067 for (c = 0; c < 4; c++) { 1068 if (!(mask & (1 << c))) 1069 continue; 1070 emit_add(pc, dst[c], src[0][c], src[1][c]); 1071 } 1072 break; 1073 case TGSI_OPCODE_COS: 1074 temp = temp_temp(pc); 1075 emit_precossin(pc, temp, src[0][0]); 1076 emit_flop(pc, 5, temp, temp); 1077 for (c = 0; c < 4; c++) { 1078 if (!(mask & (1 << c))) 1079 continue; 1080 emit_mov(pc, dst[c], temp); 1081 } 1082 break; 1083 case TGSI_OPCODE_DP3: 1084 temp = temp_temp(pc); 1085 emit_mul(pc, temp, src[0][0], src[1][0]); 1086 emit_mad(pc, temp, src[0][1], src[1][1], temp); 1087 emit_mad(pc, temp, src[0][2], src[1][2], temp); 1088 for (c = 0; c < 4; c++) { 1089 if (!(mask & (1 << c))) 1090 continue; 1091 emit_mov(pc, dst[c], temp); 1092 } 1093 break; 1094 case TGSI_OPCODE_DP4: 1095 temp = temp_temp(pc); 1096 emit_mul(pc, temp, src[0][0], src[1][0]); 1097 emit_mad(pc, temp, src[0][1], src[1][1], temp); 1098 emit_mad(pc, temp, src[0][2], src[1][2], temp); 1099 emit_mad(pc, temp, src[0][3], src[1][3], temp); 1100 for (c = 0; c < 4; c++) { 1101 if (!(mask & (1 << c))) 1102 continue; 1103 emit_mov(pc, dst[c], temp); 1104 } 1105 break; 1106 case TGSI_OPCODE_DPH: 1107 temp = temp_temp(pc); 1108 emit_mul(pc, temp, src[0][0], src[1][0]); 1109 emit_mad(pc, temp, src[0][1], src[1][1], temp); 1110 emit_mad(pc, temp, src[0][2], src[1][2], temp); 1111 emit_add(pc, temp, src[1][3], temp); 1112 for (c = 0; c < 4; c++) { 1113 if (!(mask & (1 << c))) 1114 continue; 1115 emit_mov(pc, dst[c], temp); 1116 } 1117 break; 1118 case TGSI_OPCODE_DST: 1119 { 1120 struct nv50_reg *one = alloc_immd(pc, 1.0); 1121 if (mask & (1 << 0)) 1122 emit_mov(pc, dst[0], one); 1123 if (mask & (1 << 1)) 1124 emit_mul(pc, dst[1], src[0][1], src[1][1]); 1125 if (mask & (1 << 2)) 1126 emit_mov(pc, dst[2], src[0][2]); 1127 if (mask & (1 << 3)) 1128 emit_mov(pc, dst[3], src[1][3]); 1129 FREE(one); 1130 } 1131 break; 1132 case TGSI_OPCODE_EX2: 1133 temp = temp_temp(pc); 1134 emit_preex2(pc, temp, src[0][0]); 1135 emit_flop(pc, 6, temp, temp); 1136 for (c = 0; c < 4; c++) { 1137 if (!(mask & (1 << c))) 1138 continue; 1139 emit_mov(pc, dst[c], temp); 1140 } 1141 break; 1142 case TGSI_OPCODE_FLR: 1143 for (c = 0; c < 4; c++) { 1144 if (!(mask & (1 << c))) 1145 continue; 1146 emit_flr(pc, dst[c], src[0][c]); 1147 } 1148 break; 1149 case TGSI_OPCODE_FRC: 1150 temp = temp_temp(pc); 1151 for (c = 0; c < 4; c++) { 1152 if (!(mask & (1 << c))) 1153 continue; 1154 emit_flr(pc, temp, src[0][c]); 1155 emit_sub(pc, dst[c], src[0][c], temp); 1156 } 1157 break; 1158 case TGSI_OPCODE_KIL: 1159 emit_kil(pc, src[0][0]); 1160 emit_kil(pc, src[0][1]); 1161 emit_kil(pc, src[0][2]); 1162 emit_kil(pc, src[0][3]); 1163 break; 1164 case TGSI_OPCODE_LIT: 1165 emit_lit(pc, &dst[0], mask, &src[0][0]); 1166 break; 1167 case TGSI_OPCODE_LG2: 1168 temp = temp_temp(pc); 1169 emit_flop(pc, 3, temp, src[0][0]); 1170 for (c = 0; c < 4; c++) { 1171 if (!(mask & (1 << c))) 1172 continue; 1173 emit_mov(pc, dst[c], temp); 1174 } 1175 break; 1176 case TGSI_OPCODE_LRP: 1177 temp = temp_temp(pc); 1178 for (c = 0; c < 4; c++) { 1179 if (!(mask & (1 << c))) 1180 continue; 1181 emit_sub(pc, temp, src[1][c], src[2][c]); 1182 emit_mad(pc, dst[c], temp, src[0][c], src[2][c]); 1183 } 1184 break; 1185 case TGSI_OPCODE_MAD: 1186 for (c = 0; c < 4; c++) { 1187 if (!(mask & (1 << c))) 1188 continue; 1189 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 1190 } 1191 break; 1192 case TGSI_OPCODE_MAX: 1193 for (c = 0; c < 4; c++) { 1194 if (!(mask & (1 << c))) 1195 continue; 1196 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 1197 } 1198 break; 1199 case TGSI_OPCODE_MIN: 1200 for (c = 0; c < 4; c++) { 1201 if (!(mask & (1 << c))) 1202 continue; 1203 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 1204 } 1205 break; 1206 case TGSI_OPCODE_MOV: 1207 for (c = 0; c < 4; c++) { 1208 if (!(mask & (1 << c))) 1209 continue; 1210 emit_mov(pc, dst[c], src[0][c]); 1211 } 1212 break; 1213 case TGSI_OPCODE_MUL: 1214 for (c = 0; c < 4; c++) { 1215 if (!(mask & (1 << c))) 1216 continue; 1217 emit_mul(pc, dst[c], src[0][c], src[1][c]); 1218 } 1219 break; 1220 case TGSI_OPCODE_POW: 1221 temp = temp_temp(pc); 1222 emit_pow(pc, temp, src[0][0], src[1][0]); 1223 for (c = 0; c < 4; c++) { 1224 if (!(mask & (1 << c))) 1225 continue; 1226 emit_mov(pc, dst[c], temp); 1227 } 1228 break; 1229 case TGSI_OPCODE_RCP: 1230 for (c = 0; c < 4; c++) { 1231 if (!(mask & (1 << c))) 1232 continue; 1233 emit_flop(pc, 0, dst[c], src[0][0]); 1234 } 1235 break; 1236 case TGSI_OPCODE_RSQ: 1237 for (c = 0; c < 4; c++) { 1238 if (!(mask & (1 << c))) 1239 continue; 1240 emit_flop(pc, 2, dst[c], src[0][0]); 1241 } 1242 break; 1243 case TGSI_OPCODE_SCS: 1244 temp = temp_temp(pc); 1245 emit_precossin(pc, temp, src[0][0]); 1246 if (mask & (1 << 0)) 1247 emit_flop(pc, 5, dst[0], temp); 1248 if (mask & (1 << 1)) 1249 emit_flop(pc, 4, dst[1], temp); 1250 if (mask & (1 << 2)) 1251 emit_mov_immdval(pc, dst[2], 0.0); 1252 if (mask & (1 << 3)) 1253 emit_mov_immdval(pc, dst[3], 1.0); 1254 break; 1255 case TGSI_OPCODE_SGE: 1256 for (c = 0; c < 4; c++) { 1257 if (!(mask & (1 << c))) 1258 continue; 1259 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 1260 } 1261 break; 1262 case TGSI_OPCODE_SIN: 1263 temp = temp_temp(pc); 1264 emit_precossin(pc, temp, src[0][0]); 1265 emit_flop(pc, 4, temp, temp); 1266 for (c = 0; c < 4; c++) { 1267 if (!(mask & (1 << c))) 1268 continue; 1269 emit_mov(pc, dst[c], temp); 1270 } 1271 break; 1272 case TGSI_OPCODE_SLT: 1273 for (c = 0; c < 4; c++) { 1274 if (!(mask & (1 << c))) 1275 continue; 1276 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 1277 } 1278 break; 1279 case TGSI_OPCODE_SUB: 1280 for (c = 0; c < 4; c++) { 1281 if (!(mask & (1 << c))) 1282 continue; 1283 emit_sub(pc, dst[c], src[0][c], src[1][c]); 1284 } 1285 break; 1286 case TGSI_OPCODE_TEX: 1287 case TGSI_OPCODE_TXP: 1288 { 1289 struct nv50_reg *t[4]; 1290 struct nv50_program_exec *e; 1291 1292 alloc_temp4(pc, t, 0); 1293 emit_mov(pc, t[0], src[0][0]); 1294 emit_mov(pc, t[1], src[0][1]); 1295 1296 e = exec(pc); 1297 e->inst[0] = 0xf6400000; 1298 e->inst[0] |= (unit << 9); 1299 set_long(pc, e); 1300 e->inst[1] |= 0x0000c004; 1301 set_dst(pc, t[0], e); 1302 emit(pc, e); 1303 1304 if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); 1305 if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); 1306 if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); 1307 if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); 1308 1309 free_temp4(pc, t); 1310 } 1311 break; 1312 case TGSI_OPCODE_XPD: 1313 temp = temp_temp(pc); 1314 if (mask & (1 << 0)) { 1315 emit_mul(pc, temp, src[0][2], src[1][1]); 1316 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 1317 } 1318 if (mask & (1 << 1)) { 1319 emit_mul(pc, temp, src[0][0], src[1][2]); 1320 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 1321 } 1322 if (mask & (1 << 2)) { 1323 emit_mul(pc, temp, src[0][1], src[1][0]); 1324 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 1325 } 1326 if (mask & (1 << 3)) 1327 emit_mov_immdval(pc, dst[3], 1.0); 1328 break; 1329 case TGSI_OPCODE_END: 1330 break; 1331 default: 1332 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 1333 return FALSE; 1334 } 1335 1336 if (sat) { 1337 for (c = 0; c < 4; c++) { 1338 struct nv50_program_exec *e; 1339 1340 if (!(mask & (1 << c))) 1341 continue; 1342 e = exec(pc); 1343 1344 e->inst[0] = 0xa0000000; /* cvt */ 1345 set_long(pc, e); 1346 e->inst[1] |= (6 << 29); /* cvt */ 1347 e->inst[1] |= 0x04000000; /* 32 bit */ 1348 e->inst[1] |= (1 << 14); /* src .f32 */ 1349 e->inst[1] |= ((1 << 5) << 14); /* .sat */ 1350 set_dst(pc, rdst[c], e); 1351 set_src_0(pc, dst[c], e); 1352 emit(pc, e); 1353 } 1354 } 1355 1356 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1357 for (c = 0; c < 4; c++) { 1358 if (!src[i][c]) 1359 continue; 1360 if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) 1361 FREE(src[i][c]); 1362 } 1363 } 1364 1365 kill_temp_temp(pc); 1366 return TRUE; 1367} 1368 1369/* Adjust a bitmask that indicates what components of a source are used, 1370 * we use this in tx_prep so we only load interpolants that are needed. 1371 */ 1372static void 1373insn_adjust_mask(const struct tgsi_full_instruction *insn, unsigned *mask) 1374{ 1375 const struct tgsi_instruction_ext_texture *tex; 1376 1377 switch (insn->Instruction.Opcode) { 1378 case TGSI_OPCODE_DP3: 1379 *mask = 0x7; 1380 break; 1381 case TGSI_OPCODE_DP4: 1382 case TGSI_OPCODE_DPH: 1383 *mask = 0xF; 1384 break; 1385 case TGSI_OPCODE_LIT: 1386 *mask = 0xB; 1387 break; 1388 case TGSI_OPCODE_RCP: 1389 case TGSI_OPCODE_RSQ: 1390 *mask = 0x1; 1391 break; 1392 case TGSI_OPCODE_TEX: 1393 case TGSI_OPCODE_TXP: 1394 assert(insn->Instruction.Extended); 1395 tex = &insn->InstructionExtTexture; 1396 1397 *mask = 0x7; 1398 if (tex->Texture == TGSI_TEXTURE_1D) 1399 *mask = 0x1; 1400 else 1401 if (tex->Texture == TGSI_TEXTURE_2D) 1402 *mask = 0x3; 1403 1404 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) 1405 *mask |= 0x8; 1406 break; 1407 default: 1408 break; 1409 } 1410} 1411 1412static void 1413prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok, 1414 unsigned *r_usage[2]) 1415{ 1416 const struct tgsi_full_instruction *insn; 1417 const struct tgsi_full_src_register *src; 1418 const struct tgsi_dst_register *dst; 1419 1420 unsigned i, c, k, n, mask, *acc_p; 1421 1422 insn = &tok->FullInstruction; 1423 dst = &insn->FullDstRegisters[0].DstRegister; 1424 mask = dst->WriteMask; 1425 1426 if (!r_usage[0]) 1427 r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned)); 1428 if (!r_usage[1]) 1429 r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned)); 1430 1431 if (dst->File == TGSI_FILE_TEMPORARY) { 1432 for (c = 0; c < 4; c++) { 1433 if (!(mask & (1 << c))) 1434 continue; 1435 r_usage[0][dst->Index * 4 + c] = pc->insn_nr; 1436 } 1437 } 1438 1439 for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { 1440 src = &insn->FullSrcRegisters[i]; 1441 1442 switch (src->SrcRegister.File) { 1443 case TGSI_FILE_TEMPORARY: 1444 acc_p = r_usage[0]; 1445 break; 1446 case TGSI_FILE_INPUT: 1447 acc_p = r_usage[1]; 1448 break; 1449 default: 1450 continue; 1451 } 1452 1453 insn_adjust_mask(insn, &mask); 1454 1455 for (c = 0; c < 4; c++) { 1456 if (!(mask & (1 << c))) 1457 continue; 1458 1459 k = tgsi_util_get_full_src_register_extswizzle(src, c); 1460 switch (k) { 1461 case TGSI_EXTSWIZZLE_X: 1462 case TGSI_EXTSWIZZLE_Y: 1463 case TGSI_EXTSWIZZLE_Z: 1464 case TGSI_EXTSWIZZLE_W: 1465 n = src->SrcRegister.Index * 4 + k; 1466 acc_p[n] = pc->insn_nr; 1467 break; 1468 default: 1469 break; 1470 } 1471 } 1472 } 1473} 1474 1475static unsigned 1476load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid, 1477 int *aid, int *p_oid) 1478{ 1479 struct nv50_reg *iv; 1480 int oid, c, n; 1481 unsigned mask = 0; 1482 1483 iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p; 1484 1485 for (c = 0, n = i * 4; c < 4; c++, n++) { 1486 oid = (*p_oid)++; 1487 pc->attr[n].type = P_TEMP; 1488 pc->attr[n].index = i; 1489 1490 if (pc->attr[n].acc == acc[n]) 1491 continue; 1492 mask |= (1 << c); 1493 1494 pc->attr[n].acc = acc[n]; 1495 pc->attr[n].rhw = pc->attr[n].hw = -1; 1496 alloc_reg(pc, &pc->attr[n]); 1497 1498 pc->attr[n].rhw = (*aid)++; 1499 emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]); 1500 1501 pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4)); 1502 (*mid)++; 1503 pc->p->cfg.fp.regs[1] += 0x00010001; 1504 } 1505 1506 return mask; 1507} 1508 1509static boolean 1510nv50_program_tx_prep(struct nv50_pc *pc) 1511{ 1512 struct tgsi_parse_context p; 1513 boolean ret = FALSE; 1514 unsigned i, c; 1515 unsigned fcol, bcol, fcrd, depr; 1516 1517 /* count (centroid) perspective interpolations */ 1518 unsigned centroid_loads = 0; 1519 unsigned perspect_loads = 0; 1520 1521 /* track register access for temps and attrs */ 1522 unsigned *r_usage[2]; 1523 r_usage[0] = NULL; 1524 r_usage[1] = NULL; 1525 1526 depr = fcol = bcol = fcrd = 0xffff; 1527 1528 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1529 pc->p->cfg.fp.regs[0] = 0x01000404; 1530 pc->p->cfg.fp.regs[1] = 0x00000400; 1531 } 1532 1533 tgsi_parse_init(&p, pc->p->pipe.tokens); 1534 while (!tgsi_parse_end_of_tokens(&p)) { 1535 const union tgsi_full_token *tok = &p.FullToken; 1536 1537 tgsi_parse_token(&p); 1538 switch (tok->Token.Type) { 1539 case TGSI_TOKEN_TYPE_IMMEDIATE: 1540 { 1541 const struct tgsi_full_immediate *imm = 1542 &p.FullToken.FullImmediate; 1543 1544 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1545 imm->u.ImmediateFloat32[1].Float, 1546 imm->u.ImmediateFloat32[2].Float, 1547 imm->u.ImmediateFloat32[3].Float); 1548 } 1549 break; 1550 case TGSI_TOKEN_TYPE_DECLARATION: 1551 { 1552 const struct tgsi_full_declaration *d; 1553 unsigned last, first, mode; 1554 1555 d = &p.FullToken.FullDeclaration; 1556 first = d->DeclarationRange.First; 1557 last = d->DeclarationRange.Last; 1558 1559 switch (d->Declaration.File) { 1560 case TGSI_FILE_TEMPORARY: 1561 if (pc->temp_nr < (last + 1)) 1562 pc->temp_nr = last + 1; 1563 break; 1564 case TGSI_FILE_OUTPUT: 1565 if (pc->result_nr < (last + 1)) 1566 pc->result_nr = last + 1; 1567 1568 if (!d->Declaration.Semantic) 1569 break; 1570 1571 switch (d->Semantic.SemanticName) { 1572 case TGSI_SEMANTIC_POSITION: 1573 depr = first; 1574 pc->p->cfg.fp.regs[2] |= 0x00000100; 1575 pc->p->cfg.fp.regs[3] |= 0x00000011; 1576 break; 1577 default: 1578 break; 1579 } 1580 1581 break; 1582 case TGSI_FILE_INPUT: 1583 { 1584 if (pc->attr_nr < (last + 1)) 1585 pc->attr_nr = last + 1; 1586 1587 if (pc->p->type != PIPE_SHADER_FRAGMENT) 1588 break; 1589 1590 switch (d->Declaration.Interpolate) { 1591 case TGSI_INTERPOLATE_CONSTANT: 1592 mode = INTERP_FLAT; 1593 break; 1594 case TGSI_INTERPOLATE_PERSPECTIVE: 1595 mode = INTERP_PERSPECTIVE; 1596 break; 1597 default: 1598 mode = INTERP_LINEAR; 1599 break; 1600 } 1601 1602 if (d->Declaration.Semantic) { 1603 switch (d->Semantic.SemanticName) { 1604 case TGSI_SEMANTIC_POSITION: 1605 fcrd = first; 1606 break; 1607 case TGSI_SEMANTIC_COLOR: 1608 fcol = first; 1609 mode = INTERP_PERSPECTIVE; 1610 break; 1611 case TGSI_SEMANTIC_BCOLOR: 1612 bcol = first; 1613 mode = INTERP_PERSPECTIVE; 1614 break; 1615 } 1616 } 1617 1618 if (d->Declaration.Centroid) { 1619 mode |= INTERP_CENTROID; 1620 if (mode & INTERP_PERSPECTIVE) 1621 centroid_loads++; 1622 } else 1623 if (mode & INTERP_PERSPECTIVE) 1624 perspect_loads++; 1625 1626 assert(last < 32); 1627 for (i = first; i <= last; i++) 1628 pc->interp_mode[i] = mode; 1629 } 1630 break; 1631 case TGSI_FILE_CONSTANT: 1632 if (pc->param_nr < (last + 1)) 1633 pc->param_nr = last + 1; 1634 break; 1635 case TGSI_FILE_SAMPLER: 1636 break; 1637 default: 1638 NOUVEAU_ERR("bad decl file %d\n", 1639 d->Declaration.File); 1640 goto out_err; 1641 } 1642 } 1643 break; 1644 case TGSI_TOKEN_TYPE_INSTRUCTION: 1645 pc->insn_nr++; 1646 prep_inspect_insn(pc, tok, r_usage); 1647 break; 1648 default: 1649 break; 1650 } 1651 } 1652 1653 if (pc->temp_nr) { 1654 pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1655 if (!pc->temp) 1656 goto out_err; 1657 1658 for (i = 0; i < pc->temp_nr; i++) { 1659 for (c = 0; c < 4; c++) { 1660 pc->temp[i*4+c].type = P_TEMP; 1661 pc->temp[i*4+c].hw = -1; 1662 pc->temp[i*4+c].rhw = -1; 1663 pc->temp[i*4+c].index = i; 1664 pc->temp[i*4+c].acc = r_usage[0][i*4+c]; 1665 } 1666 } 1667 } 1668 1669 if (pc->attr_nr) { 1670 int oid = 4, mid = 4, aid = 0; 1671 /* oid = VP output id 1672 * aid = FP attribute/interpolant id 1673 * mid = VP output mapping field ID 1674 */ 1675 1676 pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1677 if (!pc->attr) 1678 goto out_err; 1679 1680 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1681 /* position should be loaded first */ 1682 if (fcrd != 0xffff) { 1683 unsigned mask; 1684 mid = 0; 1685 mask = load_fp_attrib(pc, fcrd, r_usage[1], 1686 &mid, &aid, &oid); 1687 oid = 0; 1688 pc->p->cfg.fp.regs[1] |= (mask << 24); 1689 pc->p->cfg.fp.map[0] = 0x04040404 * fcrd; 1690 } 1691 pc->p->cfg.fp.map[0] += 0x03020100; 1692 1693 /* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */ 1694 1695 if (perspect_loads) { 1696 pc->iv_p = alloc_temp(pc, NULL); 1697 1698 if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) { 1699 pc->p->cfg.fp.regs[1] |= 0x08000000; 1700 pc->iv_p->rhw = aid++; 1701 emit_interp(pc, pc->iv_p, NULL, 1702 INTERP_LINEAR); 1703 emit_flop(pc, 0, pc->iv_p, pc->iv_p); 1704 } else { 1705 pc->iv_p->rhw = aid - 1; 1706 emit_flop(pc, 0, pc->iv_p, 1707 &pc->attr[fcrd * 4 + 3]); 1708 } 1709 } 1710 1711 if (centroid_loads) { 1712 pc->iv_c = alloc_temp(pc, NULL); 1713 pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++; 1714 emit_interp(pc, pc->iv_c, NULL, 1715 INTERP_CENTROID); 1716 emit_flop(pc, 0, pc->iv_c, pc->iv_c); 1717 pc->p->cfg.fp.regs[1] |= 0x08000000; 1718 } 1719 1720 for (c = 0; c < 4; c++) { 1721 /* I don't know what these values do, but 1722 * let's set them like the blob does: 1723 */ 1724 if (fcol != 0xffff && r_usage[1][fcol * 4 + c]) 1725 pc->p->cfg.fp.regs[0] += 0x00010000; 1726 if (bcol != 0xffff && r_usage[1][bcol * 4 + c]) 1727 pc->p->cfg.fp.regs[0] += 0x00010000; 1728 } 1729 1730 for (i = 0; i < pc->attr_nr; i++) 1731 load_fp_attrib(pc, i, r_usage[1], 1732 &mid, &aid, &oid); 1733 1734 if (pc->iv_p) 1735 free_temp(pc, pc->iv_p); 1736 if (pc->iv_c) 1737 free_temp(pc, pc->iv_c); 1738 1739 pc->p->cfg.fp.high_map = (mid / 4); 1740 pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0); 1741 } else { 1742 /* vertex program */ 1743 for (i = 0; i < pc->attr_nr * 4; i++) { 1744 pc->p->cfg.vp.attr[aid / 32] |= 1745 (1 << (aid % 32)); 1746 pc->attr[i].type = P_ATTR; 1747 pc->attr[i].hw = aid++; 1748 pc->attr[i].index = i / 4; 1749 } 1750 } 1751 } 1752 1753 if (pc->result_nr) { 1754 int rid = 0; 1755 1756 pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); 1757 if (!pc->result) 1758 goto out_err; 1759 1760 for (i = 0; i < pc->result_nr; i++) { 1761 for (c = 0; c < 4; c++) { 1762 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1763 pc->result[i*4+c].type = P_TEMP; 1764 pc->result[i*4+c].hw = -1; 1765 pc->result[i*4+c].rhw = (i == depr) ? 1766 -1 : rid++; 1767 } else { 1768 pc->result[i*4+c].type = P_RESULT; 1769 pc->result[i*4+c].hw = rid++; 1770 } 1771 pc->result[i*4+c].index = i; 1772 } 1773 1774 if (pc->p->type == PIPE_SHADER_FRAGMENT && 1775 depr != 0xffff) { 1776 pc->result[depr * 4 + 2].rhw = 1777 (pc->result_nr - 1) * 4; 1778 } 1779 } 1780 } 1781 1782 if (pc->param_nr) { 1783 int rid = 0; 1784 1785 pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); 1786 if (!pc->param) 1787 goto out_err; 1788 1789 for (i = 0; i < pc->param_nr; i++) { 1790 for (c = 0; c < 4; c++) { 1791 pc->param[i*4+c].type = P_CONST; 1792 pc->param[i*4+c].hw = rid++; 1793 pc->param[i*4+c].index = i; 1794 } 1795 } 1796 } 1797 1798 if (pc->immd_nr) { 1799 int rid = pc->param_nr * 4; 1800 1801 pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1802 if (!pc->immd) 1803 goto out_err; 1804 1805 for (i = 0; i < pc->immd_nr; i++) { 1806 for (c = 0; c < 4; c++) { 1807 pc->immd[i*4+c].type = P_IMMD; 1808 pc->immd[i*4+c].hw = rid++; 1809 pc->immd[i*4+c].index = i; 1810 } 1811 } 1812 } 1813 1814 ret = TRUE; 1815out_err: 1816 if (r_usage[0]) 1817 FREE(r_usage[0]); 1818 if (r_usage[1]) 1819 FREE(r_usage[1]); 1820 1821 tgsi_parse_free(&p); 1822 return ret; 1823} 1824 1825static void 1826free_nv50_pc(struct nv50_pc *pc) 1827{ 1828 unsigned i; 1829 1830 if (pc->immd) 1831 FREE(pc->immd); 1832 if (pc->param) 1833 FREE(pc->param); 1834 if (pc->result) 1835 FREE(pc->result); 1836 if (pc->attr) 1837 FREE(pc->attr); 1838 if (pc->temp) 1839 FREE(pc->temp); 1840 1841 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 1842 /* deallocate fragment program attributes */ 1843 if (pc->r_temp[i] && pc->r_temp[i]->index == -1) 1844 FREE(pc->r_temp[i]); 1845 } 1846 1847 FREE(pc); 1848} 1849 1850static boolean 1851nv50_program_tx(struct nv50_program *p) 1852{ 1853 struct tgsi_parse_context parse; 1854 struct nv50_pc *pc; 1855 unsigned k; 1856 boolean ret; 1857 1858 pc = CALLOC_STRUCT(nv50_pc); 1859 if (!pc) 1860 return FALSE; 1861 pc->p = p; 1862 pc->p->cfg.high_temp = 4; 1863 1864 ret = nv50_program_tx_prep(pc); 1865 if (ret == FALSE) 1866 goto out_cleanup; 1867 1868 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1869 while (!tgsi_parse_end_of_tokens(&parse)) { 1870 const union tgsi_full_token *tok = &parse.FullToken; 1871 1872 tgsi_parse_token(&parse); 1873 1874 switch (tok->Token.Type) { 1875 case TGSI_TOKEN_TYPE_INSTRUCTION: 1876 ++pc->insn_cur; 1877 ret = nv50_program_tx_insn(pc, tok); 1878 if (ret == FALSE) 1879 goto out_err; 1880 break; 1881 default: 1882 break; 1883 } 1884 } 1885 1886 if (p->type == PIPE_SHADER_FRAGMENT) { 1887 struct nv50_reg out; 1888 1889 out.type = P_TEMP; 1890 for (k = 0; k < pc->result_nr * 4; k++) { 1891 if (pc->result[k].rhw == -1) 1892 continue; 1893 if (pc->result[k].hw != pc->result[k].rhw) { 1894 out.hw = pc->result[k].rhw; 1895 emit_mov(pc, &out, &pc->result[k]); 1896 } 1897 if (pc->p->cfg.high_result < (pc->result[k].rhw + 1)) 1898 pc->p->cfg.high_result = pc->result[k].rhw + 1; 1899 } 1900 } 1901 1902 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); 1903 pc->p->exec_tail->inst[1] |= 0x00000001; 1904 1905 p->param_nr = pc->param_nr * 4; 1906 p->immd_nr = pc->immd_nr * 4; 1907 p->immd = pc->immd_buf; 1908 1909out_err: 1910 tgsi_parse_free(&parse); 1911 1912out_cleanup: 1913 free_nv50_pc(pc); 1914 return ret; 1915} 1916 1917static void 1918nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1919{ 1920 if (nv50_program_tx(p) == FALSE) 1921 assert(0); 1922 p->translated = TRUE; 1923} 1924 1925static void 1926nv50_program_upload_data(struct nv50_context *nv50, float *map, 1927 unsigned start, unsigned count) 1928{ 1929 struct nouveau_channel *chan = nv50->screen->nvws->channel; 1930 struct nouveau_grobj *tesla = nv50->screen->tesla; 1931 1932 while (count) { 1933 unsigned nr = count > 2047 ? 2047 : count; 1934 1935 BEGIN_RING(chan, tesla, 0x00000f00, 1); 1936 OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8)); 1937 BEGIN_RING(chan, tesla, 0x40000f04, nr); 1938 OUT_RINGp (chan, map, nr); 1939 1940 map += nr; 1941 start += nr; 1942 count -= nr; 1943 } 1944} 1945 1946static void 1947nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1948{ 1949 struct nouveau_winsys *nvws = nv50->screen->nvws; 1950 struct pipe_winsys *ws = nv50->pipe.winsys; 1951 unsigned nr = p->param_nr + p->immd_nr; 1952 1953 if (!p->data && nr) { 1954 struct nouveau_resource *heap = nv50->screen->vp_data_heap; 1955 1956 if (nvws->res_alloc(heap, nr, p, &p->data)) { 1957 while (heap->next && heap->size < nr) { 1958 struct nv50_program *evict = heap->next->priv; 1959 nvws->res_free(&evict->data); 1960 } 1961 1962 if (nvws->res_alloc(heap, nr, p, &p->data)) 1963 assert(0); 1964 } 1965 } 1966 1967 if (p->param_nr) { 1968 float *map = ws->buffer_map(ws, nv50->constbuf[p->type], 1969 PIPE_BUFFER_USAGE_CPU_READ); 1970 nv50_program_upload_data(nv50, map, p->data->start, 1971 p->param_nr); 1972 ws->buffer_unmap(ws, nv50->constbuf[p->type]); 1973 } 1974 1975 if (p->immd_nr) { 1976 nv50_program_upload_data(nv50, p->immd, 1977 p->data->start + p->param_nr, 1978 p->immd_nr); 1979 } 1980} 1981 1982static void 1983nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1984{ 1985 struct nouveau_channel *chan = nv50->screen->nvws->channel; 1986 struct nouveau_grobj *tesla = nv50->screen->tesla; 1987 struct pipe_screen *screen = nv50->pipe.screen; 1988 struct nv50_program_exec *e; 1989 struct nouveau_stateobj *so; 1990 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; 1991 unsigned start, count, *up, *ptr; 1992 boolean upload = FALSE; 1993 1994 if (!p->buffer) { 1995 p->buffer = screen->buffer_create(screen, 0x100, 0, p->exec_size * 4); 1996 upload = TRUE; 1997 } 1998 1999 if (p->data && p->data->start != p->data_start) { 2000 for (e = p->exec_head; e; e = e->next) { 2001 unsigned ei, ci; 2002 2003 if (e->param.index < 0) 2004 continue; 2005 ei = e->param.shift >> 5; 2006 ci = e->param.index + p->data->start; 2007 2008 e->inst[ei] &= ~e->param.mask; 2009 e->inst[ei] |= (ci << e->param.shift); 2010 } 2011 2012 p->data_start = p->data->start; 2013 upload = TRUE; 2014 } 2015 2016 if (!upload) 2017 return; 2018 2019#ifdef NV50_PROGRAM_DUMP 2020 NOUVEAU_ERR("-------\n"); 2021 for (e = p->exec_head; e; e = e->next) { 2022 NOUVEAU_ERR("0x%08x\n", e->inst[0]); 2023 if (is_long(e)) 2024 NOUVEAU_ERR("0x%08x\n", e->inst[1]); 2025 } 2026#endif 2027 2028 up = ptr = MALLOC(p->exec_size * 4); 2029 for (e = p->exec_head; e; e = e->next) { 2030 *(ptr++) = e->inst[0]; 2031 if (is_long(e)) 2032 *(ptr++) = e->inst[1]; 2033 } 2034 2035 so = so_new(4,2); 2036 so_method(so, nv50->screen->tesla, 0x1280, 3); 2037 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); 2038 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); 2039 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); 2040 2041 start = 0; count = p->exec_size; 2042 while (count) { 2043 struct nouveau_winsys *nvws = nv50->screen->nvws; 2044 unsigned nr; 2045 2046 so_emit(nvws, so); 2047 2048 nr = MIN2(count, 2047); 2049 nr = MIN2(nvws->channel->pushbuf->remaining, nr); 2050 if (nvws->channel->pushbuf->remaining < (nr + 3)) { 2051 FIRE_RING(chan); 2052 continue; 2053 } 2054 2055 BEGIN_RING(chan, tesla, 0x0f00, 1); 2056 OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); 2057 BEGIN_RING(chan, tesla, 0x40000f04, nr); 2058 OUT_RINGp (chan, up + start, nr); 2059 2060 start += nr; 2061 count -= nr; 2062 } 2063 2064 FREE(up); 2065 so_ref(NULL, &so); 2066} 2067 2068void 2069nv50_vertprog_validate(struct nv50_context *nv50) 2070{ 2071 struct nouveau_grobj *tesla = nv50->screen->tesla; 2072 struct nv50_program *p = nv50->vertprog; 2073 struct nouveau_stateobj *so; 2074 2075 if (!p->translated) { 2076 nv50_program_validate(nv50, p); 2077 if (!p->translated) 2078 assert(0); 2079 } 2080 2081 nv50_program_validate_data(nv50, p); 2082 nv50_program_validate_code(nv50, p); 2083 2084 so = so_new(13, 2); 2085 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 2086 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 2087 NOUVEAU_BO_HIGH, 0, 0); 2088 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 2089 NOUVEAU_BO_LOW, 0, 0); 2090 so_method(so, tesla, 0x1650, 2); 2091 so_data (so, p->cfg.vp.attr[0]); 2092 so_data (so, p->cfg.vp.attr[1]); 2093 so_method(so, tesla, 0x16b8, 1); 2094 so_data (so, p->cfg.high_result); 2095 so_method(so, tesla, 0x16ac, 2); 2096 so_data (so, p->cfg.high_result); //8); 2097 so_data (so, p->cfg.high_temp); 2098 so_method(so, tesla, 0x140c, 1); 2099 so_data (so, 0); /* program start offset */ 2100 so_ref(so, &nv50->state.vertprog); 2101 so_ref(NULL, &so); 2102} 2103 2104void 2105nv50_fragprog_validate(struct nv50_context *nv50) 2106{ 2107 struct nouveau_grobj *tesla = nv50->screen->tesla; 2108 struct nv50_program *p = nv50->fragprog; 2109 struct nouveau_stateobj *so; 2110 unsigned i; 2111 2112 if (!p->translated) { 2113 nv50_program_validate(nv50, p); 2114 if (!p->translated) 2115 assert(0); 2116 } 2117 2118 nv50_program_validate_data(nv50, p); 2119 nv50_program_validate_code(nv50, p); 2120 2121 so = so_new(64, 2); 2122 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 2123 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 2124 NOUVEAU_BO_HIGH, 0, 0); 2125 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 2126 NOUVEAU_BO_LOW, 0, 0); 2127 so_method(so, tesla, 0x1904, 4); 2128 so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */ 2129 so_data (so, 0x00000004); 2130 so_data (so, 0x00000000); 2131 so_data (so, 0x00000000); 2132 so_method(so, tesla, 0x16bc, p->cfg.fp.high_map); 2133 for (i = 0; i < p->cfg.fp.high_map; i++) 2134 so_data(so, p->cfg.fp.map[i]); 2135 so_method(so, tesla, 0x1988, 2); 2136 so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */ 2137 so_data (so, p->cfg.high_temp); 2138 so_method(so, tesla, 0x1298, 1); 2139 so_data (so, p->cfg.high_result); 2140 so_method(so, tesla, 0x19a8, 1); 2141 so_data (so, p->cfg.fp.regs[2]); 2142 so_method(so, tesla, 0x196c, 1); 2143 so_data (so, p->cfg.fp.regs[3]); 2144 so_method(so, tesla, 0x1414, 1); 2145 so_data (so, 0); /* program start offset */ 2146 so_ref(so, &nv50->state.fragprog); 2147 so_ref(NULL, &so); 2148} 2149 2150void 2151nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 2152{ 2153 struct pipe_screen *pscreen = nv50->pipe.screen; 2154 2155 while (p->exec_head) { 2156 struct nv50_program_exec *e = p->exec_head; 2157 2158 p->exec_head = e->next; 2159 FREE(e); 2160 } 2161 p->exec_tail = NULL; 2162 p->exec_size = 0; 2163 2164 if (p->buffer) 2165 pipe_buffer_reference(&p->buffer, NULL); 2166 2167 nv50->screen->nvws->res_free(&p->data); 2168 2169 p->translated = 0; 2170} 2171 2172