/* nv50_program.c revision 525f529d138168386224136dc45abb858677bac7 */
1/* 2 * Copyright 2008 Ben Skeggs 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23#include "pipe/p_context.h" 24#include "pipe/p_defines.h" 25#include "pipe/p_state.h" 26#include "pipe/p_inlines.h" 27 28#include "pipe/p_shader_tokens.h" 29#include "tgsi/tgsi_parse.h" 30#include "tgsi/tgsi_util.h" 31 32#include "nv50_context.h" 33 34#define NV50_SU_MAX_TEMP 127 35#define NV50_SU_MAX_ADDR 4 36//#define NV50_PROGRAM_DUMP 37 38/* $a5 and $a6 always seem to be 0, and using $a7 gives you noise */ 39 40/* ARL - gallium craps itself on progs/vp/arl.txt 41 * 42 * MSB - Like MAD, but MUL+SUB 43 * - Fuck it off, introduce a way to negate args for ops that 44 * support it. 45 * 46 * Look into inlining IMMD for ops other than MOV (make it general?) 47 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 48 * but can emit to P_TEMP first - then MOV later. 
NVIDIA does this 49 * 50 * In ops such as ADD it's possible to construct a bad opcode in the !is_long() 51 * case, if the emit_src() causes the inst to suddenly become long. 52 * 53 * Verify half-insns work where expected - and force disable them where they 54 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 55 * 56 * FUCK! watch dst==src vectors, can overwrite components that are needed. 57 * ie. SUB R0, R0.yzxw, R0 58 * 59 * Things to check with renouveau: 60 * FP attr/result assignment - how? 61 * attrib 62 * - 0x16bc maps vp output onto fp hpos 63 * - 0x16c0 maps vp output onto fp col0 64 * result 65 * - colr always 0-3 66 * - depr always 4 67 * 0x16bc->0x16e8 --> some binding between vp/fp regs 68 * 0x16b8 --> VP output count 69 * 70 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 71 * "MOV rcol.x, fcol.y" = 0x00000004 72 * 0x19a8 --> as above but 0x00000100 and 0x00000000 73 * - 0x00100000 used when KIL used 74 * 0x196c --> as above but 0x00000011 and 0x00000000 75 * 76 * 0x1988 --> 0xXXNNNNNN 77 * - XX == FP high something 78 */ 79struct nv50_reg { 80 enum { 81 P_TEMP, 82 P_ATTR, 83 P_RESULT, 84 P_CONST, 85 P_IMMD, 86 P_ADDR 87 } type; 88 int index; 89 90 int hw; 91 int neg; 92 93 int rhw; /* result hw for FP outputs, or interpolant index */ 94 int acc; /* instruction where this reg is last read (first insn == 1) */ 95}; 96 97/* arbitrary limits */ 98#define MAX_IF_DEPTH 4 99#define MAX_LOOP_DEPTH 4 100 101struct nv50_pc { 102 struct nv50_program *p; 103 104 /* hw resources */ 105 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 106 struct nv50_reg r_addr[NV50_SU_MAX_ADDR]; 107 108 /* tgsi resources */ 109 struct nv50_reg *temp; 110 int temp_nr; 111 struct nv50_reg *attr; 112 int attr_nr; 113 struct nv50_reg *result; 114 int result_nr; 115 struct nv50_reg *param; 116 int param_nr; 117 struct nv50_reg *immd; 118 float *immd_buf; 119 int immd_nr; 120 struct nv50_reg **addr; 121 int addr_nr; 122 123 struct nv50_reg *temp_temp[16]; 
124 unsigned temp_temp_nr; 125 126 /* broadcast and destination replacement regs */ 127 struct nv50_reg *r_brdc; 128 struct nv50_reg *r_dst[4]; 129 130 unsigned interp_mode[32]; 131 /* perspective interpolation registers */ 132 struct nv50_reg *iv_p; 133 struct nv50_reg *iv_c; 134 135 struct nv50_program_exec *if_cond; 136 struct nv50_program_exec *if_insn[MAX_IF_DEPTH]; 137 struct nv50_program_exec *br_join[MAX_IF_DEPTH]; 138 struct nv50_program_exec *br_loop[MAX_LOOP_DEPTH]; /* for BRK branch */ 139 int if_lvl, loop_lvl; 140 unsigned loop_pos[MAX_LOOP_DEPTH]; 141 142 /* current instruction and total number of insns */ 143 unsigned insn_cur; 144 unsigned insn_nr; 145 146 boolean allow32; 147}; 148 149static INLINE void 150ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) 151{ 152 reg->type = type; 153 reg->index = index; 154 reg->hw = hw; 155 reg->neg = 0; 156 reg->rhw = -1; 157 reg->acc = 0; 158} 159 160static INLINE unsigned 161popcnt4(uint32_t val) 162{ 163 static const unsigned cnt[16] 164 = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; 165 return cnt[val & 0xf]; 166} 167 168static void 169terminate_mbb(struct nv50_pc *pc) 170{ 171 int i; 172 173 /* remove records of temporary address register values */ 174 for (i = 0; i < NV50_SU_MAX_ADDR; ++i) 175 if (pc->r_addr[i].index < 0) 176 pc->r_addr[i].rhw = -1; 177} 178 179static void 180alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 181{ 182 int i = 0; 183 184 if (reg->type == P_RESULT) { 185 if (pc->p->cfg.high_result < (reg->hw + 1)) 186 pc->p->cfg.high_result = reg->hw + 1; 187 } 188 189 if (reg->type != P_TEMP) 190 return; 191 192 if (reg->hw >= 0) { 193 /*XXX: do this here too to catch FP temp-as-attr usage.. 
194 * not clean, but works */ 195 if (pc->p->cfg.high_temp < (reg->hw + 1)) 196 pc->p->cfg.high_temp = reg->hw + 1; 197 return; 198 } 199 200 if (reg->rhw != -1) { 201 /* try to allocate temporary with index rhw first */ 202 if (!(pc->r_temp[reg->rhw])) { 203 pc->r_temp[reg->rhw] = reg; 204 reg->hw = reg->rhw; 205 if (pc->p->cfg.high_temp < (reg->rhw + 1)) 206 pc->p->cfg.high_temp = reg->rhw + 1; 207 return; 208 } 209 /* make sure we don't get things like $r0 needs to go 210 * in $r1 and $r1 in $r0 211 */ 212 i = pc->result_nr * 4; 213 } 214 215 for (; i < NV50_SU_MAX_TEMP; i++) { 216 if (!(pc->r_temp[i])) { 217 pc->r_temp[i] = reg; 218 reg->hw = i; 219 if (pc->p->cfg.high_temp < (i + 1)) 220 pc->p->cfg.high_temp = i + 1; 221 return; 222 } 223 } 224 225 assert(0); 226} 227 228/* XXX: For shaders that aren't executed linearly (e.g. shaders that 229 * contain loops), we need to assign all hw regs to TGSI TEMPs early, 230 * lest we risk temp_temps overwriting regs alloc'd "later". 231 */ 232static struct nv50_reg * 233alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 234{ 235 struct nv50_reg *r; 236 int i; 237 238 if (dst && dst->type == P_TEMP && dst->hw == -1) 239 return dst; 240 241 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 242 if (!pc->r_temp[i]) { 243 r = MALLOC_STRUCT(nv50_reg); 244 ctor_reg(r, P_TEMP, -1, i); 245 pc->r_temp[i] = r; 246 return r; 247 } 248 } 249 250 assert(0); 251 return NULL; 252} 253 254/* Assign the hw of the discarded temporary register src 255 * to the tgsi register dst and free src. 
256 */ 257static void 258assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 259{ 260 assert(src->index == -1 && src->hw != -1); 261 262 if (dst->hw != -1) 263 pc->r_temp[dst->hw] = NULL; 264 pc->r_temp[src->hw] = dst; 265 dst->hw = src->hw; 266 267 FREE(src); 268} 269 270/* release the hardware resource held by r */ 271static void 272release_hw(struct nv50_pc *pc, struct nv50_reg *r) 273{ 274 assert(r->type == P_TEMP); 275 if (r->hw == -1) 276 return; 277 278 assert(pc->r_temp[r->hw] == r); 279 pc->r_temp[r->hw] = NULL; 280 281 r->acc = 0; 282 if (r->index == -1) 283 FREE(r); 284} 285 286static void 287free_temp(struct nv50_pc *pc, struct nv50_reg *r) 288{ 289 if (r->index == -1) { 290 unsigned hw = r->hw; 291 292 FREE(pc->r_temp[hw]); 293 pc->r_temp[hw] = NULL; 294 } 295} 296 297static int 298alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) 299{ 300 int i; 301 302 if ((idx + 4) >= NV50_SU_MAX_TEMP) 303 return 1; 304 305 if (pc->r_temp[idx] || pc->r_temp[idx + 1] || 306 pc->r_temp[idx + 2] || pc->r_temp[idx + 3]) 307 return alloc_temp4(pc, dst, idx + 4); 308 309 for (i = 0; i < 4; i++) { 310 dst[i] = MALLOC_STRUCT(nv50_reg); 311 ctor_reg(dst[i], P_TEMP, -1, idx + i); 312 pc->r_temp[idx + i] = dst[i]; 313 } 314 315 return 0; 316} 317 318static void 319free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) 320{ 321 int i; 322 323 for (i = 0; i < 4; i++) 324 free_temp(pc, reg[i]); 325} 326 327static struct nv50_reg * 328temp_temp(struct nv50_pc *pc) 329{ 330 if (pc->temp_temp_nr >= 16) 331 assert(0); 332 333 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 334 return pc->temp_temp[pc->temp_temp_nr++]; 335} 336 337static void 338kill_temp_temp(struct nv50_pc *pc) 339{ 340 int i; 341 342 for (i = 0; i < pc->temp_temp_nr; i++) 343 free_temp(pc, pc->temp_temp[i]); 344 pc->temp_temp_nr = 0; 345} 346 347static int 348ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 349{ 350 pc->immd_buf = 
REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)), 351 (pc->immd_nr + 1) * 4 * sizeof(float)); 352 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 353 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 354 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 355 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 356 357 return pc->immd_nr++; 358} 359 360static struct nv50_reg * 361alloc_immd(struct nv50_pc *pc, float f) 362{ 363 struct nv50_reg *r = MALLOC_STRUCT(nv50_reg); 364 unsigned hw; 365 366 for (hw = 0; hw < pc->immd_nr * 4; hw++) 367 if (pc->immd_buf[hw] == f) 368 break; 369 370 if (hw == pc->immd_nr * 4) 371 hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4; 372 373 ctor_reg(r, P_IMMD, -1, hw); 374 return r; 375} 376 377static struct nv50_program_exec * 378exec(struct nv50_pc *pc) 379{ 380 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); 381 382 e->param.index = -1; 383 return e; 384} 385 386static void 387emit(struct nv50_pc *pc, struct nv50_program_exec *e) 388{ 389 struct nv50_program *p = pc->p; 390 391 if (p->exec_tail) 392 p->exec_tail->next = e; 393 if (!p->exec_head) 394 p->exec_head = e; 395 p->exec_tail = e; 396 p->exec_size += (e->inst[0] & 1) ? 
2 : 1; 397} 398 399static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); 400 401static boolean 402is_long(struct nv50_program_exec *e) 403{ 404 if (e->inst[0] & 1) 405 return TRUE; 406 return FALSE; 407} 408 409static boolean 410is_immd(struct nv50_program_exec *e) 411{ 412 if (is_long(e) && (e->inst[1] & 3) == 3) 413 return TRUE; 414 return FALSE; 415} 416 417static INLINE void 418set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, 419 struct nv50_program_exec *e) 420{ 421 set_long(pc, e); 422 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 423 e->inst[1] |= (pred << 7) | (idx << 12); 424} 425 426static INLINE void 427set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, 428 struct nv50_program_exec *e) 429{ 430 set_long(pc, e); 431 e->inst[1] &= ~((0x3 << 4) | (1 << 6)); 432 e->inst[1] |= (idx << 4) | (on << 6); 433} 434 435static INLINE void 436set_long(struct nv50_pc *pc, struct nv50_program_exec *e) 437{ 438 if (is_long(e)) 439 return; 440 441 e->inst[0] |= 1; 442 set_pred(pc, 0xf, 0, e); 443 set_pred_wr(pc, 0, 0, e); 444} 445 446static INLINE void 447set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) 448{ 449 if (dst->type == P_RESULT) { 450 set_long(pc, e); 451 e->inst[1] |= 0x00000008; 452 } 453 454 alloc_reg(pc, dst); 455 if (dst->hw > 63) 456 set_long(pc, e); 457 e->inst[0] |= (dst->hw << 2); 458} 459 460static INLINE void 461set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) 462{ 463 float f = pc->immd_buf[imm->hw]; 464 unsigned val = fui(imm->neg ? -f : f); 465 466 set_long(pc, e); 467 /*XXX: can't be predicated - bits overlap.. catch cases where both 468 * are required and avoid them. 
*/ 469 set_pred(pc, 0, 0, e); 470 set_pred_wr(pc, 0, 0, e); 471 472 e->inst[1] |= 0x00000002 | 0x00000001; 473 e->inst[0] |= (val & 0x3f) << 16; 474 e->inst[1] |= (val >> 6) << 2; 475} 476 477static INLINE void 478set_addr(struct nv50_program_exec *e, struct nv50_reg *a) 479{ 480 assert(!(e->inst[0] & 0x0c000000)); 481 assert(!(e->inst[1] & 0x00000004)); 482 483 e->inst[0] |= (a->hw & 3) << 26; 484 e->inst[1] |= (a->hw >> 2) << 2; 485} 486 487static void 488emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, 489 struct nv50_reg *src0, uint16_t src1_val) 490{ 491 struct nv50_program_exec *e = exec(pc); 492 493 e->inst[0] = 0xd0000000 | (src1_val << 9); 494 e->inst[1] = 0x20000000; 495 set_long(pc, e); 496 e->inst[0] |= dst->hw << 2; 497 if (src0) /* otherwise will add to $a0, which is always 0 */ 498 set_addr(e, src0); 499 500 emit(pc, e); 501} 502 503static struct nv50_reg * 504alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) 505{ 506 int i; 507 struct nv50_reg *a_tgsi = NULL, *a = NULL; 508 509 if (!ref) { 510 /* allocate for TGSI address reg */ 511 for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { 512 if (pc->r_addr[i].index >= 0) 513 continue; 514 if (pc->r_addr[i].rhw >= 0 && 515 pc->r_addr[i].acc == pc->insn_cur) 516 continue; 517 518 pc->r_addr[i].rhw = -1; 519 pc->r_addr[i].index = i; 520 return &pc->r_addr[i]; 521 } 522 assert(0); 523 return NULL; 524 } 525 526 /* Allocate and set an address reg so we can access 'ref'. 527 * 528 * If and r_addr has index < 0, it is not reserved for TGSI, 529 * and index will be the negative of the TGSI addr index the 530 * value in rhw is relative to, or -256 if rhw is an offset 531 * from 0. If rhw < 0, the reg has not been initialized. 
532 */ 533 for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) { 534 if (pc->r_addr[i].index >= 0) /* occupied for TGSI */ 535 continue; 536 if (pc->r_addr[i].rhw < 0) { /* unused */ 537 a = &pc->r_addr[i]; 538 continue; 539 } 540 if (!a && pc->r_addr[i].acc != pc->insn_cur) 541 a = &pc->r_addr[i]; 542 543 if (ref->hw - pc->r_addr[i].rhw >= 128) 544 continue; 545 546 if ((ref->acc >= 0 && pc->r_addr[i].index == -256) || 547 (ref->acc < 0 && -pc->r_addr[i].index == ref->index)) { 548 pc->r_addr[i].acc = pc->insn_cur; 549 return &pc->r_addr[i]; 550 } 551 } 552 assert(a); 553 554 if (ref->acc < 0) 555 a_tgsi = pc->addr[ref->index]; 556 557 emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); 558 559 a->rhw = ref->hw & ~0x7f; 560 a->acc = pc->insn_cur; 561 a->index = a_tgsi ? -ref->index : -256; 562 return a; 563} 564 565#define INTERP_LINEAR 0 566#define INTERP_FLAT 1 567#define INTERP_PERSPECTIVE 2 568#define INTERP_CENTROID 4 569 570/* interpolant index has been stored in dst->rhw */ 571static void 572emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv, 573 unsigned mode) 574{ 575 assert(dst->rhw != -1); 576 struct nv50_program_exec *e = exec(pc); 577 578 e->inst[0] |= 0x80000000; 579 set_dst(pc, dst, e); 580 e->inst[0] |= (dst->rhw << 16); 581 582 if (mode & INTERP_FLAT) { 583 e->inst[0] |= (1 << 8); 584 } else { 585 if (mode & INTERP_PERSPECTIVE) { 586 e->inst[0] |= (1 << 25); 587 alloc_reg(pc, iv); 588 e->inst[0] |= (iv->hw << 9); 589 } 590 591 if (mode & INTERP_CENTROID) 592 e->inst[0] |= (1 << 24); 593 } 594 595 emit(pc, e); 596} 597 598static void 599set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, 600 struct nv50_program_exec *e) 601{ 602 set_long(pc, e); 603 604 e->param.index = src->hw & 127; 605 e->param.shift = s; 606 e->param.mask = m << (s % 32); 607 608 if (src->hw > 127) 609 set_addr(e, alloc_addr(pc, src)); 610 else 611 if (src->acc < 0) { 612 assert(src->type == P_CONST); 613 set_addr(e, 
pc->addr[src->index]); 614 } 615 616 e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); 617} 618 619static void 620emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 621{ 622 struct nv50_program_exec *e = exec(pc); 623 624 e->inst[0] = 0x10000000; 625 if (!pc->allow32) 626 set_long(pc, e); 627 628 set_dst(pc, dst, e); 629 630 if (!is_long(e) && src->type == P_IMMD) { 631 set_immd(pc, src, e); 632 /*XXX: 32-bit, but steals part of "half" reg space - need to 633 * catch and handle this case if/when we do half-regs 634 */ 635 } else 636 if (src->type == P_IMMD || src->type == P_CONST) { 637 set_long(pc, e); 638 set_data(pc, src, 0x7f, 9, e); 639 e->inst[1] |= 0x20000000; /* src0 const? */ 640 } else { 641 if (src->type == P_ATTR) { 642 set_long(pc, e); 643 e->inst[1] |= 0x00200000; 644 } 645 646 alloc_reg(pc, src); 647 if (src->hw > 63) 648 set_long(pc, e); 649 e->inst[0] |= (src->hw << 9); 650 } 651 652 if (is_long(e) && !is_immd(e)) { 653 e->inst[1] |= 0x04000000; /* 32-bit */ 654 e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */ 655 if (!(e->inst[1] & 0x20000000)) 656 e->inst[1] |= 0x00030000; /* "subsubop" 0xf */ 657 } else 658 e->inst[0] |= 0x00008000; 659 660 emit(pc, e); 661} 662 663static INLINE void 664emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) 665{ 666 struct nv50_reg *imm = alloc_immd(pc, f); 667 emit_mov(pc, dst, imm); 668 FREE(imm); 669} 670 671static boolean 672check_swap_src_0_1(struct nv50_pc *pc, 673 struct nv50_reg **s0, struct nv50_reg **s1) 674{ 675 struct nv50_reg *src0 = *s0, *src1 = *s1; 676 677 if (src0->type == P_CONST) { 678 if (src1->type != P_CONST) { 679 *s0 = src1; 680 *s1 = src0; 681 return TRUE; 682 } 683 } else 684 if (src1->type == P_ATTR) { 685 if (src0->type != P_ATTR) { 686 *s0 = src1; 687 *s1 = src0; 688 return TRUE; 689 } 690 } 691 692 return FALSE; 693} 694 695static void 696set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, 697 struct nv50_program_exec *e) 698{ 699 
struct nv50_reg *temp; 700 701 if (src->type != P_TEMP) { 702 temp = temp_temp(pc); 703 emit_mov(pc, temp, src); 704 src = temp; 705 } 706 707 alloc_reg(pc, src); 708 if (src->hw > 63) 709 set_long(pc, e); 710 e->inst[0] |= (src->hw << 9); 711} 712 713static void 714set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 715{ 716 if (src->type == P_ATTR) { 717 set_long(pc, e); 718 e->inst[1] |= 0x00200000; 719 } else 720 if (src->type == P_CONST || src->type == P_IMMD) { 721 struct nv50_reg *temp = temp_temp(pc); 722 723 emit_mov(pc, temp, src); 724 src = temp; 725 } 726 727 alloc_reg(pc, src); 728 if (src->hw > 63) 729 set_long(pc, e); 730 e->inst[0] |= (src->hw << 9); 731} 732 733static void 734set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 735{ 736 if (src->type == P_ATTR) { 737 struct nv50_reg *temp = temp_temp(pc); 738 739 emit_mov(pc, temp, src); 740 src = temp; 741 } else 742 if (src->type == P_CONST || src->type == P_IMMD) { 743 assert(!(e->inst[0] & 0x00800000)); 744 if (e->inst[0] & 0x01000000) { 745 struct nv50_reg *temp = temp_temp(pc); 746 747 emit_mov(pc, temp, src); 748 src = temp; 749 } else { 750 set_data(pc, src, 0x7f, 16, e); 751 e->inst[0] |= 0x00800000; 752 } 753 } 754 755 alloc_reg(pc, src); 756 if (src->hw > 63) 757 set_long(pc, e); 758 e->inst[0] |= ((src->hw & 127) << 16); 759} 760 761static void 762set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 763{ 764 set_long(pc, e); 765 766 if (src->type == P_ATTR) { 767 struct nv50_reg *temp = temp_temp(pc); 768 769 emit_mov(pc, temp, src); 770 src = temp; 771 } else 772 if (src->type == P_CONST || src->type == P_IMMD) { 773 assert(!(e->inst[0] & 0x01000000)); 774 if (e->inst[0] & 0x00800000) { 775 struct nv50_reg *temp = temp_temp(pc); 776 777 emit_mov(pc, temp, src); 778 src = temp; 779 } else { 780 set_data(pc, src, 0x7f, 32+14, e); 781 e->inst[0] |= 0x01000000; 782 } 783 } 784 785 alloc_reg(pc, src); 786 
e->inst[1] |= ((src->hw & 127) << 14); 787} 788 789static void 790emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 791 struct nv50_reg *src1) 792{ 793 struct nv50_program_exec *e = exec(pc); 794 795 e->inst[0] |= 0xc0000000; 796 797 if (!pc->allow32) 798 set_long(pc, e); 799 800 check_swap_src_0_1(pc, &src0, &src1); 801 set_dst(pc, dst, e); 802 set_src_0(pc, src0, e); 803 if (src1->type == P_IMMD && !is_long(e)) { 804 if (src0->neg) 805 e->inst[0] |= 0x00008000; 806 set_immd(pc, src1, e); 807 } else { 808 set_src_1(pc, src1, e); 809 if (src0->neg ^ src1->neg) { 810 if (is_long(e)) 811 e->inst[1] |= 0x08000000; 812 else 813 e->inst[0] |= 0x00008000; 814 } 815 } 816 817 emit(pc, e); 818} 819 820static void 821emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 822 struct nv50_reg *src0, struct nv50_reg *src1) 823{ 824 struct nv50_program_exec *e = exec(pc); 825 826 e->inst[0] = 0xb0000000; 827 828 alloc_reg(pc, src1); 829 check_swap_src_0_1(pc, &src0, &src1); 830 831 if (!pc->allow32 || (src0->neg | src1->neg) || src1->hw > 63) { 832 set_long(pc, e); 833 e->inst[1] |= (src0->neg << 26) | (src1->neg << 27); 834 } 835 836 set_dst(pc, dst, e); 837 set_src_0(pc, src0, e); 838 if (src1->type == P_CONST || src1->type == P_ATTR || is_long(e)) 839 set_src_2(pc, src1, e); 840 else 841 if (src1->type == P_IMMD) 842 set_immd(pc, src1, e); 843 else 844 set_src_1(pc, src1, e); 845 846 emit(pc, e); 847} 848 849static void 850emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, 851 uint8_t s) 852{ 853 struct nv50_program_exec *e = exec(pc); 854 855 set_long(pc, e); 856 e->inst[1] |= 0xc0000000; 857 858 e->inst[0] |= dst->hw << 2; 859 e->inst[0] |= s << 16; /* shift left */ 860 set_src_0_restricted(pc, src, e); 861 862 emit(pc, e); 863} 864 865static void 866emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 867 struct nv50_reg *src0, struct nv50_reg *src1) 868{ 869 struct nv50_program_exec *e = exec(pc); 870 871 
set_long(pc, e); 872 e->inst[0] |= 0xb0000000; 873 e->inst[1] |= (sub << 29); 874 875 check_swap_src_0_1(pc, &src0, &src1); 876 set_dst(pc, dst, e); 877 set_src_0(pc, src0, e); 878 set_src_1(pc, src1, e); 879 880 emit(pc, e); 881} 882 883static INLINE void 884emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 885 struct nv50_reg *src1) 886{ 887 assert(src0 != src1); 888 src1->neg ^= 1; 889 emit_add(pc, dst, src0, src1); 890 src1->neg ^= 1; 891} 892 893static void 894emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 895 struct nv50_reg *src1, struct nv50_reg *src2) 896{ 897 struct nv50_program_exec *e = exec(pc); 898 899 e->inst[0] |= 0xe0000000; 900 901 check_swap_src_0_1(pc, &src0, &src1); 902 set_dst(pc, dst, e); 903 set_src_0(pc, src0, e); 904 set_src_1(pc, src1, e); 905 set_src_2(pc, src2, e); 906 907 if (src0->neg ^ src1->neg) 908 e->inst[1] |= 0x04000000; 909 if (src2->neg) 910 e->inst[1] |= 0x08000000; 911 912 emit(pc, e); 913} 914 915static INLINE void 916emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 917 struct nv50_reg *src1, struct nv50_reg *src2) 918{ 919 assert(src2 != src0 && src2 != src1); 920 src2->neg ^= 1; 921 emit_mad(pc, dst, src0, src1, src2); 922 src2->neg ^= 1; 923} 924 925static void 926emit_flop(struct nv50_pc *pc, unsigned sub, 927 struct nv50_reg *dst, struct nv50_reg *src) 928{ 929 struct nv50_program_exec *e = exec(pc); 930 931 e->inst[0] |= 0x90000000; 932 if (sub) { 933 set_long(pc, e); 934 e->inst[1] |= (sub << 29); 935 } 936 937 set_dst(pc, dst, e); 938 939 if (sub == 0 || sub == 2) 940 set_src_0_restricted(pc, src, e); 941 else 942 set_src_0(pc, src, e); 943 944 emit(pc, e); 945} 946 947static void 948emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 949{ 950 struct nv50_program_exec *e = exec(pc); 951 952 e->inst[0] |= 0xb0000000; 953 954 set_dst(pc, dst, e); 955 set_src_0(pc, src, e); 956 set_long(pc, e); 957 e->inst[1] |= (6 << 
29) | 0x00004000; 958 959 emit(pc, e); 960} 961 962static void 963emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 964{ 965 struct nv50_program_exec *e = exec(pc); 966 967 e->inst[0] |= 0xb0000000; 968 969 set_dst(pc, dst, e); 970 set_src_0(pc, src, e); 971 set_long(pc, e); 972 e->inst[1] |= (6 << 29); 973 974 emit(pc, e); 975} 976 977#define CVTOP_RN 0x01 978#define CVTOP_FLOOR 0x03 979#define CVTOP_CEIL 0x05 980#define CVTOP_TRUNC 0x07 981#define CVTOP_SAT 0x08 982#define CVTOP_ABS 0x10 983 984/* 0x04 == 32 bit dst */ 985/* 0x40 == dst is float */ 986/* 0x80 == src is float */ 987#define CVT_F32_F32 0xc4 988#define CVT_F32_S32 0x44 989#define CVT_S32_F32 0x8c 990#define CVT_S32_S32 0x0c 991#define CVT_NEG 0x20 992#define CVT_RI 0x08 993 994static void 995emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, 996 int wp, unsigned cvn, unsigned fmt) 997{ 998 struct nv50_program_exec *e; 999 1000 e = exec(pc); 1001 set_long(pc, e); 1002 1003 e->inst[0] |= 0xa0000000; 1004 e->inst[1] |= 0x00004000; /* 32 bit src */ 1005 e->inst[1] |= (cvn << 16); 1006 e->inst[1] |= (fmt << 24); 1007 set_src_0(pc, src, e); 1008 1009 if (wp >= 0) 1010 set_pred_wr(pc, 1, wp, e); 1011 1012 if (dst) 1013 set_dst(pc, dst, e); 1014 else { 1015 e->inst[0] |= 0x000001fc; 1016 e->inst[1] |= 0x00000008; 1017 } 1018 1019 emit(pc, e); 1020} 1021 1022/* nv50 Condition codes: 1023 * 0x1 = LT 1024 * 0x2 = EQ 1025 * 0x3 = LE 1026 * 0x4 = GT 1027 * 0x5 = NE 1028 * 0x6 = GE 1029 * 0x7 = set condition code ? 
(used before bra.lt/le/gt/ge) 1030 * 0x8 = unordered bit (allows NaN) 1031 */ 1032static void 1033emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp, 1034 struct nv50_reg *src0, struct nv50_reg *src1) 1035{ 1036 static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; 1037 1038 struct nv50_program_exec *e = exec(pc); 1039 struct nv50_reg *rdst; 1040 1041 assert(ccode < 16); 1042 if (check_swap_src_0_1(pc, &src0, &src1)) 1043 ccode = cc_swapped[ccode & 7] | (ccode & 8); 1044 1045 rdst = dst; 1046 if (dst && dst->type != P_TEMP) 1047 dst = alloc_temp(pc, NULL); 1048 1049 /* set.u32 */ 1050 set_long(pc, e); 1051 e->inst[0] |= 0xb0000000; 1052 e->inst[1] |= 0x60000000 | (ccode << 14); 1053 1054 /* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but 1055 * that doesn't seem to match what the hw actually does 1056 e->inst[1] |= 0x04000000; << breaks things, u32 by default ? 1057 */ 1058 1059 if (wp >= 0) 1060 set_pred_wr(pc, 1, wp, e); 1061 if (dst) 1062 set_dst(pc, dst, e); 1063 else { 1064 e->inst[0] |= 0x000001fc; 1065 e->inst[1] |= 0x00000008; 1066 } 1067 1068 set_src_0(pc, src0, e); 1069 set_src_1(pc, src1, e); 1070 1071 emit(pc, e); 1072 pc->if_cond = pc->p->exec_tail; /* record for OPCODE_IF */ 1073 1074 /* cvt.f32.u32/s32 (?) 
if we didn't only write the predicate */ 1075 if (rdst) 1076 emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32); 1077 if (rdst && rdst != dst) 1078 free_temp(pc, dst); 1079} 1080 1081static INLINE unsigned 1082map_tgsi_setop_cc(unsigned op) 1083{ 1084 switch (op) { 1085 case TGSI_OPCODE_SLT: return 0x1; 1086 case TGSI_OPCODE_SGE: return 0x6; 1087 case TGSI_OPCODE_SEQ: return 0x2; 1088 case TGSI_OPCODE_SGT: return 0x4; 1089 case TGSI_OPCODE_SLE: return 0x3; 1090 case TGSI_OPCODE_SNE: return 0xd; 1091 default: 1092 assert(0); 1093 return 0; 1094 } 1095} 1096 1097static INLINE void 1098emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 1099{ 1100 emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI); 1101} 1102 1103static void 1104emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 1105 struct nv50_reg *v, struct nv50_reg *e) 1106{ 1107 struct nv50_reg *temp = alloc_temp(pc, NULL); 1108 1109 emit_flop(pc, 3, temp, v); 1110 emit_mul(pc, temp, temp, e); 1111 emit_preex2(pc, temp, temp); 1112 emit_flop(pc, 6, dst, temp); 1113 1114 free_temp(pc, temp); 1115} 1116 1117static INLINE void 1118emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 1119{ 1120 emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32); 1121} 1122 1123static INLINE void 1124emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 1125{ 1126 emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32); 1127} 1128 1129static void 1130emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, 1131 struct nv50_reg **src) 1132{ 1133 struct nv50_reg *one = alloc_immd(pc, 1.0); 1134 struct nv50_reg *zero = alloc_immd(pc, 0.0); 1135 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 1136 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 1137 struct nv50_reg *tmp[4]; 1138 boolean allow32 = pc->allow32; 1139 1140 pc->allow32 = FALSE; 1141 1142 if (mask & (3 << 1)) { 1143 tmp[0] = alloc_temp(pc, NULL); 1144 emit_minmax(pc, 4, tmp[0], 
src[0], zero); 1145 } 1146 1147 if (mask & (1 << 2)) { 1148 set_pred_wr(pc, 1, 0, pc->p->exec_tail); 1149 1150 tmp[1] = temp_temp(pc); 1151 emit_minmax(pc, 4, tmp[1], src[1], zero); 1152 1153 tmp[3] = temp_temp(pc); 1154 emit_minmax(pc, 4, tmp[3], src[3], neg128); 1155 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 1156 1157 emit_pow(pc, dst[2], tmp[1], tmp[3]); 1158 emit_mov(pc, dst[2], zero); 1159 set_pred(pc, 3, 0, pc->p->exec_tail); 1160 } 1161 1162 if (mask & (1 << 1)) 1163 assimilate_temp(pc, dst[1], tmp[0]); 1164 else 1165 if (mask & (1 << 2)) 1166 free_temp(pc, tmp[0]); 1167 1168 pc->allow32 = allow32; 1169 1170 /* do this last, in case src[i,j] == dst[0,3] */ 1171 if (mask & (1 << 0)) 1172 emit_mov(pc, dst[0], one); 1173 1174 if (mask & (1 << 3)) 1175 emit_mov(pc, dst[3], one); 1176 1177 FREE(pos128); 1178 FREE(neg128); 1179 FREE(zero); 1180 FREE(one); 1181} 1182 1183static INLINE void 1184emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 1185{ 1186 emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG); 1187} 1188 1189static void 1190emit_kil(struct nv50_pc *pc, struct nv50_reg *src) 1191{ 1192 struct nv50_program_exec *e; 1193 const int r_pred = 1; 1194 unsigned cvn = CVT_F32_F32; 1195 1196 if (src->neg) 1197 cvn |= CVT_NEG; 1198 /* write predicate reg */ 1199 emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn); 1200 1201 /* conditional discard */ 1202 e = exec(pc); 1203 e->inst[0] = 0x00000002; 1204 set_long(pc, e); 1205 set_pred(pc, 0x1 /* LT */, r_pred, e); 1206 emit(pc, e); 1207} 1208 1209static void 1210emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, 1211 struct nv50_reg **src, unsigned unit, unsigned type, boolean proj) 1212{ 1213 struct nv50_reg *temp, *t[4]; 1214 struct nv50_program_exec *e; 1215 1216 unsigned c, mode, dim; 1217 1218 switch (type) { 1219 case TGSI_TEXTURE_1D: 1220 dim = 1; 1221 break; 1222 case TGSI_TEXTURE_UNKNOWN: 1223 case TGSI_TEXTURE_2D: 1224 case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */ 
1225 case TGSI_TEXTURE_RECT: 1226 dim = 2; 1227 break; 1228 case TGSI_TEXTURE_3D: 1229 case TGSI_TEXTURE_CUBE: 1230 case TGSI_TEXTURE_SHADOW2D: 1231 case TGSI_TEXTURE_SHADOWRECT: /* XXX */ 1232 dim = 3; 1233 break; 1234 default: 1235 assert(0); 1236 break; 1237 } 1238 1239 /* some cards need t[0]'s hw index to be a multiple of 4 */ 1240 alloc_temp4(pc, t, 0); 1241 1242 if (proj) { 1243 if (src[0]->type == P_TEMP && src[0]->rhw != -1) { 1244 mode = pc->interp_mode[src[0]->index]; 1245 1246 t[3]->rhw = src[3]->rhw; 1247 emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); 1248 emit_flop(pc, 0, t[3], t[3]); 1249 1250 for (c = 0; c < dim; c++) { 1251 t[c]->rhw = src[c]->rhw; 1252 emit_interp(pc, t[c], t[3], 1253 (mode | INTERP_PERSPECTIVE)); 1254 } 1255 } else { 1256 emit_flop(pc, 0, t[3], src[3]); 1257 for (c = 0; c < dim; c++) 1258 emit_mul(pc, t[c], src[c], t[3]); 1259 1260 /* XXX: for some reason the blob sometimes uses MAD: 1261 * emit_mad(pc, t[c], src[0][c], t[3], t[3]) 1262 * pc->p->exec_tail->inst[1] |= 0x080fc000; 1263 */ 1264 } 1265 } else { 1266 if (type == TGSI_TEXTURE_CUBE) { 1267 temp = temp_temp(pc); 1268 emit_minmax(pc, 4, temp, src[0], src[1]); 1269 emit_minmax(pc, 4, temp, temp, src[2]); 1270 emit_flop(pc, 0, temp, temp); 1271 for (c = 0; c < 3; c++) 1272 emit_mul(pc, t[c], src[c], temp); 1273 } else { 1274 for (c = 0; c < dim; c++) 1275 emit_mov(pc, t[c], src[c]); 1276 } 1277 } 1278 1279 e = exec(pc); 1280 set_long(pc, e); 1281 e->inst[0] |= 0xf0000000; 1282 e->inst[1] |= 0x00000004; 1283 set_dst(pc, t[0], e); 1284 e->inst[0] |= (unit << 9); 1285 1286 if (dim == 2) 1287 e->inst[0] |= 0x00400000; 1288 else 1289 if (dim == 3) 1290 e->inst[0] |= 0x00800000; 1291 1292 e->inst[0] |= (mask & 0x3) << 25; 1293 e->inst[1] |= (mask & 0xc) << 12; 1294 1295 emit(pc, e); 1296 1297#if 1 1298 c = 0; 1299 if (mask & 1) emit_mov(pc, dst[0], t[c++]); 1300 if (mask & 2) emit_mov(pc, dst[1], t[c++]); 1301 if (mask & 4) emit_mov(pc, dst[2], t[c++]); 1302 if (mask & 8) 
emit_mov(pc, dst[3], t[c]);

	free_temp4(pc, t);
#else
	/* XXX: if p.e. MUL is used directly after TEX, it would still use
	 * the texture coordinates, not the fetched values: latency ? */

	for (c = 0; c < 4; c++) {
		if (mask & (1 << c))
			assimilate_temp(pc, dst[c], t[c]);
		else
			free_temp(pc, t[c]);
	}
#endif
}

/* Emit a branch instruction (inst[0] bits 0x10000002).
 *
 * pred: predicate register to test, or < 0 for an unpredicated branch
 * cc:   condition code handed to set_pred() when pred >= 0
 * join: if non-NULL, a separate long instruction (inst[0] bits 0xa0000002,
 *       called JOIN_AT elsewhere in this file) is emitted first and
 *       returned through *join
 *
 * No target is written here; callers in this file store it afterwards
 * in the param.index field of the emitted instruction(s).
 */
static void
emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
	    struct nv50_program_exec **join)
{
	struct nv50_program_exec *e = exec(pc);

	if (join) {
		set_long(pc, e);
		e->inst[0] |= 0xa0000002;
		emit(pc, e);
		*join = e;
		e = exec(pc);
	}

	set_long(pc, e);
	e->inst[0] |= 0x10000002;
	if (pred >= 0)
		set_pred(pc, cc, pred, e);
	emit(pc, e);
}

/* Emit a long instruction with the fixed encoding 0xf0000000/0xe0000000.
 * The ENDIF handling in this file uses it as the join point of a branch.
 */
static void
emit_nop(struct nv50_pc *pc)
{
	struct nv50_program_exec *e = exec(pc);

	e->inst[0] = 0xf0000000;
	set_long(pc, e);
	e->inst[1] = 0xe0000000;
	emit(pc, e);
}

/* Emit the instruction used for TGSI DDX: dst = d(src)/dx.
 * src must be a P_TEMP; it is encoded both as source 0 and as source 2.
 */
static void
emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	struct nv50_program_exec *e = exec(pc);

	assert(src->type == P_TEMP);

	e->inst[0] = 0xc0140000;
	e->inst[1] = 0x89800000;
	set_long(pc, e);
	set_dst(pc, dst, e);
	set_src_0(pc, src, e);
	set_src_2(pc, src, e);

	emit(pc, e);
}

/* Emit the instruction used for TGSI DDY: dst = d(src)/dy.
 * src must be a P_TEMP; it is encoded both as source 0 and as source 2.
 *
 * NOTE(review): unless src->neg is already set, emit_neg(pc, src, src)
 * is issued with src as its own destination, which looks like it
 * negates the caller's register in place - confirm this is intended.
 */
static void
emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
	struct nv50_program_exec *e = exec(pc);

	assert(src->type == P_TEMP);

	if (!src->neg) /* ! double negation */
		emit_neg(pc, src, src);

	e->inst[0] = 0xc0150000;
	e->inst[1] = 0x8a400000;
	set_long(pc, e);
	set_dst(pc, dst, e);
	set_src_0(pc, src, e);
	set_src_2(pc, src, e);

	emit(pc, e);
}

/* Turn an already encoded short instruction into its long form.
 *
 * The opcode class is taken from the top 4 bits of inst[0]. For each
 * supported class, m selects the bits that stay in inst[0] and q holds
 * the bits that have to be set in inst[1] instead. The program size is
 * incremented by one for the extra word. Any other opcode class asserts.
 */
static void
convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
{
	unsigned q = 0, m = ~0;

	assert(!is_long(e));

	switch (e->inst[0] >> 28) {
	case 0x1:
		/* MOV */
		q = 0x0403c000;
		m = 0xffff7fff;
		break;
	case 0x8:
		/* INTERP (move centroid, perspective and flat bits) */
		m = ~0x03000100;
		q = (e->inst[0] & (3 << 24)) >> (24 - 16);
		q |= (e->inst[0] & (1 << 8)) << (18 - 8);
		break;
	case 0x9:
		/* RCP */
		break;
	case 0xB:
		/* ADD */
		m = ~(127 << 16);
		q = ((e->inst[0] & (~m)) >> 2);
		break;
	case 0xC:
		/* MUL */
		m = ~0x00008000;
		q = ((e->inst[0] & (~m)) << 12);
		break;
	case 0xE:
		/* MAD (if src2 == dst) */
		q = ((e->inst[0] & 0x1fc) << 12);
		break;
	default:
		assert(0);
		break;
	}

	set_long(pc, e);
	pc->p->exec_size++;

	e->inst[0] &= m;
	e->inst[1] |= q;
}

/* Some operations support an optional negation flag.
 *
 * Returns TRUE if source operand i of insn may be negated through
 * nv50_reg::neg: the opcode has to be one of those listed below (for
 * POW only operand 1 qualifies) and no other source operand of the
 * instruction may name the same register file and index.
 */
static boolean
negate_supported(const struct tgsi_full_instruction *insn, int i)
{
	int s;

	switch (insn->Instruction.Opcode) {
	case TGSI_OPCODE_DDY:
	case TGSI_OPCODE_DP3:
	case TGSI_OPCODE_DP4:
	case TGSI_OPCODE_MUL:
	case TGSI_OPCODE_KIL:
	case TGSI_OPCODE_ADD:
	case TGSI_OPCODE_SUB:
	case TGSI_OPCODE_MAD:
		break;
	case TGSI_OPCODE_POW:
		if (i == 1)
			break;
		return FALSE;
	default:
		return FALSE;
	}

	/* Watch out for possible multiple uses of an nv50_reg, we
	 * can't use nv50_reg::neg in these cases.
	 */
	for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
		if (s == i)
			continue;
		if ((insn->FullSrcRegisters[s].SrcRegister.Index ==
		     insn->FullSrcRegisters[i].SrcRegister.Index) &&
		    (insn->FullSrcRegisters[s].SrcRegister.File ==
		     insn->FullSrcRegisters[i].SrcRegister.File))
			return FALSE;
	}

	return TRUE;
}

/* Return a read mask for source registers deduced from opcode & write mask.
 *
 * c is the index of the source operand (callers in this file pass the
 * operand number); it only matters for DST, where the two operands
 * contribute different components. Bit n of the result set means that
 * component n of the operand is read.
 */
static unsigned
nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
{
	unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask;

	switch (insn->Instruction.Opcode) {
	case TGSI_OPCODE_COS:
	case TGSI_OPCODE_SIN:
		/* w is computed from w, any of x/y/z from x */
		return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
	case TGSI_OPCODE_DP3:
		return 0x7;
	case TGSI_OPCODE_DP4:
	case TGSI_OPCODE_DPH:
	case TGSI_OPCODE_KIL: /* WriteMask ignored */
		return 0xf;
	case TGSI_OPCODE_DST:
		return mask & (c ? 0xa : 0x6);
	case TGSI_OPCODE_EX2:
	case TGSI_OPCODE_LG2:
	case TGSI_OPCODE_POW:
	case TGSI_OPCODE_RCP:
	case TGSI_OPCODE_RSQ:
	case TGSI_OPCODE_SCS:
		return 0x1;
	case TGSI_OPCODE_LIT:
		return 0xb;
	case TGSI_OPCODE_TEX:
	case TGSI_OPCODE_TXP:
	{
		const struct tgsi_instruction_ext_texture *tex;

		assert(insn->Instruction.Extended);
		tex = &insn->InstructionExtTexture;

		/* x, y, z; TXP additionally reads w */
		mask = 0x7;
		if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
			mask |= 0x8;

		switch (tex->Texture) {
		case TGSI_TEXTURE_1D:
			mask &= 0x9;
			break;
		case TGSI_TEXTURE_2D:
			mask &= 0xb;
			break;
		default:
			break;
		}
	}
		return mask;
	case TGSI_OPCODE_XPD:
		/* each written component reads the two other ones */
		x = 0;
		if (mask & 1) x |= 0x6;
		if (mask & 2) x |= 0x5;
		if (mask & 4) x |= 0x3;
		return x;
	default:
		break;
	}

	return mask;
}

/* Look up the nv50_reg for component c of a TGSI destination register.
 *
 * TEMPORARY and OUTPUT index straight into pc->temp / pc->result.
 * ADDRESS entries are allocated with alloc_addr() on first use and
 * cached in pc->addr. Returns NULL for TGSI_FILE_NULL and for any
 * other register file.
 */
static struct nv50_reg *
tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
{
	switch (dst->DstRegister.File) {
	case TGSI_FILE_TEMPORARY:
		return &pc->temp[dst->DstRegister.Index * 4 + c];
	case TGSI_FILE_OUTPUT:
		return &pc->result[dst->DstRegister.Index * 4 + c];
	case TGSI_FILE_ADDRESS:
	{
		struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c];
		if (!r) {
			r = alloc_addr(pc, NULL);
			pc->addr[dst->DstRegister.Index * 4 + c] = r;
		}
		assert(r);
		return r;
	}
	case TGSI_FILE_NULL:
		return NULL;
	default:
		break;
	}

	return NULL;
}

/* Look up the nv50_reg for channel chan of a TGSI source register,
 * applying its swizzle and sign mode.
 *
 * neg: TRUE if the consuming instruction can negate this operand via
 *      nv50_reg::neg (see negate_supported()); otherwise a negated
 *      copy is emitted into a temp_temp().
 *
 * SIGN_CLEAR and SIGN_SET always go through a temp_temp(). An
 * indirectly addressed constant is returned as a freshly allocated
 * nv50_reg marked with acc == -1. Returns NULL for TGSI_FILE_SAMPLER
 * (with SIGN_KEEP).
 *
 * NOTE(review): MALLOC_STRUCT() is not checked for failure here, and
 * when an indirect constant is combined with a sign mode that replaces
 * r by a temp, the allocated register no longer seems to be reachable
 * by the caller - looks like a leak, confirm.
 */
static struct nv50_reg *
tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
	 boolean neg)
{
	struct nv50_reg *r = NULL;
	struct nv50_reg *temp;
	unsigned sgn, c, swz;

	if (src->SrcRegister.File != TGSI_FILE_CONSTANT)
		assert(!src->SrcRegister.Indirect);

	sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);

	c = tgsi_util_get_full_src_register_swizzle(src, chan);
	switch (c) {
	case TGSI_SWIZZLE_X:
	case TGSI_SWIZZLE_Y:
	case TGSI_SWIZZLE_Z:
	case TGSI_SWIZZLE_W:
		switch (src->SrcRegister.File) {
		case TGSI_FILE_INPUT:
			r = &pc->attr[src->SrcRegister.Index * 4 + c];
			break;
		case TGSI_FILE_TEMPORARY:
			r = &pc->temp[src->SrcRegister.Index * 4 + c];
			break;
		case TGSI_FILE_CONSTANT:
			if (!src->SrcRegister.Indirect) {
				r = &pc->param[src->SrcRegister.Index * 4 + c];
				break;
			}
			/* Indicate indirection by setting r->acc < 0 and
			 * use the index field to select the address reg.
			 */
			r = MALLOC_STRUCT(nv50_reg);
			swz = tgsi_util_get_src_register_swizzle(
				&src->SrcRegisterInd, 0);
			ctor_reg(r, P_CONST,
				 src->SrcRegisterInd.Index * 4 + swz,
				 src->SrcRegister.Index * 4 + c);
			r->acc = -1;
			break;
		case TGSI_FILE_IMMEDIATE:
			r = &pc->immd[src->SrcRegister.Index * 4 + c];
			break;
		case TGSI_FILE_SAMPLER:
			break;
		case TGSI_FILE_ADDRESS:
			r = pc->addr[src->SrcRegister.Index * 4 + c];
			assert(r);
			break;
		default:
			assert(0);
			break;
		}
		break;
	default:
		assert(0);
		break;
	}

	switch (sgn) {
	case TGSI_UTIL_SIGN_KEEP:
		break;
	case TGSI_UTIL_SIGN_CLEAR:
		temp = temp_temp(pc);
		emit_abs(pc, temp, r);
		r = temp;
		break;
	case TGSI_UTIL_SIGN_TOGGLE:
		if (neg)
			r->neg = 1;
		else {
			temp = temp_temp(pc);
			emit_neg(pc, temp, r);
			r = temp;
		}
		break;
	case TGSI_UTIL_SIGN_SET:
		temp = temp_temp(pc);
		emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG);
		r = temp;
		break;
	default:
		assert(0);
		break;
	}

	return r;
}

/* return TRUE for ops that produce only a single result */
static boolean
is_scalar_op(unsigned op)
{
	switch (op) {
	case TGSI_OPCODE_COS:
	case TGSI_OPCODE_DP2:
	case TGSI_OPCODE_DP3:
	case TGSI_OPCODE_DP4:
	case TGSI_OPCODE_DPH:
	case TGSI_OPCODE_EX2:
	case TGSI_OPCODE_LG2:
	case TGSI_OPCODE_POW:
	case TGSI_OPCODE_RCP:
	case TGSI_OPCODE_RSQ:
	case TGSI_OPCODE_SIN:
		/*
	case TGSI_OPCODE_KIL:
	case TGSI_OPCODE_LIT:
	case TGSI_OPCODE_SCS:
		*/
		return TRUE;
	default:
		return FALSE;
	}
}

/* Returns a bitmask indicating which dst components depend
 * on source s, component c (reverse of nv50_tgsi_src_mask).
1686 */ 1687static unsigned 1688nv50_tgsi_dst_revdep(unsigned op, int s, int c) 1689{ 1690 if (is_scalar_op(op)) 1691 return 0x1; 1692 1693 switch (op) { 1694 case TGSI_OPCODE_DST: 1695 return (1 << c) & (s ? 0xa : 0x6); 1696 case TGSI_OPCODE_XPD: 1697 switch (c) { 1698 case 0: return 0x6; 1699 case 1: return 0x5; 1700 case 2: return 0x3; 1701 case 3: return 0x0; 1702 default: 1703 assert(0); 1704 return 0x0; 1705 } 1706 case TGSI_OPCODE_LIT: 1707 case TGSI_OPCODE_SCS: 1708 case TGSI_OPCODE_TEX: 1709 case TGSI_OPCODE_TXP: 1710 /* these take care of dangerous swizzles themselves */ 1711 return 0x0; 1712 case TGSI_OPCODE_IF: 1713 case TGSI_OPCODE_KIL: 1714 /* don't call this function for these ops */ 1715 assert(0); 1716 return 0; 1717 default: 1718 /* linear vector instruction */ 1719 return (1 << c); 1720 } 1721} 1722 1723static INLINE boolean 1724has_pred(struct nv50_program_exec *e, unsigned cc) 1725{ 1726 if (!is_long(e) || is_immd(e)) 1727 return FALSE; 1728 return ((e->inst[1] & 0x780) == (cc << 7)); 1729} 1730 1731/* on ENDIF see if we can do "@p0.neu single_op" instead of: 1732 * join_at ENDIF 1733 * @p0.eq bra ENDIF 1734 * single_op 1735 * ENDIF: nop.join 1736 */ 1737static boolean 1738nv50_kill_branch(struct nv50_pc *pc) 1739{ 1740 int lvl = pc->if_lvl; 1741 1742 if (pc->if_insn[lvl]->next != pc->p->exec_tail) 1743 return FALSE; 1744 1745 /* if ccode == 'true', the BRA is from an ELSE and the predicate 1746 * reg may no longer be valid, since we currently always use $p0 1747 */ 1748 if (has_pred(pc->if_insn[lvl], 0xf)) 1749 return FALSE; 1750 assert(pc->if_insn[lvl] && pc->br_join[lvl]); 1751 1752 /* We'll use the exec allocated for JOIN_AT (as we can't easily 1753 * update prev's next); if exec_tail is BRK, update the pointer. 
1754 */ 1755 if (pc->loop_lvl && pc->br_loop[pc->loop_lvl - 1] == pc->p->exec_tail) 1756 pc->br_loop[pc->loop_lvl - 1] = pc->br_join[lvl]; 1757 1758 pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */ 1759 1760 *pc->br_join[lvl] = *pc->p->exec_tail; 1761 1762 FREE(pc->if_insn[lvl]); 1763 FREE(pc->p->exec_tail); 1764 1765 pc->p->exec_tail = pc->br_join[lvl]; 1766 pc->p->exec_tail->next = NULL; 1767 set_pred(pc, 0xd, 0, pc->p->exec_tail); 1768 1769 return TRUE; 1770} 1771 1772static boolean 1773nv50_program_tx_insn(struct nv50_pc *pc, 1774 const struct tgsi_full_instruction *inst) 1775{ 1776 struct nv50_reg *rdst[4], *dst[4], *brdc, *src[3][4], *temp; 1777 unsigned mask, sat, unit; 1778 int i, c; 1779 1780 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 1781 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 1782 1783 memset(src, 0, sizeof(src)); 1784 1785 for (c = 0; c < 4; c++) { 1786 if ((mask & (1 << c)) && !pc->r_dst[c]) 1787 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 1788 else 1789 dst[c] = pc->r_dst[c]; 1790 rdst[c] = dst[c]; 1791 } 1792 1793 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1794 const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; 1795 unsigned src_mask; 1796 boolean neg_supp; 1797 1798 src_mask = nv50_tgsi_src_mask(inst, i); 1799 neg_supp = negate_supported(inst, i); 1800 1801 if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) 1802 unit = fs->SrcRegister.Index; 1803 1804 for (c = 0; c < 4; c++) 1805 if (src_mask & (1 << c)) 1806 src[i][c] = tgsi_src(pc, c, fs, neg_supp); 1807 } 1808 1809 brdc = temp = pc->r_brdc; 1810 if (brdc && brdc->type != P_TEMP) { 1811 temp = temp_temp(pc); 1812 if (sat) 1813 brdc = temp; 1814 } else 1815 if (sat) { 1816 for (c = 0; c < 4; c++) { 1817 if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) 1818 continue; 1819 /* rdst[c] = dst[c]; */ /* done above */ 1820 dst[c] = temp_temp(pc); 1821 } 1822 } 1823 1824 assert(brdc || !is_scalar_op(inst->Instruction.Opcode)); 1825 1826 
switch (inst->Instruction.Opcode) { 1827 case TGSI_OPCODE_ABS: 1828 for (c = 0; c < 4; c++) { 1829 if (!(mask & (1 << c))) 1830 continue; 1831 emit_abs(pc, dst[c], src[0][c]); 1832 } 1833 break; 1834 case TGSI_OPCODE_ADD: 1835 for (c = 0; c < 4; c++) { 1836 if (!(mask & (1 << c))) 1837 continue; 1838 emit_add(pc, dst[c], src[0][c], src[1][c]); 1839 } 1840 break; 1841 case TGSI_OPCODE_ARL: 1842 assert(src[0][0]); 1843 temp = temp_temp(pc); 1844 emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32); 1845 emit_arl(pc, dst[0], temp, 4); 1846 break; 1847 case TGSI_OPCODE_BGNLOOP: 1848 pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; 1849 terminate_mbb(pc); 1850 break; 1851 case TGSI_OPCODE_BRK: 1852 emit_branch(pc, -1, 0, NULL); 1853 assert(pc->loop_lvl > 0); 1854 pc->br_loop[pc->loop_lvl - 1] = pc->p->exec_tail; 1855 break; 1856 case TGSI_OPCODE_CEIL: 1857 for (c = 0; c < 4; c++) { 1858 if (!(mask & (1 << c))) 1859 continue; 1860 emit_cvt(pc, dst[c], src[0][c], -1, 1861 CVTOP_CEIL, CVT_F32_F32 | CVT_RI); 1862 } 1863 break; 1864 case TGSI_OPCODE_CMP: 1865 pc->allow32 = FALSE; 1866 for (c = 0; c < 4; c++) { 1867 if (!(mask & (1 << c))) 1868 continue; 1869 emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32); 1870 emit_mov(pc, dst[c], src[1][c]); 1871 set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */ 1872 emit_mov(pc, dst[c], src[2][c]); 1873 set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ 1874 } 1875 break; 1876 case TGSI_OPCODE_COS: 1877 if (mask & 8) { 1878 emit_precossin(pc, temp, src[0][3]); 1879 emit_flop(pc, 5, dst[3], temp); 1880 if (!(mask &= 7)) 1881 break; 1882 if (temp == dst[3]) 1883 temp = brdc = temp_temp(pc); 1884 } 1885 emit_precossin(pc, temp, src[0][0]); 1886 emit_flop(pc, 5, brdc, temp); 1887 break; 1888 case TGSI_OPCODE_DDX: 1889 for (c = 0; c < 4; c++) { 1890 if (!(mask & (1 << c))) 1891 continue; 1892 emit_ddx(pc, dst[c], src[0][c]); 1893 } 1894 break; 1895 case TGSI_OPCODE_DDY: 1896 for (c = 0; c < 4; c++) { 1897 if (!(mask & (1 << c))) 
1898 continue; 1899 emit_ddy(pc, dst[c], src[0][c]); 1900 } 1901 break; 1902 case TGSI_OPCODE_DP3: 1903 emit_mul(pc, temp, src[0][0], src[1][0]); 1904 emit_mad(pc, temp, src[0][1], src[1][1], temp); 1905 emit_mad(pc, brdc, src[0][2], src[1][2], temp); 1906 break; 1907 case TGSI_OPCODE_DP4: 1908 emit_mul(pc, temp, src[0][0], src[1][0]); 1909 emit_mad(pc, temp, src[0][1], src[1][1], temp); 1910 emit_mad(pc, temp, src[0][2], src[1][2], temp); 1911 emit_mad(pc, brdc, src[0][3], src[1][3], temp); 1912 break; 1913 case TGSI_OPCODE_DPH: 1914 emit_mul(pc, temp, src[0][0], src[1][0]); 1915 emit_mad(pc, temp, src[0][1], src[1][1], temp); 1916 emit_mad(pc, temp, src[0][2], src[1][2], temp); 1917 emit_add(pc, brdc, src[1][3], temp); 1918 break; 1919 case TGSI_OPCODE_DST: 1920 if (mask & (1 << 1)) 1921 emit_mul(pc, dst[1], src[0][1], src[1][1]); 1922 if (mask & (1 << 2)) 1923 emit_mov(pc, dst[2], src[0][2]); 1924 if (mask & (1 << 3)) 1925 emit_mov(pc, dst[3], src[1][3]); 1926 if (mask & (1 << 0)) 1927 emit_mov_immdval(pc, dst[0], 1.0f); 1928 break; 1929 case TGSI_OPCODE_ELSE: 1930 emit_branch(pc, -1, 0, NULL); 1931 pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; 1932 pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; 1933 terminate_mbb(pc); 1934 break; 1935 case TGSI_OPCODE_ENDIF: 1936 pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; 1937 1938 /* try to replace branch over 1 insn with a predicated insn */ 1939 if (nv50_kill_branch(pc) == TRUE) 1940 break; 1941 1942 if (pc->br_join[pc->if_lvl]) { 1943 pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size; 1944 pc->br_join[pc->if_lvl] = NULL; 1945 } 1946 terminate_mbb(pc); 1947 /* emit a NOP as join point, we could set it on the next 1948 * one, but would have to make sure it is long and !immd 1949 */ 1950 emit_nop(pc); 1951 pc->p->exec_tail->inst[1] |= 2; 1952 break; 1953 case TGSI_OPCODE_ENDLOOP: 1954 emit_branch(pc, -1, 0, NULL); 1955 pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl]; 1956 
pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size; 1957 terminate_mbb(pc); 1958 break; 1959 case TGSI_OPCODE_EX2: 1960 emit_preex2(pc, temp, src[0][0]); 1961 emit_flop(pc, 6, brdc, temp); 1962 break; 1963 case TGSI_OPCODE_FLR: 1964 for (c = 0; c < 4; c++) { 1965 if (!(mask & (1 << c))) 1966 continue; 1967 emit_flr(pc, dst[c], src[0][c]); 1968 } 1969 break; 1970 case TGSI_OPCODE_FRC: 1971 temp = temp_temp(pc); 1972 for (c = 0; c < 4; c++) { 1973 if (!(mask & (1 << c))) 1974 continue; 1975 emit_flr(pc, temp, src[0][c]); 1976 emit_sub(pc, dst[c], src[0][c], temp); 1977 } 1978 break; 1979 case TGSI_OPCODE_IF: 1980 /* emitting a join_at may not be necessary */ 1981 assert(pc->if_lvl < MAX_IF_DEPTH); 1982 set_pred_wr(pc, 1, 0, pc->if_cond); 1983 emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]); 1984 pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; 1985 terminate_mbb(pc); 1986 break; 1987 case TGSI_OPCODE_KIL: 1988 emit_kil(pc, src[0][0]); 1989 emit_kil(pc, src[0][1]); 1990 emit_kil(pc, src[0][2]); 1991 emit_kil(pc, src[0][3]); 1992 break; 1993 case TGSI_OPCODE_LIT: 1994 emit_lit(pc, &dst[0], mask, &src[0][0]); 1995 break; 1996 case TGSI_OPCODE_LG2: 1997 emit_flop(pc, 3, brdc, src[0][0]); 1998 break; 1999 case TGSI_OPCODE_LRP: 2000 temp = temp_temp(pc); 2001 for (c = 0; c < 4; c++) { 2002 if (!(mask & (1 << c))) 2003 continue; 2004 emit_sub(pc, temp, src[1][c], src[2][c]); 2005 emit_mad(pc, dst[c], temp, src[0][c], src[2][c]); 2006 } 2007 break; 2008 case TGSI_OPCODE_MAD: 2009 for (c = 0; c < 4; c++) { 2010 if (!(mask & (1 << c))) 2011 continue; 2012 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 2013 } 2014 break; 2015 case TGSI_OPCODE_MAX: 2016 for (c = 0; c < 4; c++) { 2017 if (!(mask & (1 << c))) 2018 continue; 2019 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 2020 } 2021 break; 2022 case TGSI_OPCODE_MIN: 2023 for (c = 0; c < 4; c++) { 2024 if (!(mask & (1 << c))) 2025 continue; 2026 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 2027 } 2028 break; 
2029 case TGSI_OPCODE_MOV: 2030 for (c = 0; c < 4; c++) { 2031 if (!(mask & (1 << c))) 2032 continue; 2033 emit_mov(pc, dst[c], src[0][c]); 2034 } 2035 break; 2036 case TGSI_OPCODE_MUL: 2037 for (c = 0; c < 4; c++) { 2038 if (!(mask & (1 << c))) 2039 continue; 2040 emit_mul(pc, dst[c], src[0][c], src[1][c]); 2041 } 2042 break; 2043 case TGSI_OPCODE_POW: 2044 emit_pow(pc, brdc, src[0][0], src[1][0]); 2045 break; 2046 case TGSI_OPCODE_RCP: 2047 emit_flop(pc, 0, brdc, src[0][0]); 2048 break; 2049 case TGSI_OPCODE_RSQ: 2050 emit_flop(pc, 2, brdc, src[0][0]); 2051 break; 2052 case TGSI_OPCODE_SCS: 2053 temp = temp_temp(pc); 2054 if (mask & 3) 2055 emit_precossin(pc, temp, src[0][0]); 2056 if (mask & (1 << 0)) 2057 emit_flop(pc, 5, dst[0], temp); 2058 if (mask & (1 << 1)) 2059 emit_flop(pc, 4, dst[1], temp); 2060 if (mask & (1 << 2)) 2061 emit_mov_immdval(pc, dst[2], 0.0); 2062 if (mask & (1 << 3)) 2063 emit_mov_immdval(pc, dst[3], 1.0); 2064 break; 2065 case TGSI_OPCODE_SIN: 2066 if (mask & 8) { 2067 emit_precossin(pc, temp, src[0][3]); 2068 emit_flop(pc, 4, dst[3], temp); 2069 if (!(mask &= 7)) 2070 break; 2071 if (temp == dst[3]) 2072 temp = brdc = temp_temp(pc); 2073 } 2074 emit_precossin(pc, temp, src[0][0]); 2075 emit_flop(pc, 4, brdc, temp); 2076 break; 2077 case TGSI_OPCODE_SLT: 2078 case TGSI_OPCODE_SGE: 2079 case TGSI_OPCODE_SEQ: 2080 case TGSI_OPCODE_SGT: 2081 case TGSI_OPCODE_SLE: 2082 case TGSI_OPCODE_SNE: 2083 i = map_tgsi_setop_cc(inst->Instruction.Opcode); 2084 for (c = 0; c < 4; c++) { 2085 if (!(mask & (1 << c))) 2086 continue; 2087 emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]); 2088 } 2089 break; 2090 case TGSI_OPCODE_SUB: 2091 for (c = 0; c < 4; c++) { 2092 if (!(mask & (1 << c))) 2093 continue; 2094 emit_sub(pc, dst[c], src[0][c], src[1][c]); 2095 } 2096 break; 2097 case TGSI_OPCODE_TEX: 2098 emit_tex(pc, dst, mask, src[0], unit, 2099 inst->InstructionExtTexture.Texture, FALSE); 2100 break; 2101 case TGSI_OPCODE_TXP: 2102 emit_tex(pc, dst, mask, 
src[0], unit, 2103 inst->InstructionExtTexture.Texture, TRUE); 2104 break; 2105 case TGSI_OPCODE_TRUNC: 2106 for (c = 0; c < 4; c++) { 2107 if (!(mask & (1 << c))) 2108 continue; 2109 emit_cvt(pc, dst[c], src[0][c], -1, 2110 CVTOP_TRUNC, CVT_F32_F32 | CVT_RI); 2111 } 2112 break; 2113 case TGSI_OPCODE_XPD: 2114 temp = temp_temp(pc); 2115 if (mask & (1 << 0)) { 2116 emit_mul(pc, temp, src[0][2], src[1][1]); 2117 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 2118 } 2119 if (mask & (1 << 1)) { 2120 emit_mul(pc, temp, src[0][0], src[1][2]); 2121 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 2122 } 2123 if (mask & (1 << 2)) { 2124 emit_mul(pc, temp, src[0][1], src[1][0]); 2125 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 2126 } 2127 if (mask & (1 << 3)) 2128 emit_mov_immdval(pc, dst[3], 1.0); 2129 break; 2130 case TGSI_OPCODE_END: 2131 break; 2132 default: 2133 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 2134 return FALSE; 2135 } 2136 2137 if (brdc) { 2138 if (sat) 2139 emit_sat(pc, brdc, brdc); 2140 for (c = 0; c < 4; c++) 2141 if ((mask & (1 << c)) && dst[c] != brdc) 2142 emit_mov(pc, dst[c], brdc); 2143 } else 2144 if (sat) { 2145 for (c = 0; c < 4; c++) { 2146 if (!(mask & (1 << c))) 2147 continue; 2148 /* In this case we saturate later, and dst[c] won't 2149 * be another temp_temp (and thus lost), since rdst 2150 * already is TEMP (see above). 
*/ 2151 if (rdst[c]->type == P_TEMP && rdst[c]->index < 0) 2152 continue; 2153 emit_sat(pc, rdst[c], dst[c]); 2154 } 2155 } 2156 2157 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 2158 for (c = 0; c < 4; c++) { 2159 if (!src[i][c]) 2160 continue; 2161 src[i][c]->neg = 0; 2162 if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD) 2163 FREE(src[i][c]); 2164 else 2165 if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST) 2166 FREE(src[i][c]); /* indirect constant */ 2167 } 2168 } 2169 2170 kill_temp_temp(pc); 2171 return TRUE; 2172} 2173 2174static void 2175prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) 2176{ 2177 struct nv50_reg *reg = NULL; 2178 const struct tgsi_full_src_register *src; 2179 const struct tgsi_dst_register *dst; 2180 unsigned i, c, k, mask; 2181 2182 dst = &insn->FullDstRegisters[0].DstRegister; 2183 mask = dst->WriteMask; 2184 2185 if (dst->File == TGSI_FILE_TEMPORARY) 2186 reg = pc->temp; 2187 else 2188 if (dst->File == TGSI_FILE_OUTPUT) 2189 reg = pc->result; 2190 2191 if (reg) { 2192 for (c = 0; c < 4; c++) { 2193 if (!(mask & (1 << c))) 2194 continue; 2195 reg[dst->Index * 4 + c].acc = pc->insn_nr; 2196 } 2197 } 2198 2199 for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { 2200 src = &insn->FullSrcRegisters[i]; 2201 2202 if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) 2203 reg = pc->temp; 2204 else 2205 if (src->SrcRegister.File == TGSI_FILE_INPUT) 2206 reg = pc->attr; 2207 else 2208 continue; 2209 2210 mask = nv50_tgsi_src_mask(insn, i); 2211 2212 for (c = 0; c < 4; c++) { 2213 if (!(mask & (1 << c))) 2214 continue; 2215 k = tgsi_util_get_full_src_register_swizzle(src, c); 2216 2217 reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr; 2218 } 2219 } 2220} 2221 2222/* Returns a bitmask indicating which dst components need to be 2223 * written to temporaries first to avoid 'corrupting' sources. 
2224 * 2225 * m[i] (out) indicate component to write in the i-th position 2226 * rdep[c] (in) bitmasks of dst[i] that require dst[c] as source 2227 */ 2228static unsigned 2229nv50_revdep_reorder(unsigned m[4], unsigned rdep[4]) 2230{ 2231 unsigned i, c, x, unsafe; 2232 2233 for (c = 0; c < 4; c++) 2234 m[c] = c; 2235 2236 /* Swap as long as a dst component written earlier is depended on 2237 * by one written later, but the next one isn't depended on by it. 2238 */ 2239 for (c = 0; c < 3; c++) { 2240 if (rdep[m[c + 1]] & (1 << m[c])) 2241 continue; /* if next one is depended on by us */ 2242 for (i = c + 1; i < 4; i++) 2243 /* if we are depended on by a later one */ 2244 if (rdep[m[c]] & (1 << m[i])) 2245 break; 2246 if (i == 4) 2247 continue; 2248 /* now, swap */ 2249 x = m[c]; 2250 m[c] = m[c + 1]; 2251 m[c + 1] = x; 2252 2253 /* restart */ 2254 c = 0; 2255 } 2256 2257 /* mark dependencies that could not be resolved by reordering */ 2258 for (i = 0; i < 3; ++i) 2259 for (c = i + 1; c < 4; ++c) 2260 if (rdep[m[i]] & (1 << m[c])) 2261 unsafe |= (1 << i); 2262 2263 /* NOTE: $unsafe is with respect to order, not component */ 2264 return unsafe; 2265} 2266 2267/* Select a suitable dst register for broadcasting scalar results, 2268 * or return NULL if we have to allocate an extra TEMP. 2269 * 2270 * If e.g. only 1 component is written, we may also emit the final 2271 * result to a write-only register. 
2272 */ 2273static struct nv50_reg * 2274tgsi_broadcast_dst(struct nv50_pc *pc, 2275 const struct tgsi_full_dst_register *fd, unsigned mask) 2276{ 2277 if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) { 2278 int c = ffs(~mask & fd->DstRegister.WriteMask); 2279 if (c) 2280 return tgsi_dst(pc, c - 1, fd); 2281 } else { 2282 int c = ffs(fd->DstRegister.WriteMask) - 1; 2283 if ((1 << c) == fd->DstRegister.WriteMask) 2284 return tgsi_dst(pc, c, fd); 2285 } 2286 2287 return NULL; 2288} 2289 2290/* Scan source swizzles and return a bitmask indicating dst regs that 2291 * also occur among the src regs, and fill rdep for nv50_revdep_reoder. 2292 */ 2293static unsigned 2294nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn, 2295 unsigned rdep[4]) 2296{ 2297 const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0]; 2298 const struct tgsi_full_src_register *fs; 2299 unsigned i, deqs = 0; 2300 2301 for (i = 0; i < 4; ++i) 2302 rdep[i] = 0; 2303 2304 for (i = 0; i < insn->Instruction.NumSrcRegs; i++) { 2305 unsigned chn, mask = nv50_tgsi_src_mask(insn, i); 2306 boolean neg_supp = negate_supported(insn, i); 2307 2308 fs = &insn->FullSrcRegisters[i]; 2309 if (fs->SrcRegister.File != fd->DstRegister.File || 2310 fs->SrcRegister.Index != fd->DstRegister.Index) 2311 continue; 2312 2313 for (chn = 0; chn < 4; ++chn) { 2314 unsigned s, c; 2315 2316 if (!(mask & (1 << chn))) /* src is not read */ 2317 continue; 2318 c = tgsi_util_get_full_src_register_swizzle(fs, chn); 2319 s = tgsi_util_get_full_src_register_sign_mode(fs, chn); 2320 2321 if (!(fd->DstRegister.WriteMask & (1 << c))) 2322 continue; 2323 2324 /* no danger if src is copied to TEMP first */ 2325 if ((s != TGSI_UTIL_SIGN_KEEP) && 2326 (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp)) 2327 continue; 2328 2329 rdep[c] |= nv50_tgsi_dst_revdep( 2330 insn->Instruction.Opcode, i, chn); 2331 deqs |= (1 << c); 2332 } 2333 } 2334 2335 return deqs; 2336} 2337 2338static boolean 2339nv50_tgsi_insn(struct nv50_pc *pc, 
const union tgsi_full_token *tok) 2340{ 2341 struct tgsi_full_instruction insn = tok->FullInstruction; 2342 const struct tgsi_full_dst_register *fd; 2343 unsigned i, deqs, rdep[4], m[4]; 2344 2345 fd = &tok->FullInstruction.FullDstRegisters[0]; 2346 deqs = nv50_tgsi_scan_swizzle(&insn, rdep); 2347 2348 if (is_scalar_op(insn.Instruction.Opcode)) { 2349 pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); 2350 if (!pc->r_brdc) 2351 pc->r_brdc = temp_temp(pc); 2352 return nv50_program_tx_insn(pc, &insn); 2353 } 2354 pc->r_brdc = NULL; 2355 2356 if (!deqs) 2357 return nv50_program_tx_insn(pc, &insn); 2358 2359 deqs = nv50_revdep_reorder(m, rdep); 2360 2361 for (i = 0; i < 4; ++i) { 2362 assert(pc->r_dst[m[i]] == NULL); 2363 2364 insn.FullDstRegisters[0].DstRegister.WriteMask = 2365 fd->DstRegister.WriteMask & (1 << m[i]); 2366 2367 if (!insn.FullDstRegisters[0].DstRegister.WriteMask) 2368 continue; 2369 2370 if (deqs & (1 << i)) 2371 pc->r_dst[m[i]] = alloc_temp(pc, NULL); 2372 2373 if (!nv50_program_tx_insn(pc, &insn)) 2374 return FALSE; 2375 } 2376 2377 for (i = 0; i < 4; i++) { 2378 struct nv50_reg *reg = pc->r_dst[i]; 2379 if (!reg) 2380 continue; 2381 pc->r_dst[i] = NULL; 2382 2383 if (insn.Instruction.Saturate == TGSI_SAT_ZERO_ONE) 2384 emit_sat(pc, tgsi_dst(pc, i, fd), reg); 2385 else 2386 emit_mov(pc, tgsi_dst(pc, i, fd), reg); 2387 free_temp(pc, reg); 2388 } 2389 2390 return TRUE; 2391} 2392 2393static void 2394load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) 2395{ 2396 struct nv50_reg *iv, **ppiv; 2397 unsigned mode = pc->interp_mode[reg->index]; 2398 2399 ppiv = (mode & INTERP_CENTROID) ? 
&pc->iv_c : &pc->iv_p; 2400 iv = *ppiv; 2401 2402 if ((mode & INTERP_PERSPECTIVE) && !iv) { 2403 iv = *ppiv = alloc_temp(pc, NULL); 2404 iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; 2405 2406 emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); 2407 emit_flop(pc, 0, iv, iv); 2408 2409 /* XXX: when loading interpolants dynamically, move these 2410 * to the program head, or make sure it can't be skipped. 2411 */ 2412 } 2413 2414 emit_interp(pc, reg, iv, mode); 2415} 2416 2417static boolean 2418nv50_program_tx_prep(struct nv50_pc *pc) 2419{ 2420 struct tgsi_parse_context tp; 2421 struct nv50_program *p = pc->p; 2422 boolean ret = FALSE; 2423 unsigned i, c, flat_nr = 0; 2424 2425 tgsi_parse_init(&tp, pc->p->pipe.tokens); 2426 while (!tgsi_parse_end_of_tokens(&tp)) { 2427 const union tgsi_full_token *tok = &tp.FullToken; 2428 2429 tgsi_parse_token(&tp); 2430 switch (tok->Token.Type) { 2431 case TGSI_TOKEN_TYPE_IMMEDIATE: 2432 { 2433 const struct tgsi_full_immediate *imm = 2434 &tp.FullToken.FullImmediate; 2435 2436 ctor_immd(pc, imm->u[0].Float, 2437 imm->u[1].Float, 2438 imm->u[2].Float, 2439 imm->u[3].Float); 2440 } 2441 break; 2442 case TGSI_TOKEN_TYPE_DECLARATION: 2443 { 2444 const struct tgsi_full_declaration *d; 2445 unsigned si, last, first, mode; 2446 2447 d = &tp.FullToken.FullDeclaration; 2448 first = d->DeclarationRange.First; 2449 last = d->DeclarationRange.Last; 2450 2451 switch (d->Declaration.File) { 2452 case TGSI_FILE_TEMPORARY: 2453 break; 2454 case TGSI_FILE_OUTPUT: 2455 if (!d->Declaration.Semantic || 2456 p->type == PIPE_SHADER_FRAGMENT) 2457 break; 2458 2459 si = d->Semantic.SemanticIndex; 2460 switch (d->Semantic.SemanticName) { 2461 case TGSI_SEMANTIC_BCOLOR: 2462 p->cfg.two_side[si].hw = first; 2463 if (p->cfg.io_nr > first) 2464 p->cfg.io_nr = first; 2465 break; 2466 case TGSI_SEMANTIC_PSIZE: 2467 p->cfg.psiz = first; 2468 if (p->cfg.io_nr > first) 2469 p->cfg.io_nr = first; 2470 break; 2471 /* 2472 case TGSI_SEMANTIC_CLIP_DISTANCE: 2473 
p->cfg.clpd = MIN2(p->cfg.clpd, first); 2474 break; 2475 */ 2476 default: 2477 break; 2478 } 2479 break; 2480 case TGSI_FILE_INPUT: 2481 { 2482 if (p->type != PIPE_SHADER_FRAGMENT) 2483 break; 2484 2485 switch (d->Declaration.Interpolate) { 2486 case TGSI_INTERPOLATE_CONSTANT: 2487 mode = INTERP_FLAT; 2488 flat_nr++; 2489 break; 2490 case TGSI_INTERPOLATE_PERSPECTIVE: 2491 mode = INTERP_PERSPECTIVE; 2492 p->cfg.regs[1] |= 0x08 << 24; 2493 break; 2494 default: 2495 mode = INTERP_LINEAR; 2496 break; 2497 } 2498 if (d->Declaration.Centroid) 2499 mode |= INTERP_CENTROID; 2500 2501 assert(last < 32); 2502 for (i = first; i <= last; i++) 2503 pc->interp_mode[i] = mode; 2504 } 2505 break; 2506 case TGSI_FILE_ADDRESS: 2507 case TGSI_FILE_CONSTANT: 2508 case TGSI_FILE_SAMPLER: 2509 break; 2510 default: 2511 NOUVEAU_ERR("bad decl file %d\n", 2512 d->Declaration.File); 2513 goto out_err; 2514 } 2515 } 2516 break; 2517 case TGSI_TOKEN_TYPE_INSTRUCTION: 2518 pc->insn_nr++; 2519 prep_inspect_insn(pc, &tok->FullInstruction); 2520 break; 2521 default: 2522 break; 2523 } 2524 } 2525 2526 if (p->type == PIPE_SHADER_VERTEX) { 2527 int rid = 0; 2528 2529 for (i = 0; i < pc->attr_nr * 4; ++i) { 2530 if (pc->attr[i].acc) { 2531 pc->attr[i].hw = rid++; 2532 p->cfg.attr[i / 32] |= 1 << (i % 32); 2533 } 2534 } 2535 2536 for (i = 0, rid = 0; i < pc->result_nr; ++i) { 2537 p->cfg.io[i].hw = rid; 2538 p->cfg.io[i].id_vp = i; 2539 2540 for (c = 0; c < 4; ++c) { 2541 int n = i * 4 + c; 2542 if (!pc->result[n].acc) 2543 continue; 2544 pc->result[n].hw = rid++; 2545 p->cfg.io[i].mask |= 1 << c; 2546 } 2547 } 2548 2549 for (c = 0; c < 2; ++c) 2550 if (p->cfg.two_side[c].hw < 0x40) 2551 p->cfg.two_side[c] = p->cfg.io[ 2552 p->cfg.two_side[c].hw]; 2553 2554 if (p->cfg.psiz < 0x40) 2555 p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; 2556 } else 2557 if (p->type == PIPE_SHADER_FRAGMENT) { 2558 int rid, aid; 2559 unsigned n = 0, m = pc->attr_nr - flat_nr; 2560 2561 int base = (TGSI_SEMANTIC_POSITION == 2562 
p->info.input_semantic_name[0]) ? 0 : 1; 2563 2564 /* non-flat interpolants have to be mapped to 2565 * the lower hardware IDs, so sort them: 2566 */ 2567 for (i = 0; i < pc->attr_nr; i++) { 2568 if (pc->interp_mode[i] == INTERP_FLAT) { 2569 p->cfg.io[m].id_vp = i + base; 2570 p->cfg.io[m++].id_fp = i; 2571 } else { 2572 if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) 2573 p->cfg.io[n].linear = TRUE; 2574 p->cfg.io[n].id_vp = i + base; 2575 p->cfg.io[n++].id_fp = i; 2576 } 2577 } 2578 2579 if (!base) /* set w-coordinate mask from perspective interp */ 2580 p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; 2581 2582 aid = popcnt4( /* if fcrd isn't contained in cfg.io */ 2583 base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); 2584 2585 for (n = 0; n < pc->attr_nr; ++n) { 2586 p->cfg.io[n].hw = rid = aid; 2587 i = p->cfg.io[n].id_fp; 2588 2589 for (c = 0; c < 4; ++c) { 2590 if (!pc->attr[i * 4 + c].acc) 2591 continue; 2592 pc->attr[i * 4 + c].rhw = rid++; 2593 p->cfg.io[n].mask |= 1 << c; 2594 2595 load_interpolant(pc, &pc->attr[i * 4 + c]); 2596 } 2597 aid += popcnt4(p->cfg.io[n].mask); 2598 } 2599 2600 if (!base) 2601 p->cfg.regs[1] |= p->cfg.io[0].mask << 24; 2602 2603 m = popcnt4(p->cfg.regs[1] >> 24); 2604 2605 /* set count of non-position inputs and of non-flat 2606 * non-position inputs for FP_INTERPOLANT_CTRL 2607 */ 2608 p->cfg.regs[1] |= aid - m; 2609 2610 if (flat_nr) { 2611 i = p->cfg.io[pc->attr_nr - flat_nr].hw; 2612 p->cfg.regs[1] |= (i - m) << 16; 2613 } else 2614 p->cfg.regs[1] |= p->cfg.regs[1] << 16; 2615 2616 /* mark color semantic for light-twoside */ 2617 n = 0x40; 2618 for (i = 0; i < pc->attr_nr; i++) { 2619 ubyte si, sn; 2620 2621 sn = p->info.input_semantic_name[p->cfg.io[i].id_fp]; 2622 si = p->info.input_semantic_index[p->cfg.io[i].id_fp]; 2623 2624 if (sn == TGSI_SEMANTIC_COLOR) { 2625 p->cfg.two_side[si] = p->cfg.io[i]; 2626 2627 /* increase colour count */ 2628 p->cfg.regs[0] += popcnt4( 2629 p->cfg.two_side[si].mask) << 16; 2630 2631 n = 
MIN2(n, p->cfg.io[i].hw - m); 2632 } 2633 } 2634 if (n < 0x40) 2635 p->cfg.regs[0] += n; 2636 2637 /* Initialize FP results: 2638 * FragDepth is always first TGSI and last hw output 2639 */ 2640 i = p->info.writes_z ? 4 : 0; 2641 for (rid = 0; i < pc->result_nr * 4; i++) 2642 pc->result[i].rhw = rid++; 2643 if (p->info.writes_z) 2644 pc->result[2].rhw = rid; 2645 2646 p->cfg.high_result = rid; 2647 2648 /* separate/different colour results for MRTs ? */ 2649 if (pc->result_nr - (p->info.writes_z ? 1 : 0) > 1) 2650 p->cfg.regs[2] |= 1; 2651 } 2652 2653 if (pc->immd_nr) { 2654 int rid = 0; 2655 2656 pc->immd = MALLOC(pc->immd_nr * 4 * sizeof(struct nv50_reg)); 2657 if (!pc->immd) 2658 goto out_err; 2659 2660 for (i = 0; i < pc->immd_nr; i++) { 2661 for (c = 0; c < 4; c++, rid++) 2662 ctor_reg(&pc->immd[rid], P_IMMD, i, rid); 2663 } 2664 } 2665 2666 ret = TRUE; 2667out_err: 2668 if (pc->iv_p) 2669 free_temp(pc, pc->iv_p); 2670 if (pc->iv_c) 2671 free_temp(pc, pc->iv_c); 2672 2673 tgsi_parse_free(&tp); 2674 return ret; 2675} 2676 2677static void 2678free_nv50_pc(struct nv50_pc *pc) 2679{ 2680 if (pc->immd) 2681 FREE(pc->immd); 2682 if (pc->param) 2683 FREE(pc->param); 2684 if (pc->result) 2685 FREE(pc->result); 2686 if (pc->attr) 2687 FREE(pc->attr); 2688 if (pc->temp) 2689 FREE(pc->temp); 2690 2691 FREE(pc); 2692} 2693 2694static boolean 2695ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) 2696{ 2697 int i, c; 2698 unsigned rtype[2] = { P_ATTR, P_RESULT }; 2699 2700 pc->p = p; 2701 pc->temp_nr = p->info.file_max[TGSI_FILE_TEMPORARY] + 1; 2702 pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1; 2703 pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1; 2704 pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; 2705 pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1; 2706 assert(pc->addr_nr <= 2); 2707 2708 p->cfg.high_temp = 4; 2709 2710 p->cfg.two_side[0].hw = 0x40; 2711 p->cfg.two_side[1].hw = 0x40; 2712 2713 switch (p->type) { 2714 case 
PIPE_SHADER_VERTEX: 2715 p->cfg.psiz = 0x40; 2716 p->cfg.clpd = 0x40; 2717 p->cfg.io_nr = pc->result_nr; 2718 break; 2719 case PIPE_SHADER_FRAGMENT: 2720 rtype[0] = rtype[1] = P_TEMP; 2721 2722 p->cfg.regs[0] = 0x01000004; 2723 p->cfg.io_nr = pc->attr_nr; 2724 2725 if (p->info.writes_z) { 2726 p->cfg.regs[2] |= 0x00000100; 2727 p->cfg.regs[3] |= 0x00000011; 2728 } 2729 if (p->info.uses_kill) 2730 p->cfg.regs[2] |= 0x00100000; 2731 break; 2732 } 2733 2734 if (pc->temp_nr) { 2735 pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg)); 2736 if (!pc->temp) 2737 return FALSE; 2738 2739 for (i = 0; i < pc->temp_nr * 4; ++i) 2740 ctor_reg(&pc->temp[i], P_TEMP, i / 4, -1); 2741 } 2742 2743 if (pc->attr_nr) { 2744 pc->attr = MALLOC(pc->attr_nr * 4 * sizeof(struct nv50_reg)); 2745 if (!pc->attr) 2746 return FALSE; 2747 2748 for (i = 0; i < pc->attr_nr * 4; ++i) 2749 ctor_reg(&pc->attr[i], rtype[0], i / 4, -1); 2750 } 2751 2752 if (pc->result_nr) { 2753 unsigned nr = pc->result_nr * 4; 2754 2755 pc->result = MALLOC(nr * sizeof(struct nv50_reg)); 2756 if (!pc->result) 2757 return FALSE; 2758 2759 for (i = 0; i < nr; ++i) 2760 ctor_reg(&pc->result[i], rtype[1], i / 4, -1); 2761 } 2762 2763 if (pc->param_nr) { 2764 int rid = 0; 2765 2766 pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg)); 2767 if (!pc->param) 2768 return FALSE; 2769 2770 for (i = 0; i < pc->param_nr; ++i) 2771 for (c = 0; c < 4; ++c, ++rid) 2772 ctor_reg(&pc->param[rid], P_CONST, i, rid); 2773 } 2774 2775 if (pc->addr_nr) { 2776 pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *)); 2777 if (!pc->addr) 2778 return FALSE; 2779 } 2780 for (i = 0; i < NV50_SU_MAX_ADDR; ++i) 2781 ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); 2782 2783 return TRUE; 2784} 2785 2786static void 2787nv50_fp_move_results(struct nv50_pc *pc) 2788{ 2789 struct nv50_reg reg; 2790 unsigned i; 2791 2792 ctor_reg(®, P_TEMP, -1, -1); 2793 2794 for (i = 0; i < pc->result_nr * 4; ++i) { 2795 if (pc->result[i].rhw < 0 || 
pc->result[i].hw < 0) 2796 continue; 2797 if (pc->result[i].rhw != pc->result[i].hw) { 2798 reg.hw = pc->result[i].rhw; 2799 emit_mov(pc, ®, &pc->result[i]); 2800 } 2801 } 2802} 2803 2804static void 2805nv50_program_fixup_insns(struct nv50_pc *pc) 2806{ 2807 struct nv50_program_exec *e, *prev = NULL, **bra_list; 2808 unsigned i, n, pos; 2809 2810 bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *)); 2811 2812 /* Collect branch instructions, we need to adjust their offsets 2813 * when converting 32 bit instructions to 64 bit ones 2814 */ 2815 for (n = 0, e = pc->p->exec_head; e; e = e->next) 2816 if (e->param.index >= 0 && !e->param.mask) 2817 bra_list[n++] = e; 2818 2819 /* Make sure we don't have any single 32 bit instructions. */ 2820 for (e = pc->p->exec_head, pos = 0; e; e = e->next) { 2821 pos += is_long(e) ? 2 : 1; 2822 2823 if ((pos & 1) && (!e->next || is_long(e->next))) { 2824 for (i = 0; i < n; ++i) 2825 if (bra_list[i]->param.index >= pos) 2826 bra_list[i]->param.index += 1; 2827 convert_to_long(pc, e); 2828 ++pos; 2829 } 2830 if (e->next) 2831 prev = e; 2832 } 2833 2834 assert(!is_immd(pc->p->exec_head)); 2835 assert(!is_immd(pc->p->exec_tail)); 2836 2837 /* last instruction must be long so it can have the end bit set */ 2838 if (!is_long(pc->p->exec_tail)) { 2839 convert_to_long(pc, pc->p->exec_tail); 2840 if (prev) 2841 convert_to_long(pc, prev); 2842 } 2843 assert(!(pc->p->exec_tail->inst[1] & 2)); 2844 /* set the end-bit */ 2845 pc->p->exec_tail->inst[1] |= 1; 2846 2847 FREE(bra_list); 2848} 2849 2850static boolean 2851nv50_program_tx(struct nv50_program *p) 2852{ 2853 struct tgsi_parse_context parse; 2854 struct nv50_pc *pc; 2855 boolean ret; 2856 2857 pc = CALLOC_STRUCT(nv50_pc); 2858 if (!pc) 2859 return FALSE; 2860 2861 ret = ctor_nv50_pc(pc, p); 2862 if (ret == FALSE) 2863 goto out_cleanup; 2864 2865 ret = nv50_program_tx_prep(pc); 2866 if (ret == FALSE) 2867 goto out_cleanup; 2868 2869 tgsi_parse_init(&parse, 
pc->p->pipe.tokens); 2870 while (!tgsi_parse_end_of_tokens(&parse)) { 2871 const union tgsi_full_token *tok = &parse.FullToken; 2872 2873 /* don't allow half insn/immd on first and last instruction */ 2874 pc->allow32 = TRUE; 2875 if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) 2876 pc->allow32 = FALSE; 2877 2878 tgsi_parse_token(&parse); 2879 2880 switch (tok->Token.Type) { 2881 case TGSI_TOKEN_TYPE_INSTRUCTION: 2882 ++pc->insn_cur; 2883 ret = nv50_tgsi_insn(pc, tok); 2884 if (ret == FALSE) 2885 goto out_err; 2886 break; 2887 default: 2888 break; 2889 } 2890 } 2891 2892 if (pc->p->type == PIPE_SHADER_FRAGMENT) 2893 nv50_fp_move_results(pc); 2894 2895 nv50_program_fixup_insns(pc); 2896 2897 p->param_nr = pc->param_nr * 4; 2898 p->immd_nr = pc->immd_nr * 4; 2899 p->immd = pc->immd_buf; 2900 2901out_err: 2902 tgsi_parse_free(&parse); 2903 2904out_cleanup: 2905 free_nv50_pc(pc); 2906 return ret; 2907} 2908 2909static void 2910nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 2911{ 2912 if (nv50_program_tx(p) == FALSE) 2913 assert(0); 2914 p->translated = TRUE; 2915} 2916 2917static void 2918nv50_program_upload_data(struct nv50_context *nv50, float *map, 2919 unsigned start, unsigned count, unsigned cbuf) 2920{ 2921 struct nouveau_channel *chan = nv50->screen->base.channel; 2922 struct nouveau_grobj *tesla = nv50->screen->tesla; 2923 2924 while (count) { 2925 unsigned nr = count > 2047 ? 
2047 : count; 2926 2927 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); 2928 OUT_RING (chan, (cbuf << 0) | (start << 8)); 2929 BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr); 2930 OUT_RINGp (chan, map, nr); 2931 2932 map += nr; 2933 start += nr; 2934 count -= nr; 2935 } 2936} 2937 2938static void 2939nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 2940{ 2941 struct pipe_screen *pscreen = nv50->pipe.screen; 2942 2943 if (!p->data[0] && p->immd_nr) { 2944 struct nouveau_resource *heap = nv50->screen->immd_heap[0]; 2945 2946 if (nouveau_resource_alloc(heap, p->immd_nr, p, &p->data[0])) { 2947 while (heap->next && heap->size < p->immd_nr) { 2948 struct nv50_program *evict = heap->next->priv; 2949 nouveau_resource_free(&evict->data[0]); 2950 } 2951 2952 if (nouveau_resource_alloc(heap, p->immd_nr, p, 2953 &p->data[0])) 2954 assert(0); 2955 } 2956 2957 /* immediates only need to be uploaded again when freed */ 2958 nv50_program_upload_data(nv50, p->immd, p->data[0]->start, 2959 p->immd_nr, NV50_CB_PMISC); 2960 } 2961 2962 assert(p->param_nr <= 512); 2963 2964 if (p->param_nr) { 2965 unsigned cb; 2966 float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], 2967 PIPE_BUFFER_USAGE_CPU_READ); 2968 2969 if (p->type == PIPE_SHADER_VERTEX) 2970 cb = NV50_CB_PVP; 2971 else 2972 cb = NV50_CB_PFP; 2973 2974 nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); 2975 pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); 2976 } 2977} 2978 2979static void 2980nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 2981{ 2982 struct nouveau_channel *chan = nv50->screen->base.channel; 2983 struct nouveau_grobj *tesla = nv50->screen->tesla; 2984 struct nv50_program_exec *e; 2985 struct nouveau_stateobj *so; 2986 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; 2987 unsigned start, count, *up, *ptr; 2988 boolean upload = FALSE; 2989 2990 if (!p->bo) { 2991 nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100, 
2992 p->exec_size * 4, &p->bo); 2993 upload = TRUE; 2994 } 2995 2996 if (p->data[0] && p->data[0]->start != p->data_start[0]) 2997 upload = TRUE; 2998 2999 if (!upload) 3000 return; 3001 3002 for (e = p->exec_head; e; e = e->next) { 3003 unsigned ei, ci, bs; 3004 3005 if (e->param.index < 0) 3006 continue; 3007 3008 if (e->param.mask == 0) { 3009 assert(!(e->param.index & 1)); 3010 /* seem to be 8 byte steps */ 3011 ei = (e->param.index >> 1) + 0 /* START_ID */; 3012 3013 e->inst[0] &= 0xf0000fff; 3014 e->inst[0] |= ei << 12; 3015 continue; 3016 } 3017 3018 bs = (e->inst[1] >> 22) & 0x07; 3019 assert(bs < 2); 3020 ei = e->param.shift >> 5; 3021 ci = e->param.index; 3022 if (bs == 0) 3023 ci += p->data[bs]->start; 3024 3025 e->inst[ei] &= ~e->param.mask; 3026 e->inst[ei] |= (ci << e->param.shift); 3027 } 3028 3029 if (p->data[0]) 3030 p->data_start[0] = p->data[0]->start; 3031 3032#ifdef NV50_PROGRAM_DUMP 3033 NOUVEAU_ERR("-------\n"); 3034 for (e = p->exec_head; e; e = e->next) { 3035 NOUVEAU_ERR("0x%08x\n", e->inst[0]); 3036 if (is_long(e)) 3037 NOUVEAU_ERR("0x%08x\n", e->inst[1]); 3038 } 3039#endif 3040 3041 up = ptr = MALLOC(p->exec_size * 4); 3042 for (e = p->exec_head; e; e = e->next) { 3043 *(ptr++) = e->inst[0]; 3044 if (is_long(e)) 3045 *(ptr++) = e->inst[1]; 3046 } 3047 3048 so = so_new(4,2); 3049 so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); 3050 so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0); 3051 so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0); 3052 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); 3053 3054 start = 0; count = p->exec_size; 3055 while (count) { 3056 struct nouveau_channel *chan = nv50->screen->base.channel; 3057 unsigned nr; 3058 3059 so_emit(chan, so); 3060 3061 nr = MIN2(count, 2047); 3062 nr = MIN2(chan->pushbuf->remaining, nr); 3063 if (chan->pushbuf->remaining < (nr + 3)) { 3064 FIRE_RING(chan); 3065 continue; 3066 } 3067 3068 BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1); 3069 
OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); 3070 BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr); 3071 OUT_RINGp (chan, up + start, nr); 3072 3073 start += nr; 3074 count -= nr; 3075 } 3076 3077 FREE(up); 3078 so_ref(NULL, &so); 3079} 3080 3081void 3082nv50_vertprog_validate(struct nv50_context *nv50) 3083{ 3084 struct nouveau_grobj *tesla = nv50->screen->tesla; 3085 struct nv50_program *p = nv50->vertprog; 3086 struct nouveau_stateobj *so; 3087 3088 if (!p->translated) { 3089 nv50_program_validate(nv50, p); 3090 if (!p->translated) 3091 assert(0); 3092 } 3093 3094 nv50_program_validate_data(nv50, p); 3095 nv50_program_validate_code(nv50, p); 3096 3097 so = so_new(13, 2); 3098 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 3099 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 3100 NOUVEAU_BO_HIGH, 0, 0); 3101 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 3102 NOUVEAU_BO_LOW, 0, 0); 3103 so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); 3104 so_data (so, p->cfg.attr[0]); 3105 so_data (so, p->cfg.attr[1]); 3106 so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); 3107 so_data (so, p->cfg.high_result); 3108 so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); 3109 so_data (so, p->cfg.high_result); //8); 3110 so_data (so, p->cfg.high_temp); 3111 so_method(so, tesla, NV50TCL_VP_START_ID, 1); 3112 so_data (so, 0); /* program start offset */ 3113 so_ref(so, &nv50->state.vertprog); 3114 so_ref(NULL, &so); 3115} 3116 3117void 3118nv50_fragprog_validate(struct nv50_context *nv50) 3119{ 3120 struct nouveau_grobj *tesla = nv50->screen->tesla; 3121 struct nv50_program *p = nv50->fragprog; 3122 struct nouveau_stateobj *so; 3123 3124 if (!p->translated) { 3125 nv50_program_validate(nv50, p); 3126 if (!p->translated) 3127 assert(0); 3128 } 3129 3130 nv50_program_validate_data(nv50, p); 3131 nv50_program_validate_code(nv50, p); 3132 3133 so = so_new(64, 2); 3134 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 3135 so_reloc (so, p->bo, 0, 
NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 3136 NOUVEAU_BO_HIGH, 0, 0); 3137 so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 3138 NOUVEAU_BO_LOW, 0, 0); 3139 so_method(so, tesla, NV50TCL_FP_REG_ALLOC_TEMP, 1); 3140 so_data (so, p->cfg.high_temp); 3141 so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); 3142 so_data (so, p->cfg.high_result); 3143 so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); 3144 so_data (so, p->cfg.regs[2]); 3145 so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); 3146 so_data (so, p->cfg.regs[3]); 3147 so_method(so, tesla, NV50TCL_FP_START_ID, 1); 3148 so_data (so, 0); /* program start offset */ 3149 so_ref(so, &nv50->state.fragprog); 3150 so_ref(NULL, &so); 3151} 3152 3153static void 3154nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) 3155{ 3156 struct nv50_program *fp = nv50->fragprog; 3157 struct nv50_program *vp = nv50->vertprog; 3158 unsigned i, c, m = base; 3159 3160 /* XXX: This can't work correctly in all cases yet, we either 3161 * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has 3162 * to be per FP input instead of per VP output 3163 */ 3164 memset(pntc, 0, 8 * sizeof(uint32_t)); 3165 3166 for (i = 0; i < fp->cfg.io_nr; i++) { 3167 uint8_t sn, si; 3168 uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp; 3169 unsigned n = popcnt4(fp->cfg.io[i].mask); 3170 3171 if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { 3172 m += n; 3173 continue; 3174 } 3175 3176 sn = vp->info.input_semantic_name[j]; 3177 si = vp->info.input_semantic_index[j]; 3178 3179 if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) { 3180 ubyte mode = 3181 nv50->rasterizer->pipe.sprite_coord_mode[si]; 3182 3183 if (mode == PIPE_SPRITE_COORD_NONE) { 3184 m += n; 3185 continue; 3186 } 3187 } 3188 3189 /* this is either PointCoord or replaced by sprite coords */ 3190 for (c = 0; c < 4; c++) { 3191 if (!(fp->cfg.io[i].mask & (1 << c))) 3192 continue; 3193 pntc[m / 8] |= (c + 1) << ((m % 8) * 4); 3194 ++m; 3195 } 3196 
} 3197} 3198 3199static int 3200nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], 3201 struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) 3202{ 3203 int c; 3204 uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; 3205 uint8_t *map = (uint8_t *)p_map; 3206 3207 for (c = 0; c < 4; ++c) { 3208 if (mf & 1) { 3209 if (fpi->linear == TRUE) 3210 lin[mid / 32] |= 1 << (mid % 32); 3211 map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); 3212 } 3213 3214 oid += mv & 1; 3215 mf >>= 1; 3216 mv >>= 1; 3217 } 3218 3219 return mid; 3220} 3221 3222void 3223nv50_linkage_validate(struct nv50_context *nv50) 3224{ 3225 struct nouveau_grobj *tesla = nv50->screen->tesla; 3226 struct nv50_program *vp = nv50->vertprog; 3227 struct nv50_program *fp = nv50->fragprog; 3228 struct nouveau_stateobj *so; 3229 struct nv50_sreg4 dummy, *vpo; 3230 int i, n, c, m = 0; 3231 uint32_t map[16], lin[4], reg[5], pcrd[8]; 3232 3233 memset(map, 0, sizeof(map)); 3234 memset(lin, 0, sizeof(lin)); 3235 3236 reg[1] = 0x00000004; /* low and high clip distance map ids */ 3237 reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ 3238 reg[3] = 0x00000000; /* point size map id & enable */ 3239 reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ 3240 reg[4] = fp->cfg.regs[1]; /* interpolant info */ 3241 3242 dummy.linear = FALSE; 3243 dummy.mask = 0xf; /* map all components of HPOS */ 3244 m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); 3245 3246 dummy.mask = 0x0; 3247 3248 if (vp->cfg.clpd < 0x40) { 3249 for (c = 0; c < vp->cfg.clpd_nr; ++c) 3250 map[m++] = vp->cfg.clpd + c; 3251 reg[1] = (m << 8); 3252 } 3253 3254 reg[0] |= m << 8; /* adjust BFC0 id */ 3255 3256 /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ 3257 if (nv50->rasterizer->pipe.light_twoside) { 3258 vpo = &vp->cfg.two_side[0]; 3259 3260 m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); 3261 m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); 3262 } 3263 3264 reg[0] += m - 
4; /* adjust FFC0 id */ 3265 reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ 3266 3267 i = 0; 3268 if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) 3269 i = 1; 3270 for (; i < fp->cfg.io_nr; i++) { 3271 ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp]; 3272 ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp]; 3273 3274 n = fp->cfg.io[i].id_vp; 3275 if (n >= vp->cfg.io_nr || 3276 vp->info.output_semantic_name[n] != sn || 3277 vp->info.output_semantic_index[n] != si) 3278 vpo = &dummy; 3279 else 3280 vpo = &vp->cfg.io[n]; 3281 3282 m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); 3283 } 3284 3285 if (nv50->rasterizer->pipe.point_size_per_vertex) { 3286 map[m / 4] |= vp->cfg.psiz << ((m % 4) * 8); 3287 reg[3] = (m++ << 4) | 1; 3288 } 3289 3290 /* now fill the stateobj */ 3291 so = so_new(64, 0); 3292 3293 n = (m + 3) / 4; 3294 so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); 3295 so_data (so, m); 3296 so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); 3297 so_datap (so, map, n); 3298 3299 so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); 3300 so_datap (so, reg, 4); 3301 3302 so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); 3303 so_data (so, reg[4]); 3304 3305 so_method(so, tesla, 0x1540, 4); 3306 so_datap (so, lin, 4); 3307 3308 if (nv50->rasterizer->pipe.point_sprite) { 3309 nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); 3310 3311 so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); 3312 so_datap (so, pcrd, 8); 3313 } 3314 3315 so_ref(so, &nv50->state.programs); 3316 so_ref(NULL, &so); 3317} 3318 3319void 3320nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 3321{ 3322 while (p->exec_head) { 3323 struct nv50_program_exec *e = p->exec_head; 3324 3325 p->exec_head = e->next; 3326 FREE(e); 3327 } 3328 p->exec_tail = NULL; 3329 p->exec_size = 0; 3330 3331 nouveau_bo_ref(NULL, &p->bo); 3332 3333 nouveau_resource_free(&p->data[0]); 3334 3335 p->translated = 0; 3336} 3337