nv50_program.c revision 713ef6ccd2590bd866598bb6d4f646e9ec29ba78
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL - gallium craps itself on progs/vp/arl.txt 16 * 17 * MSB - Like MAD, but MUL+SUB 18 * - Fuck it off, introduce a way to negate args for ops that 19 * support it. 20 * 21 * Look into inlining IMMD for ops other than MOV (make it general?) 22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 24 * 25 * Verify half-insns work where expected - and force disable them where they 26 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 27 * 28 * FUCK! watch dst==src vectors, can overwrite components that are needed. 29 * ie. SUB R0, R0.yzxw, R0 30 * 31 * Things to check with renouveau: 32 * SGE/SLT with needed src0/1 swap 33 * FP attr/result assignment - how? 34 * FP/VP constbuf usage 35 */ 36struct nv50_reg { 37 enum { 38 P_TEMP, 39 P_ATTR, 40 P_RESULT, 41 P_CONST, 42 P_IMMD 43 } type; 44 int index; 45 46 int hw; 47 int neg; 48}; 49 50struct nv50_pc { 51 struct nv50_program *p; 52 53 /* hw resources */ 54 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 55 56 /* tgsi resources */ 57 struct nv50_reg *temp; 58 int temp_nr; 59 struct nv50_reg *attr; 60 int attr_nr; 61 struct nv50_reg *result; 62 int result_nr; 63 struct nv50_reg *param; 64 int param_nr; 65 struct nv50_reg *immd; 66 float *immd_buf; 67 int immd_nr; 68 69 struct nv50_reg *temp_temp[8]; 70 unsigned temp_temp_nr; 71}; 72 73static void 74alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 75{ 76 int i; 77 78 if (reg->type != P_TEMP) 79 return; 80 81 if (reg->hw >= 0) { 82 /*XXX: do this here too to catch FP temp-as-attr usage.. 83 * not clean, but works */ 84 if (pc->p->cfg.high_temp < (reg->hw + 1)) 85 pc->p->cfg.high_temp = reg->hw + 1; 86 return; 87 } 88 89 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 90 if (!(pc->r_temp[i])) { 91 pc->r_temp[i] = reg; 92 reg->hw = i; 93 if (pc->p->cfg.high_temp < (i + 1)) 94 pc->p->cfg.high_temp = i + 1; 95 return; 96 } 97 } 98 99 assert(0); 100} 101 102static struct nv50_reg * 103alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 104{ 105 struct nv50_reg *r; 106 int i; 107 108 if (dst && dst->type == P_TEMP && dst->hw == -1) 109 return dst; 110 111 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 112 if (!pc->r_temp[i]) { 113 r = CALLOC_STRUCT(nv50_reg); 114 r->type = P_TEMP; 115 r->index = -1; 116 r->hw = i; 117 pc->r_temp[i] = r; 118 return r; 119 } 120 } 121 122 assert(0); 123 return NULL; 124} 125 126static void 127free_temp(struct nv50_pc *pc, struct nv50_reg *r) 128{ 129 if (r->index == -1) { 130 FREE(pc->r_temp[r->hw]); 131 pc->r_temp[r->hw] = NULL; 132 } 133} 134 135static struct nv50_reg * 136temp_temp(struct nv50_pc *pc) 137{ 138 if (pc->temp_temp_nr >= 8) 139 assert(0); 140 141 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 142 return pc->temp_temp[pc->temp_temp_nr++]; 143} 144 145static void 146kill_temp_temp(struct nv50_pc *pc) 147{ 148 int i; 149 150 for (i = 0; i < pc->temp_temp_nr; i++) 151 free_temp(pc, pc->temp_temp[i]); 152 pc->temp_temp_nr = 0; 153} 154 155static int 156ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 157{ 158 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 159 sizeof(float)); 160 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 161 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 162 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 163 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 164 165 return pc->immd_nr++; 166} 167 168static struct nv50_reg * 169alloc_immd(struct nv50_pc *pc, float f) 170{ 171 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 172 unsigned hw; 173 174 hw = ctor_immd(pc, f, 0, 0, 0) * 4; 175 r->type = P_IMMD; 176 r->hw = hw; 177 r->index = -1; 178 return r; 179} 180 181static void 182emit(struct nv50_pc *pc, unsigned *inst) 183{ 184 struct nv50_program *p = pc->p; 185 186 if (inst[0] & 1) { 187 p->insns_nr += 2; 188 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 189 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 190 } else { 191 p->insns_nr += 1; 192 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 193 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 194 } 195} 196 197static INLINE void set_long(struct nv50_pc *, unsigned *); 198 199static boolean 200is_long(unsigned *inst) 201{ 202 if (inst[0] & 1) 203 return TRUE; 204 return FALSE; 205} 206 207static boolean 208is_immd(unsigned *inst) 209{ 210 if (is_long(inst) && (inst[1] & 3) == 3) 211 return TRUE; 212 return FALSE; 213} 214 215static INLINE void 216set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 217{ 218 set_long(pc, inst); 219 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 220 inst[1] |= (pred << 7) | (idx << 12); 221} 222 223static INLINE void 224set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 225{ 226 set_long(pc, inst); 227 inst[1] &= ~((0x3 << 4) | (1 << 6)); 228 inst[1] |= (idx << 4) | (on << 6); 229} 230 231static INLINE void 232set_long(struct nv50_pc *pc, unsigned *inst) 233{ 234 if (is_long(inst)) 235 return; 236 237 inst[0] |= 1; 238 set_pred(pc, 0xf, 0, inst); 239 set_pred_wr(pc, 0, 0, inst); 240} 241 242static INLINE void 243set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 244{ 245 if (dst->type == P_RESULT) { 246 set_long(pc, inst); 247 inst[1] |= 0x00000008; 248 } 249 250 alloc_reg(pc, dst); 251 inst[0] |= (dst->hw << 2); 252} 253 254static INLINE void 255set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 256{ 257 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 258 259 set_long(pc, inst); 260 /*XXX: can't be predicated - bits overlap.. catch cases where both 261 * are required and avoid them. */ 262 set_pred(pc, 0, 0, inst); 263 set_pred_wr(pc, 0, 0, inst); 264 265 inst[1] |= 0x00000002 | 0x00000001; 266 inst[0] |= (val & 0x3f) << 16; 267 inst[1] |= (val >> 6) << 2; 268} 269 270static void 271emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 272 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 273{ 274 unsigned inst[2] = { 0, 0 }; 275 276 inst[0] |= 0x80000000; 277 set_dst(pc, dst, inst); 278 alloc_reg(pc, iv); 279 inst[0] |= (iv->hw << 9); 280 alloc_reg(pc, src); 281 inst[0] |= (src->hw << 16); 282 if (noperspective) 283 inst[0] |= (1 << 25); 284 285 emit(pc, inst); 286} 287 288static void 289set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 290{ 291 set_long(pc, inst); 292 if (src->type == P_IMMD) { 293 inst[1] |= (NV50_CB_PMISC << 22); 294 } else { 295 if (pc->p->type == PIPE_SHADER_VERTEX) 296 inst[1] |= (NV50_CB_PVP << 22); 297 else 298 inst[1] |= (NV50_CB_PFP << 22); 299 } 300} 301 302static void 303emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 304{ 305 unsigned inst[2] = { 0, 0 }; 306 307 inst[0] |= 0x10000000; 308 309 set_dst(pc, dst, inst); 310 311 if (dst->type != P_RESULT && src->type == P_IMMD) { 312 set_immd(pc, src, inst); 313 /*XXX: 32-bit, but steals part of "half" reg space - need to 314 * catch and handle this case if/when we do half-regs 315 */ 316 inst[0] |= 0x00008000; 317 } else 318 if (src->type == P_IMMD || src->type == P_CONST) { 319 set_long(pc, inst); 320 set_cseg(pc, src, inst); 321 inst[0] |= (src->hw << 9); 322 inst[1] |= 0x20000000; /* src0 const? */ 323 } else { 324 if (src->type == P_ATTR) { 325 set_long(pc, inst); 326 inst[1] |= 0x00200000; 327 } 328 329 alloc_reg(pc, src); 330 inst[0] |= (src->hw << 9); 331 } 332 333 /* We really should support "half" instructions here at some point, 334 * but I don't feel confident enough about them yet. 335 */ 336 set_long(pc, inst); 337 if (is_long(inst) && !is_immd(inst)) { 338 inst[1] |= 0x04000000; /* 32-bit */ 339 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 340 } 341 342 emit(pc, inst); 343} 344 345static boolean 346check_swap_src_0_1(struct nv50_pc *pc, 347 struct nv50_reg **s0, struct nv50_reg **s1) 348{ 349 struct nv50_reg *src0 = *s0, *src1 = *s1; 350 351 if (src0->type == P_CONST) { 352 if (src1->type != P_CONST) { 353 *s0 = src1; 354 *s1 = src0; 355 return TRUE; 356 } 357 } else 358 if (src1->type == P_ATTR) { 359 if (src0->type != P_ATTR) { 360 *s0 = src1; 361 *s1 = src0; 362 return TRUE; 363 } 364 } 365 366 return FALSE; 367} 368 369static void 370set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 371{ 372 if (src->type == P_ATTR) { 373 set_long(pc, inst); 374 inst[1] |= 0x00200000; 375 } else 376 if (src->type == P_CONST || src->type == P_IMMD) { 377 struct nv50_reg *temp = temp_temp(pc); 378 379 emit_mov(pc, temp, src); 380 src = temp; 381 } 382 383 alloc_reg(pc, src); 384 inst[0] |= (src->hw << 9); 385} 386 387static void 388set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 389{ 390 if (src->type == P_ATTR) { 391 struct nv50_reg *temp = temp_temp(pc); 392 393 emit_mov(pc, temp, src); 394 src = temp; 395 } else 396 if (src->type == P_CONST || src->type == P_IMMD) { 397 assert(!(inst[0] & 0x00800000)); 398 if (inst[0] & 0x01000000) { 399 struct nv50_reg *temp = temp_temp(pc); 400 401 emit_mov(pc, temp, src); 402 src = temp; 403 } else { 404 set_cseg(pc, src, inst); 405 inst[0] |= 0x00800000; 406 } 407 } 408 409 alloc_reg(pc, src); 410 inst[0] |= (src->hw << 16); 411} 412 413static void 414set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 415{ 416 set_long(pc, inst); 417 418 if (src->type == P_ATTR) { 419 struct nv50_reg *temp = temp_temp(pc); 420 421 emit_mov(pc, temp, src); 422 src = temp; 423 } else 424 if (src->type == P_CONST || src->type == P_IMMD) { 425 assert(!(inst[0] & 0x01000000)); 426 if (inst[0] & 0x00800000) { 427 struct nv50_reg *temp = temp_temp(pc); 428 429 emit_mov(pc, temp, src); 430 src = temp; 431 } else { 432 set_cseg(pc, src, inst); 433 inst[0] |= 0x01000000; 434 } 435 } 436 437 alloc_reg(pc, src); 438 inst[1] |= (src->hw << 14); 439} 440 441static void 442emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 443 struct nv50_reg *src1) 444{ 445 unsigned inst[2] = { 0, 0 }; 446 447 inst[0] |= 0xc0000000; 448 set_long(pc, inst); 449 450 check_swap_src_0_1(pc, &src0, &src1); 451 set_dst(pc, dst, inst); 452 set_src_0(pc, src0, inst); 453 set_src_1(pc, src1, inst); 454 455 emit(pc, inst); 456} 457 458static void 459emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 460 struct nv50_reg *src0, struct nv50_reg *src1) 461{ 462 unsigned inst[2] = { 0, 0 }; 463 464 inst[0] |= 0xb0000000; 465 466 check_swap_src_0_1(pc, &src0, &src1); 467 set_dst(pc, dst, inst); 468 set_src_0(pc, src0, inst); 469 if (is_long(inst)) 470 set_src_2(pc, src1, inst); 471 else 472 set_src_1(pc, src1, inst); 473 474 emit(pc, inst); 475} 476 477static void 478emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 479 struct nv50_reg *src0, struct nv50_reg *src1) 480{ 481 unsigned inst[2] = { 0, 0 }; 482 483 set_long(pc, inst); 484 inst[0] |= 0xb0000000; 485 inst[1] |= (sub << 29); 486 487 check_swap_src_0_1(pc, &src0, &src1); 488 set_dst(pc, dst, inst); 489 set_src_0(pc, src0, inst); 490 set_src_1(pc, src1, inst); 491 492 emit(pc, inst); 493} 494 495static void 496emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 497 struct nv50_reg *src1) 498{ 499 unsigned inst[2] = { 0, 0 }; 500 501 inst[0] |= 0xb0000000; 502 503 set_long(pc, inst); 504 if (check_swap_src_0_1(pc, &src0, &src1)) 505 inst[1] |= 0x04000000; 506 else 507 inst[1] |= 0x08000000; 508 509 set_dst(pc, dst, inst); 510 set_src_0(pc, src0, inst); 511 set_src_2(pc, src1, inst); 512 513 emit(pc, inst); 514} 515 516static void 517emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 518 struct nv50_reg *src1, struct nv50_reg *src2) 519{ 520 unsigned inst[2] = { 0, 0 }; 521 522 inst[0] |= 0xe0000000; 523 524 check_swap_src_0_1(pc, &src0, &src1); 525 set_dst(pc, dst, inst); 526 set_src_0(pc, src0, inst); 527 set_src_1(pc, src1, inst); 528 set_src_2(pc, src2, inst); 529 530 emit(pc, inst); 531} 532 533static void 534emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 535 struct nv50_reg *src1, struct nv50_reg *src2) 536{ 537 unsigned inst[2] = { 0, 0 }; 538 539 inst[0] |= 0xe0000000; 540 set_long(pc, inst); 541 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 542 543 check_swap_src_0_1(pc, &src0, &src1); 544 set_dst(pc, dst, inst); 545 set_src_0(pc, src0, inst); 546 set_src_1(pc, src1, inst); 547 set_src_2(pc, src2, inst); 548 549 emit(pc, inst); 550} 551 552static void 553emit_flop(struct nv50_pc *pc, unsigned sub, 554 struct nv50_reg *dst, struct nv50_reg *src) 555{ 556 unsigned inst[2] = { 0, 0 }; 557 558 inst[0] |= 0x90000000; 559 if (sub) { 560 set_long(pc, inst); 561 inst[1] |= (sub << 29); 562 } 563 564 set_dst(pc, dst, inst); 565 set_src_0(pc, src, inst); 566 567 emit(pc, inst); 568} 569 570static void 571emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 572{ 573 unsigned inst[2] = { 0, 0 }; 574 575 inst[0] |= 0xb0000000; 576 577 set_dst(pc, dst, inst); 578 set_src_0(pc, src, inst); 579 set_long(pc, inst); 580 inst[1] |= (6 << 29) | 0x00004000; 581 582 emit(pc, inst); 583} 584 585/*XXX: inaccurate results.. why? */ 586#define ALLOW_SET_SWAP 0 587 588static void 589emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 590 struct nv50_reg *src0, struct nv50_reg *src1) 591{ 592 unsigned inst[2] = { 0, 0 }; 593#if ALLOW_SET_SWAP 594 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; 595#endif 596 struct nv50_reg *rdst; 597 598#if ALLOW_SET_SWAP 599 assert(c_op <= 7); 600 if (check_swap_src_0_1(pc, &src0, &src1)) 601 c_op = inv_cop[c_op]; 602#endif 603 604 rdst = dst; 605 if (dst->type != P_TEMP) 606 dst = alloc_temp(pc, NULL); 607 608 /* set.u32 */ 609 set_long(pc, inst); 610 inst[0] |= 0xb0000000; 611 inst[1] |= (3 << 29); 612 inst[1] |= (c_op << 14); 613 /*XXX: breaks things, .u32 by default? 614 * decuda will disasm as .u16 and use .lo/.hi regs, but this 615 * doesn't seem to match what the hw actually does. 616 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 617 */ 618 set_dst(pc, dst, inst); 619 set_src_0(pc, src0, inst); 620 set_src_1(pc, src1, inst); 621 emit(pc, inst); 622 623 /* cvt.f32.u32 */ 624 inst[0] = 0xa0000001; 625 inst[1] = 0x64014780; 626 set_dst(pc, rdst, inst); 627 set_src_0(pc, dst, inst); 628 emit(pc, inst); 629 630 if (dst != rdst) 631 free_temp(pc, dst); 632} 633 634static void 635emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 636{ 637 unsigned inst[2] = { 0, 0 }; 638 639 inst[0] = 0xa0000000; /* cvt */ 640 set_long(pc, inst); 641 inst[1] |= (6 << 29); /* cvt */ 642 inst[1] |= 0x08000000; /* integer mode */ 643 inst[1] |= 0x04000000; /* 32 bit */ 644 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 645 inst[1] |= (1 << 14); /* src .f32 */ 646 set_dst(pc, dst, inst); 647 set_src_0(pc, src, inst); 648 649 emit(pc, inst); 650} 651 652static void 653emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 654 struct nv50_reg *v, struct nv50_reg *e) 655{ 656 struct nv50_reg *temp = alloc_temp(pc, NULL); 657 658 emit_flop(pc, 3, temp, v); 659 emit_mul(pc, temp, temp, e); 660 emit_preex2(pc, temp, temp); 661 emit_flop(pc, 6, dst, temp); 662 663 free_temp(pc, temp); 664} 665 666static void 667emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 668{ 669 unsigned inst[2] = { 0, 0 }; 670 671 inst[0] = 0xa0000000; /* cvt */ 672 set_long(pc, inst); 673 inst[1] |= (6 << 29); /* cvt */ 674 inst[1] |= 0x04000000; /* 32 bit */ 675 inst[1] |= (1 << 14); /* src .f32 */ 676 inst[1] |= ((1 << 6) << 14); /* .abs */ 677 set_dst(pc, dst, inst); 678 set_src_0(pc, src, inst); 679 680 emit(pc, inst); 681} 682 683static void 684emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) 685{ 686 struct nv50_reg *one = alloc_immd(pc, 1.0); 687 struct nv50_reg *zero = alloc_immd(pc, 0.0); 688 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 689 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 690 struct nv50_reg *tmp[4]; 691 692 emit_mov(pc, dst[0], one); 693 emit_mov(pc, dst[3], one); 694 695 tmp[0] = temp_temp(pc); 696 emit_minmax(pc, 4, dst[1], src[0], zero); 697 set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); 698 699 tmp[1] = temp_temp(pc); 700 emit_minmax(pc, 4, tmp[1], src[1], zero); 701 702 tmp[3] = temp_temp(pc); 703 emit_minmax(pc, 4, tmp[3], src[3], neg128); 704 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 705 706 emit_pow(pc, dst[2], tmp[1], tmp[3]); 707 emit_mov(pc, dst[2], zero); 708 set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); 709} 710 711static struct nv50_reg * 712tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 713{ 714 switch (dst->DstRegister.File) { 715 case TGSI_FILE_TEMPORARY: 716 return &pc->temp[dst->DstRegister.Index * 4 + c]; 717 case TGSI_FILE_OUTPUT: 718 return &pc->result[dst->DstRegister.Index * 4 + c]; 719 case TGSI_FILE_NULL: 720 return NULL; 721 default: 722 break; 723 } 724 725 return NULL; 726} 727 728static struct nv50_reg * 729tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 730{ 731 struct nv50_reg *r = NULL; 732 struct nv50_reg *temp; 733 unsigned c; 734 735 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 736 switch (c) { 737 case TGSI_EXTSWIZZLE_X: 738 case TGSI_EXTSWIZZLE_Y: 739 case TGSI_EXTSWIZZLE_Z: 740 case TGSI_EXTSWIZZLE_W: 741 switch (src->SrcRegister.File) { 742 case TGSI_FILE_INPUT: 743 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 744 break; 745 case TGSI_FILE_TEMPORARY: 746 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 747 break; 748 case TGSI_FILE_CONSTANT: 749 r = &pc->param[src->SrcRegister.Index * 4 + c]; 750 break; 751 case TGSI_FILE_IMMEDIATE: 752 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 753 break; 754 default: 755 assert(0); 756 break; 757 } 758 break; 759 case TGSI_EXTSWIZZLE_ZERO: 760 r = alloc_immd(pc, 0.0); 761 break; 762 case TGSI_EXTSWIZZLE_ONE: 763 r = alloc_immd(pc, 1.0); 764 break; 765 default: 766 assert(0); 767 break; 768 } 769 770 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 771 case TGSI_UTIL_SIGN_KEEP: 772 break; 773 case TGSI_UTIL_SIGN_CLEAR: 774 temp = temp_temp(pc); 775 emit_abs(pc, temp, r); 776 r = temp; 777 break; 778 default: 779 assert(0); 780 break; 781 } 782 783 return r; 784} 785 786static boolean 787nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 788{ 789 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 790 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 791 unsigned mask, sat; 792 int i, c; 793 794 NOUVEAU_ERR("insn %p\n", tok); 795 796 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 797 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 798 799 for (c = 0; c < 4; c++) { 800 if (mask & (1 << c)) 801 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 802 else 803 dst[c] = NULL; 804 } 805 806 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 807 for (c = 0; c < 4; c++) 808 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 809 } 810 811 if (sat) { 812 for (c = 0; c < 4; c++) { 813 rdst[c] = dst[c]; 814 dst[c] = temp_temp(pc); 815 } 816 } 817 818 switch (inst->Instruction.Opcode) { 819 case TGSI_OPCODE_ABS: 820 for (c = 0; c < 4; c++) { 821 if (!(mask & (1 << c))) 822 continue; 823 emit_abs(pc, dst[c], src[0][c]); 824 } 825 break; 826 case TGSI_OPCODE_ADD: 827 for (c = 0; c < 4; c++) { 828 if (!(mask & (1 << c))) 829 continue; 830 emit_add(pc, dst[c], src[0][c], src[1][c]); 831 } 832 break; 833 case TGSI_OPCODE_COS: 834 for (c = 0; c < 4; c++) { 835 if (!(mask & (1 << c))) 836 continue; 837 emit_flop(pc, 5, dst[c], src[0][c]); 838 } 839 break; 840 case TGSI_OPCODE_DP3: 841 temp = alloc_temp(pc, NULL); 842 emit_mul(pc, temp, src[0][0], src[1][0]); 843 emit_mad(pc, temp, src[0][1], src[1][1], temp); 844 emit_mad(pc, temp, src[0][2], src[1][2], temp); 845 for (c = 0; c < 4; c++) { 846 if (!(mask & (1 << c))) 847 continue; 848 emit_mov(pc, dst[c], temp); 849 } 850 free_temp(pc, temp); 851 break; 852 case TGSI_OPCODE_DP4: 853 temp = alloc_temp(pc, NULL); 854 emit_mul(pc, temp, src[0][0], src[1][0]); 855 emit_mad(pc, temp, src[0][1], src[1][1], temp); 856 emit_mad(pc, temp, src[0][2], src[1][2], temp); 857 emit_mad(pc, temp, src[0][3], src[1][3], temp); 858 for (c = 0; c < 4; c++) { 859 if (!(mask & (1 << c))) 860 continue; 861 emit_mov(pc, dst[c], temp); 862 } 863 free_temp(pc, temp); 864 break; 865 case TGSI_OPCODE_DPH: 866 temp = alloc_temp(pc, NULL); 867 emit_mul(pc, temp, src[0][0], src[1][0]); 868 emit_mad(pc, temp, src[0][1], src[1][1], temp); 869 emit_mad(pc, temp, src[0][2], src[1][2], temp); 870 emit_add(pc, temp, src[1][3], temp); 871 for (c = 0; c < 4; c++) { 872 if (!(mask & (1 << c))) 873 continue; 874 emit_mov(pc, dst[c], temp); 875 } 876 free_temp(pc, temp); 877 break; 878 case TGSI_OPCODE_DST: 879 { 880 struct nv50_reg *one = alloc_immd(pc, 1.0); 881 if (mask & (1 << 0)) 882 emit_mov(pc, dst[0], one); 883 if (mask & (1 << 1)) 884 emit_mul(pc, dst[1], src[0][1], src[1][1]); 885 if (mask & (1 << 2)) 886 emit_mov(pc, dst[2], src[0][2]); 887 if (mask & (1 << 3)) 888 emit_mov(pc, dst[3], src[1][3]); 889 FREE(one); 890 } 891 break; 892 case TGSI_OPCODE_EX2: 893 temp = alloc_temp(pc, NULL); 894 for (c = 0; c < 4; c++) { 895 if (!(mask & (1 << c))) 896 continue; 897 emit_preex2(pc, temp, src[0][c]); 898 emit_flop(pc, 6, dst[c], temp); 899 } 900 free_temp(pc, temp); 901 break; 902 case TGSI_OPCODE_FLR: 903 for (c = 0; c < 4; c++) { 904 if (!(mask & (1 << c))) 905 continue; 906 emit_flr(pc, dst[c], src[0][c]); 907 } 908 break; 909 case TGSI_OPCODE_FRC: 910 temp = alloc_temp(pc, NULL); 911 for (c = 0; c < 4; c++) { 912 if (!(mask & (1 << c))) 913 continue; 914 emit_flr(pc, temp, src[0][c]); 915 emit_sub(pc, dst[c], src[0][c], temp); 916 } 917 free_temp(pc, temp); 918 break; 919 case TGSI_OPCODE_LIT: 920 /*XXX: writemask */ 921 emit_lit(pc, &dst[0], &src[0][0]); 922 break; 923 case TGSI_OPCODE_LG2: 924 for (c = 0; c < 4; c++) { 925 if (!(mask & (1 << c))) 926 continue; 927 emit_flop(pc, 3, dst[c], src[0][c]); 928 } 929 break; 930 case TGSI_OPCODE_MAD: 931 for (c = 0; c < 4; c++) { 932 if (!(mask & (1 << c))) 933 continue; 934 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 935 } 936 break; 937 case TGSI_OPCODE_MAX: 938 for (c = 0; c < 4; c++) { 939 if (!(mask & (1 << c))) 940 continue; 941 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 942 } 943 break; 944 case TGSI_OPCODE_MIN: 945 for (c = 0; c < 4; c++) { 946 if (!(mask & (1 << c))) 947 continue; 948 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 949 } 950 break; 951 case TGSI_OPCODE_MOV: 952 for (c = 0; c < 4; c++) { 953 if (!(mask & (1 << c))) 954 continue; 955 emit_mov(pc, dst[c], src[0][c]); 956 } 957 break; 958 case TGSI_OPCODE_MUL: 959 for (c = 0; c < 4; c++) { 960 if (!(mask & (1 << c))) 961 continue; 962 emit_mul(pc, dst[c], src[0][c], src[1][c]); 963 } 964 break; 965 case TGSI_OPCODE_POW: 966 temp = alloc_temp(pc, NULL); 967 emit_pow(pc, temp, src[0][0], src[1][0]); 968 for (c = 0; c < 4; c++) { 969 if (!(mask & (1 << c))) 970 continue; 971 emit_mov(pc, dst[c], temp); 972 } 973 free_temp(pc, temp); 974 break; 975 case TGSI_OPCODE_RCP: 976 for (c = 0; c < 4; c++) { 977 if (!(mask & (1 << c))) 978 continue; 979 emit_flop(pc, 0, dst[c], src[0][c]); 980 } 981 break; 982 case TGSI_OPCODE_RSQ: 983 for (c = 0; c < 4; c++) { 984 if (!(mask & (1 << c))) 985 continue; 986 emit_flop(pc, 2, dst[c], src[0][c]); 987 } 988 break; 989 case TGSI_OPCODE_SGE: 990 for (c = 0; c < 4; c++) { 991 if (!(mask & (1 << c))) 992 continue; 993 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 994 } 995 break; 996 case TGSI_OPCODE_SIN: 997 for (c = 0; c < 4; c++) { 998 if (!(mask & (1 << c))) 999 continue; 1000 emit_flop(pc, 4, dst[c], src[0][c]); 1001 } 1002 break; 1003 case TGSI_OPCODE_SLT: 1004 for (c = 0; c < 4; c++) { 1005 if (!(mask & (1 << c))) 1006 continue; 1007 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 1008 } 1009 break; 1010 case TGSI_OPCODE_SUB: 1011 for (c = 0; c < 4; c++) { 1012 if (!(mask & (1 << c))) 1013 continue; 1014 emit_sub(pc, dst[c], src[0][c], src[1][c]); 1015 } 1016 break; 1017 case TGSI_OPCODE_XPD: 1018 temp = alloc_temp(pc, NULL); 1019 if (mask & (1 << 0)) { 1020 emit_mul(pc, temp, src[0][2], src[1][1]); 1021 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 1022 } 1023 if (mask & (1 << 1)) { 1024 emit_mul(pc, temp, src[0][0], src[1][2]); 1025 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 1026 } 1027 if (mask & (1 << 2)) { 1028 emit_mul(pc, temp, src[0][1], src[1][0]); 1029 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 1030 } 1031 free_temp(pc, temp); 1032 break; 1033 case TGSI_OPCODE_END: 1034 break; 1035 default: 1036 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 1037 return FALSE; 1038 } 1039 1040 if (sat) { 1041 for (c = 0; c < 4; c++) { 1042 unsigned inst[2] = { 0, 0 }; 1043 1044 if (!(mask & (1 << c))) 1045 continue; 1046 1047 inst[0] = 0xa0000000; /* cvt */ 1048 set_long(pc, inst); 1049 inst[1] |= (6 << 29); /* cvt */ 1050 inst[1] |= 0x04000000; /* 32 bit */ 1051 inst[1] |= (1 << 14); /* src .f32 */ 1052 inst[1] |= ((1 << 5) << 14); /* .sat */ 1053 set_dst(pc, rdst[c], inst); 1054 set_src_0(pc, dst[c], inst); 1055 emit(pc, inst); 1056 } 1057 } 1058 1059 kill_temp_temp(pc); 1060 return TRUE; 1061} 1062 1063static boolean 1064nv50_program_tx_prep(struct nv50_pc *pc) 1065{ 1066 struct tgsi_parse_context p; 1067 boolean ret = FALSE; 1068 unsigned i, c; 1069 1070 tgsi_parse_init(&p, pc->p->pipe.tokens); 1071 while (!tgsi_parse_end_of_tokens(&p)) { 1072 const union tgsi_full_token *tok = &p.FullToken; 1073 1074 tgsi_parse_token(&p); 1075 switch (tok->Token.Type) { 1076 case TGSI_TOKEN_TYPE_IMMEDIATE: 1077 { 1078 const struct tgsi_full_immediate *imm = 1079 &p.FullToken.FullImmediate; 1080 1081 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1082 imm->u.ImmediateFloat32[1].Float, 1083 imm->u.ImmediateFloat32[2].Float, 1084 imm->u.ImmediateFloat32[3].Float); 1085 } 1086 break; 1087 case TGSI_TOKEN_TYPE_DECLARATION: 1088 { 1089 const struct tgsi_full_declaration *d; 1090 unsigned last; 1091 1092 d = &p.FullToken.FullDeclaration; 1093 last = d->u.DeclarationRange.Last; 1094 1095 switch (d->Declaration.File) { 1096 case TGSI_FILE_TEMPORARY: 1097 if (pc->temp_nr < (last + 1)) 1098 pc->temp_nr = last + 1; 1099 break; 1100 case TGSI_FILE_OUTPUT: 1101 if (pc->result_nr < (last + 1)) 1102 pc->result_nr = last + 1; 1103 break; 1104 case TGSI_FILE_INPUT: 1105 if (pc->attr_nr < (last + 1)) 1106 pc->attr_nr = last + 1; 1107 break; 1108 case TGSI_FILE_CONSTANT: 1109 if (pc->param_nr < (last + 1)) 1110 pc->param_nr = last + 1; 1111 break; 1112 default: 1113 NOUVEAU_ERR("bad decl file %d\n", 1114 d->Declaration.File); 1115 goto out_err; 1116 } 1117 } 1118 break; 1119 case TGSI_TOKEN_TYPE_INSTRUCTION: 1120 break; 1121 default: 1122 break; 1123 } 1124 } 1125 1126 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 1127 if (pc->temp_nr) { 1128 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1129 if (!pc->temp) 1130 goto out_err; 1131 1132 for (i = 0; i < pc->temp_nr; i++) { 1133 for (c = 0; c < 4; c++) { 1134 pc->temp[i*4+c].type = P_TEMP; 1135 pc->temp[i*4+c].hw = -1; 1136 pc->temp[i*4+c].index = i; 1137 } 1138 } 1139 } 1140 1141 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1142 if (pc->attr_nr) { 1143 struct nv50_reg *iv = NULL, *tmp = NULL; 1144 int aid = 0; 1145 1146 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1147 if (!pc->attr) 1148 goto out_err; 1149 1150 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1151 iv = alloc_temp(pc, NULL); 1152 aid++; 1153 } 1154 1155 for (i = 0; i < pc->attr_nr; i++) { 1156 struct nv50_reg *a = &pc->attr[i*4]; 1157 1158 for (c = 0; c < 4; c++) { 1159 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1160 struct nv50_reg *at = 1161 alloc_temp(pc, NULL); 1162 pc->attr[i*4+c].type = at->type; 1163 pc->attr[i*4+c].hw = at->hw; 1164 pc->attr[i*4+c].index = at->index; 1165 } else { 1166 pc->p->cfg.vp.attr[aid/32] |= 1167 (1 << (aid % 32)); 1168 pc->attr[i*4+c].type = P_ATTR; 1169 pc->attr[i*4+c].hw = aid++; 1170 pc->attr[i*4+c].index = i; 1171 } 1172 } 1173 1174 if (pc->p->type != PIPE_SHADER_FRAGMENT) 1175 continue; 1176 1177 emit_interp(pc, iv, iv, iv, FALSE); 1178 tmp = alloc_temp(pc, NULL); 1179 emit_flop(pc, 0, tmp, iv); 1180 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1181 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1182 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1183 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1184 free_temp(pc, tmp); 1185 } 1186 1187 if (iv) 1188 free_temp(pc, iv); 1189 } 1190 1191 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1192 if (pc->result_nr) { 1193 int rid = 0; 1194 1195 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1196 if (!pc->result) 1197 goto out_err; 1198 1199 for (i = 0; i < pc->result_nr; i++) { 1200 for (c = 0; c < 4; c++) { 1201 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1202 pc->result[i*4+c].type = P_TEMP; 1203 pc->result[i*4+c].hw = -1; 1204 } else { 1205 pc->result[i*4+c].type = P_RESULT; 1206 pc->result[i*4+c].hw = rid++; 1207 } 1208 pc->result[i*4+c].index = i; 1209 } 1210 } 1211 } 1212 1213 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1214 if (pc->param_nr) { 1215 int rid = 0; 1216 1217 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1218 if (!pc->param) 1219 goto out_err; 1220 1221 for (i = 0; i < pc->param_nr; i++) { 1222 for (c = 0; c < 4; c++) { 1223 pc->param[i*4+c].type = P_CONST; 1224 pc->param[i*4+c].hw = rid++; 1225 pc->param[i*4+c].index = i; 1226 } 1227 } 1228 } 1229 1230 if (pc->immd_nr) { 1231 int rid = 0; 1232 1233 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1234 if (!pc->immd) 1235 goto out_err; 1236 1237 for (i = 0; i < pc->immd_nr; i++) { 1238 for (c = 0; c < 4; c++) { 1239 pc->immd[i*4+c].type = P_IMMD; 1240 pc->immd[i*4+c].hw = rid++; 1241 pc->immd[i*4+c].index = i; 1242 } 1243 } 1244 } 1245 1246 ret = TRUE; 1247out_err: 1248 tgsi_parse_free(&p); 1249 return ret; 1250} 1251 1252static boolean 1253nv50_program_tx(struct nv50_program *p) 1254{ 1255 struct tgsi_parse_context parse; 1256 struct nv50_pc *pc; 1257 boolean ret; 1258 1259 pc = CALLOC_STRUCT(nv50_pc); 1260 if (!pc) 1261 return FALSE; 1262 pc->p = p; 1263 pc->p->cfg.high_temp = 4; 1264 1265 ret = nv50_program_tx_prep(pc); 1266 if (ret == FALSE) 1267 goto out_cleanup; 1268 1269 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1270 while (!tgsi_parse_end_of_tokens(&parse)) { 1271 const union tgsi_full_token *tok = &parse.FullToken; 1272 1273 tgsi_parse_token(&parse); 1274 1275 switch (tok->Token.Type) { 1276 case TGSI_TOKEN_TYPE_INSTRUCTION: 1277 ret = nv50_program_tx_insn(pc, tok); 1278 if (ret == FALSE) 1279 goto out_err; 1280 break; 1281 default: 1282 break; 1283 } 1284 } 1285 1286 if (p->type == PIPE_SHADER_FRAGMENT) { 1287 struct nv50_reg out; 1288 1289 out.type = P_TEMP; 1290 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) 1291 emit_mov(pc, &out, &pc->result[out.hw]); 1292 } 1293 1294 p->immd_nr = pc->immd_nr * 4; 1295 p->immd = pc->immd_buf; 1296 1297out_err: 1298 tgsi_parse_free(&parse); 1299 1300out_cleanup: 1301 return ret; 1302} 1303 1304static void 1305nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1306{ 1307 int i; 1308 1309 if (nv50_program_tx(p) == FALSE) 1310 assert(0); 1311 /* *not* sufficient, it's fine if last inst is long and 1312 * NOT immd - otherwise it's fucked fucked fucked */ 1313 p->insns[p->insns_nr - 1] |= 0x00000001; 1314 1315 if (p->type == PIPE_SHADER_VERTEX) { 1316 for (i = 0; i < p->insns_nr; i++) 1317 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1318 } else { 1319 for (i = 0; i < p->insns_nr; i++) 1320 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1321 } 1322 1323 p->translated = TRUE; 1324} 1325 1326static void 1327nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1328{ 1329 int i; 1330 1331 for (i = 0; i < p->immd_nr; i++) { 1332 BEGIN_RING(tesla, 0x0f00, 2); 1333 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 1334 OUT_RING (fui(p->immd[i])); 1335 } 1336} 1337 1338static void 1339nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1340{ 1341 struct pipe_winsys *ws = nv50->pipe.winsys; 1342 void *map; 1343 1344 if (!p->buffer) 1345 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1346 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1347 memcpy(map, p->insns, p->insns_nr * 4); 1348 ws->buffer_unmap(ws, p->buffer); 1349} 1350 1351void 1352nv50_vertprog_validate(struct nv50_context *nv50) 1353{ 1354 struct nouveau_grobj *tesla = nv50->screen->tesla; 1355 struct nv50_program *p = nv50->vertprog; 1356 struct nouveau_stateobj *so; 1357 1358 if (!p->translated) { 1359 nv50_program_validate(nv50, p); 1360 if (!p->translated) 1361 assert(0); 1362 } 1363 1364 nv50_program_validate_data(nv50, p); 1365 nv50_program_validate_code(nv50, p); 1366 1367 so = so_new(11, 2); 1368 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1369 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1370 NOUVEAU_BO_HIGH, 0, 0); 1371 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1372 NOUVEAU_BO_LOW, 0, 0); 1373 so_method(so, tesla, 0x1650, 2); 1374 so_data (so, p->cfg.vp.attr[0]); 1375 so_data (so, p->cfg.vp.attr[1]); 1376 so_method(so, tesla, 0x16ac, 2); 1377 so_data (so, 8); 1378 so_data (so, p->cfg.high_temp); 1379 so_method(so, tesla, 0x140c, 1); 1380 so_data (so, 0); /* program start offset */ 1381 so_emit(nv50->screen->nvws, so); 1382 so_ref(NULL, &so); 1383} 1384 1385void 1386nv50_fragprog_validate(struct nv50_context *nv50) 1387{ 1388 struct nouveau_grobj *tesla = nv50->screen->tesla; 1389 struct nv50_program *p = nv50->fragprog; 1390 struct nouveau_stateobj *so; 1391 1392 if (!p->translated) { 1393 nv50_program_validate(nv50, p); 1394 if (!p->translated) 1395 assert(0); 1396 } 1397 1398 nv50_program_validate_data(nv50, p); 1399 nv50_program_validate_code(nv50, p); 1400 1401 so = so_new(7, 2); 1402 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1403 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1404 NOUVEAU_BO_HIGH, 0, 0); 1405 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1406 NOUVEAU_BO_LOW, 0, 0); 1407 so_method(so, tesla, 0x198c, 1); 1408 so_data (so, p->cfg.high_temp); 1409 so_method(so, tesla, 0x1414, 1); 1410 so_data (so, 0); /* program start offset */ 1411 so_emit(nv50->screen->nvws, so); 1412 so_ref(NULL, &so); 1413} 1414 1415void 1416nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1417{ 1418 struct pipe_winsys *ws = nv50->pipe.winsys; 1419 1420 if (p->insns_nr) { 1421 if (p->insns) 1422 FREE(p->insns); 1423 p->insns_nr = 0; 1424 } 1425 1426 if (p->buffer) 1427 pipe_buffer_reference(ws, &p->buffer, NULL); 1428 1429 p->translated = 0; 1430} 1431 1432