nv50_program.c revision b5bbf09c42a9d563984fad875ced5c4814033a3d
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL - gallium craps itself on progs/vp/arl.txt 16 * 17 * MSB - Like MAD, but MUL+SUB 18 * - Fuck it off, introduce a way to negate args for ops that 19 * support it. 20 * 21 * Look into inlining IMMD for ops other than MOV (make it general?) 22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 24 * 25 * Verify half-insns work where expected - and force disable them where they 26 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 27 * 28 * FUCK! watch dst==src vectors, can overwrite components that are needed. 29 * ie. SUB R0, R0.yzxw, R0 30 * 31 * Things to check with renouveau: 32 * SGE/SLT with needed src0/1 swap 33 * FP attr/result assignment - how? 34 * FP/VP constbuf usage 35 */ 36struct nv50_reg { 37 enum { 38 P_TEMP, 39 P_ATTR, 40 P_RESULT, 41 P_CONST, 42 P_IMMD 43 } type; 44 int index; 45 46 int hw; 47 int neg; 48}; 49 50struct nv50_pc { 51 struct nv50_program *p; 52 53 /* hw resources */ 54 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 55 56 /* tgsi resources */ 57 struct nv50_reg *temp; 58 int temp_nr; 59 struct nv50_reg *attr; 60 int attr_nr; 61 struct nv50_reg *result; 62 int result_nr; 63 struct nv50_reg *param; 64 int param_nr; 65 struct nv50_reg *immd; 66 float *immd_buf; 67 int immd_nr; 68 69 struct nv50_reg *temp_temp[8]; 70 unsigned temp_temp_nr; 71}; 72 73static void 74alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 75{ 76 int i; 77 78 if (reg->type != P_TEMP) 79 return; 80 81 if (reg->hw >= 0) { 82 /*XXX: do this here too to catch FP temp-as-attr usage.. 83 * not clean, but works */ 84 if (pc->p->cfg.high_temp < (reg->hw + 1)) 85 pc->p->cfg.high_temp = reg->hw + 1; 86 return; 87 } 88 89 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 90 if (!(pc->r_temp[i])) { 91 pc->r_temp[i] = reg; 92 reg->hw = i; 93 if (pc->p->cfg.high_temp < (i + 1)) 94 pc->p->cfg.high_temp = i + 1; 95 return; 96 } 97 } 98 99 assert(0); 100} 101 102static struct nv50_reg * 103alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 104{ 105 struct nv50_reg *r; 106 int i; 107 108 if (dst && dst->type == P_TEMP && dst->hw == -1) 109 return dst; 110 111 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 112 if (!pc->r_temp[i]) { 113 r = CALLOC_STRUCT(nv50_reg); 114 r->type = P_TEMP; 115 r->index = -1; 116 r->hw = i; 117 pc->r_temp[i] = r; 118 return r; 119 } 120 } 121 122 assert(0); 123 return NULL; 124} 125 126static void 127free_temp(struct nv50_pc *pc, struct nv50_reg *r) 128{ 129 if (r->index == -1) { 130 FREE(pc->r_temp[r->hw]); 131 pc->r_temp[r->hw] = NULL; 132 } 133} 134 135static struct nv50_reg * 136temp_temp(struct nv50_pc *pc) 137{ 138 if (pc->temp_temp_nr >= 8) 139 assert(0); 140 141 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 142 return pc->temp_temp[pc->temp_temp_nr++]; 143} 144 145static void 146kill_temp_temp(struct nv50_pc *pc) 147{ 148 int i; 149 150 for (i = 0; i < pc->temp_temp_nr; i++) 151 free_temp(pc, pc->temp_temp[i]); 152 pc->temp_temp_nr = 0; 153} 154 155static int 156ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 157{ 158 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 159 sizeof(float)); 160 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 161 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 162 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 163 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 164 165 return pc->immd_nr++; 166} 167 168static struct nv50_reg * 169alloc_immd(struct nv50_pc *pc, float f) 170{ 171 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 172 unsigned hw; 173 174 hw = ctor_immd(pc, f, 0, 0, 0) * 4; 175 r->type = P_IMMD; 176 r->hw = hw; 177 r->index = -1; 178 return r; 179} 180 181static void 182emit(struct nv50_pc *pc, unsigned *inst) 183{ 184 struct nv50_program *p = pc->p; 185 186 if (inst[0] & 1) { 187 p->insns_nr += 2; 188 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 189 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 190 } else { 191 p->insns_nr += 1; 192 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 193 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 194 } 195} 196 197static INLINE void set_long(struct nv50_pc *, unsigned *); 198 199static boolean 200is_long(unsigned *inst) 201{ 202 if (inst[0] & 1) 203 return TRUE; 204 return FALSE; 205} 206 207static boolean 208is_immd(unsigned *inst) 209{ 210 if (is_long(inst) && (inst[1] & 3) == 3) 211 return TRUE; 212 return FALSE; 213} 214 215static INLINE void 216set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 217{ 218 set_long(pc, inst); 219 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 220 inst[1] |= (pred << 7) | (idx << 12); 221} 222 223static INLINE void 224set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 225{ 226 set_long(pc, inst); 227 inst[1] &= ~((0x3 << 4) | (1 << 6)); 228 inst[1] |= (idx << 4) | (on << 6); 229} 230 231static INLINE void 232set_long(struct nv50_pc *pc, unsigned *inst) 233{ 234 if (is_long(inst)) 235 return; 236 237 inst[0] |= 1; 238 set_pred(pc, 0xf, 0, inst); 239 set_pred_wr(pc, 0, 0, inst); 240} 241 242static INLINE void 243set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 244{ 245 if (dst->type == P_RESULT) { 246 set_long(pc, inst); 247 inst[1] |= 0x00000008; 248 } 249 250 alloc_reg(pc, dst); 251 inst[0] |= (dst->hw << 2); 252} 253 254static INLINE void 255set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 256{ 257 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 258 259 set_long(pc, inst); 260 /*XXX: can't be predicated - bits overlap.. catch cases where both 261 * are required and avoid them. */ 262 set_pred(pc, 0, 0, inst); 263 set_pred_wr(pc, 0, 0, inst); 264 265 inst[1] |= 0x00000002 | 0x00000001; 266 inst[0] |= (val & 0x3f) << 16; 267 inst[1] |= (val >> 6) << 2; 268} 269 270static void 271emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 272 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 273{ 274 unsigned inst[2] = { 0, 0 }; 275 276 inst[0] |= 0x80000000; 277 set_dst(pc, dst, inst); 278 alloc_reg(pc, iv); 279 inst[0] |= (iv->hw << 9); 280 alloc_reg(pc, src); 281 inst[0] |= (src->hw << 16); 282 if (noperspective) 283 inst[0] |= (1 << 25); 284 285 emit(pc, inst); 286} 287 288static void 289set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 290{ 291 set_long(pc, inst); 292 if (src->type == P_IMMD) { 293 inst[1] |= (NV50_CB_PMISC << 22); 294 } else { 295 if (pc->p->type == PIPE_SHADER_VERTEX) 296 inst[1] |= (NV50_CB_PVP << 22); 297 else 298 inst[1] |= (NV50_CB_PFP << 22); 299 } 300} 301 302static void 303emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 304{ 305 unsigned inst[2] = { 0, 0 }; 306 307 inst[0] |= 0x10000000; 308 309 set_dst(pc, dst, inst); 310 311 if (dst->type != P_RESULT && src->type == P_IMMD) { 312 set_immd(pc, src, inst); 313 /*XXX: 32-bit, but steals part of "half" reg space - need to 314 * catch and handle this case if/when we do half-regs 315 */ 316 inst[0] |= 0x00008000; 317 } else 318 if (src->type == P_IMMD || src->type == P_CONST) { 319 set_long(pc, inst); 320 set_cseg(pc, src, inst); 321 inst[0] |= (src->hw << 9); 322 inst[1] |= 0x20000000; /* src0 const? */ 323 } else { 324 if (src->type == P_ATTR) { 325 set_long(pc, inst); 326 inst[1] |= 0x00200000; 327 } 328 329 alloc_reg(pc, src); 330 inst[0] |= (src->hw << 9); 331 } 332 333 /* We really should support "half" instructions here at some point, 334 * but I don't feel confident enough about them yet. 335 */ 336 set_long(pc, inst); 337 if (is_long(inst) && !is_immd(inst)) { 338 inst[1] |= 0x04000000; /* 32-bit */ 339 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 340 } 341 342 emit(pc, inst); 343} 344 345static boolean 346check_swap_src_0_1(struct nv50_pc *pc, 347 struct nv50_reg **s0, struct nv50_reg **s1) 348{ 349 struct nv50_reg *src0 = *s0, *src1 = *s1; 350 351 if (src0->type == P_CONST) { 352 if (src1->type != P_CONST) { 353 *s0 = src1; 354 *s1 = src0; 355 return TRUE; 356 } 357 } else 358 if (src1->type == P_ATTR) { 359 if (src0->type != P_ATTR) { 360 *s0 = src1; 361 *s1 = src0; 362 return TRUE; 363 } 364 } 365 366 return FALSE; 367} 368 369static void 370set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 371{ 372 if (src->type == P_ATTR) { 373 set_long(pc, inst); 374 inst[1] |= 0x00200000; 375 } else 376 if (src->type == P_CONST || src->type == P_IMMD) { 377 struct nv50_reg *temp = temp_temp(pc); 378 379 emit_mov(pc, temp, src); 380 src = temp; 381 } 382 383 alloc_reg(pc, src); 384 inst[0] |= (src->hw << 9); 385} 386 387static void 388set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 389{ 390 if (src->type == P_ATTR) { 391 struct nv50_reg *temp = temp_temp(pc); 392 393 emit_mov(pc, temp, src); 394 src = temp; 395 } else 396 if (src->type == P_CONST || src->type == P_IMMD) { 397 assert(!(inst[0] & 0x00800000)); 398 if (inst[0] & 0x01000000) { 399 struct nv50_reg *temp = temp_temp(pc); 400 401 emit_mov(pc, temp, src); 402 src = temp; 403 } else { 404 set_cseg(pc, src, inst); 405 inst[0] |= 0x00800000; 406 } 407 } 408 409 alloc_reg(pc, src); 410 inst[0] |= (src->hw << 16); 411} 412 413static void 414set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 415{ 416 set_long(pc, inst); 417 418 if (src->type == P_ATTR) { 419 struct nv50_reg *temp = temp_temp(pc); 420 421 emit_mov(pc, temp, src); 422 src = temp; 423 } else 424 if (src->type == P_CONST || src->type == P_IMMD) { 425 assert(!(inst[0] & 0x01000000)); 426 if (inst[0] & 0x00800000) { 427 struct nv50_reg *temp = temp_temp(pc); 428 429 emit_mov(pc, temp, src); 430 src = temp; 431 } else { 432 set_cseg(pc, src, inst); 433 inst[0] |= 0x01000000; 434 } 435 } 436 437 alloc_reg(pc, src); 438 inst[1] |= (src->hw << 14); 439} 440 441static void 442emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 443 struct nv50_reg *src1) 444{ 445 unsigned inst[2] = { 0, 0 }; 446 447 inst[0] |= 0xc0000000; 448 set_long(pc, inst); 449 450 check_swap_src_0_1(pc, &src0, &src1); 451 set_dst(pc, dst, inst); 452 set_src_0(pc, src0, inst); 453 set_src_1(pc, src1, inst); 454 455 emit(pc, inst); 456} 457 458static void 459emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 460 struct nv50_reg *src0, struct nv50_reg *src1) 461{ 462 unsigned inst[2] = { 0, 0 }; 463 464 inst[0] |= 0xb0000000; 465 466 check_swap_src_0_1(pc, &src0, &src1); 467 set_dst(pc, dst, inst); 468 set_src_0(pc, src0, inst); 469 if (is_long(inst)) 470 set_src_2(pc, src1, inst); 471 else 472 set_src_1(pc, src1, inst); 473 474 emit(pc, inst); 475} 476 477static void 478emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 479 struct nv50_reg *src0, struct nv50_reg *src1) 480{ 481 unsigned inst[2] = { 0, 0 }; 482 483 set_long(pc, inst); 484 inst[0] |= 0xb0000000; 485 inst[1] |= (sub << 29); 486 487 check_swap_src_0_1(pc, &src0, &src1); 488 set_dst(pc, dst, inst); 489 set_src_0(pc, src0, inst); 490 set_src_1(pc, src1, inst); 491 492 emit(pc, inst); 493} 494 495static void 496emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 497 struct nv50_reg *src1) 498{ 499 unsigned inst[2] = { 0, 0 }; 500 501 inst[0] |= 0xb0000000; 502 503 set_long(pc, inst); 504 if (check_swap_src_0_1(pc, &src0, &src1)) 505 inst[1] |= 0x04000000; 506 else 507 inst[1] |= 0x08000000; 508 509 set_dst(pc, dst, inst); 510 set_src_0(pc, src0, inst); 511 set_src_2(pc, src1, inst); 512 513 emit(pc, inst); 514} 515 516static void 517emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 518 struct nv50_reg *src1, struct nv50_reg *src2) 519{ 520 unsigned inst[2] = { 0, 0 }; 521 522 inst[0] |= 0xe0000000; 523 524 check_swap_src_0_1(pc, &src0, &src1); 525 set_dst(pc, dst, inst); 526 set_src_0(pc, src0, inst); 527 set_src_1(pc, src1, inst); 528 set_src_2(pc, src2, inst); 529 530 emit(pc, inst); 531} 532 533static void 534emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 535 struct nv50_reg *src1, struct nv50_reg *src2) 536{ 537 unsigned inst[2] = { 0, 0 }; 538 539 inst[0] |= 0xe0000000; 540 set_long(pc, inst); 541 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 542 543 check_swap_src_0_1(pc, &src0, &src1); 544 set_dst(pc, dst, inst); 545 set_src_0(pc, src0, inst); 546 set_src_1(pc, src1, inst); 547 set_src_2(pc, src2, inst); 548 549 emit(pc, inst); 550} 551 552static void 553emit_flop(struct nv50_pc *pc, unsigned sub, 554 struct nv50_reg *dst, struct nv50_reg *src) 555{ 556 unsigned inst[2] = { 0, 0 }; 557 558 inst[0] |= 0x90000000; 559 if (sub) { 560 set_long(pc, inst); 561 inst[1] |= (sub << 29); 562 } 563 564 set_dst(pc, dst, inst); 565 set_src_0(pc, src, inst); 566 567 emit(pc, inst); 568} 569 570static void 571emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 572{ 573 unsigned inst[2] = { 0, 0 }; 574 575 inst[0] |= 0xb0000000; 576 577 set_dst(pc, dst, inst); 578 set_src_0(pc, src, inst); 579 set_long(pc, inst); 580 inst[1] |= (6 << 29) | 0x00004000; 581 582 emit(pc, inst); 583} 584 585/*XXX: inaccurate results.. why? */ 586#define ALLOW_SET_SWAP 0 587 588static void 589emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 590 struct nv50_reg *src0, struct nv50_reg *src1) 591{ 592 unsigned inst[2] = { 0, 0 }; 593#if ALLOW_SET_SWAP 594 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; 595#endif 596 struct nv50_reg *rdst; 597 598#if ALLOW_SET_SWAP 599 assert(c_op <= 7); 600 if (check_swap_src_0_1(pc, &src0, &src1)) 601 c_op = inv_cop[c_op]; 602#endif 603 604 rdst = dst; 605 if (dst->type != P_TEMP) 606 dst = alloc_temp(pc, NULL); 607 608 /* set.u32 */ 609 set_long(pc, inst); 610 inst[0] |= 0xb0000000; 611 inst[1] |= (3 << 29); 612 inst[1] |= (c_op << 14); 613 /*XXX: breaks things, .u32 by default? 614 * decuda will disasm as .u16 and use .lo/.hi regs, but this 615 * doesn't seem to match what the hw actually does. 616 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 617 */ 618 set_dst(pc, dst, inst); 619 set_src_0(pc, src0, inst); 620 set_src_1(pc, src1, inst); 621 emit(pc, inst); 622 623 /* cvt.f32.u32 */ 624 inst[0] = 0xa0000001; 625 inst[1] = 0x64014780; 626 set_dst(pc, rdst, inst); 627 set_src_0(pc, dst, inst); 628 emit(pc, inst); 629 630 if (dst != rdst) 631 free_temp(pc, dst); 632} 633 634static void 635emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 636{ 637 unsigned inst[2] = { 0, 0 }; 638 639 inst[0] = 0xa0000000; /* cvt */ 640 set_long(pc, inst); 641 inst[1] |= (6 << 29); /* cvt */ 642 inst[1] |= 0x08000000; /* integer mode */ 643 inst[1] |= 0x04000000; /* 32 bit */ 644 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 645 inst[1] |= (1 << 14); /* src .f32 */ 646 set_dst(pc, dst, inst); 647 set_src_0(pc, src, inst); 648 649 emit(pc, inst); 650} 651 652static void 653emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 654 struct nv50_reg *v, struct nv50_reg *e) 655{ 656 struct nv50_reg *temp = alloc_temp(pc, NULL); 657 658 emit_flop(pc, 3, temp, v); 659 emit_mul(pc, temp, temp, e); 660 emit_preex2(pc, temp, temp); 661 emit_flop(pc, 6, dst, temp); 662 663 free_temp(pc, temp); 664} 665 666static void 667emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 668{ 669 unsigned inst[2] = { 0, 0 }; 670 671 inst[0] = 0xa0000000; /* cvt */ 672 set_long(pc, inst); 673 inst[1] |= (6 << 29); /* cvt */ 674 inst[1] |= 0x04000000; /* 32 bit */ 675 inst[1] |= (1 << 14); /* src .f32 */ 676 inst[1] |= ((1 << 6) << 14); /* .abs */ 677 set_dst(pc, dst, inst); 678 set_src_0(pc, src, inst); 679 680 emit(pc, inst); 681} 682 683static void 684emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) 685{ 686 struct nv50_reg *one = alloc_immd(pc, 1.0); 687 struct nv50_reg *zero = alloc_immd(pc, 0.0); 688 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 689 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 690 struct nv50_reg *tmp[4]; 691 692 emit_mov(pc, dst[0], one); 693 emit_mov(pc, dst[3], one); 694 695 tmp[0] = temp_temp(pc); 696 emit_minmax(pc, 4, dst[1], src[0], zero); 697 set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); 698 699 tmp[1] = temp_temp(pc); 700 emit_minmax(pc, 4, tmp[1], src[1], zero); 701 702 tmp[3] = temp_temp(pc); 703 emit_minmax(pc, 4, tmp[3], src[3], neg128); 704 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 705 706 emit_pow(pc, dst[2], tmp[1], tmp[3]); 707 emit_mov(pc, dst[2], zero); 708 set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); 709} 710 711static struct nv50_reg * 712tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 713{ 714 switch (dst->DstRegister.File) { 715 case TGSI_FILE_TEMPORARY: 716 return &pc->temp[dst->DstRegister.Index * 4 + c]; 717 case TGSI_FILE_OUTPUT: 718 return &pc->result[dst->DstRegister.Index * 4 + c]; 719 case TGSI_FILE_NULL: 720 return NULL; 721 default: 722 break; 723 } 724 725 return NULL; 726} 727 728static struct nv50_reg * 729tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 730{ 731 struct nv50_reg *r = NULL; 732 struct nv50_reg *temp; 733 unsigned c; 734 735 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 736 switch (c) { 737 case TGSI_EXTSWIZZLE_X: 738 case TGSI_EXTSWIZZLE_Y: 739 case TGSI_EXTSWIZZLE_Z: 740 case TGSI_EXTSWIZZLE_W: 741 switch (src->SrcRegister.File) { 742 case TGSI_FILE_INPUT: 743 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 744 break; 745 case TGSI_FILE_TEMPORARY: 746 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 747 break; 748 case TGSI_FILE_CONSTANT: 749 r = &pc->param[src->SrcRegister.Index * 4 + c]; 750 break; 751 case TGSI_FILE_IMMEDIATE: 752 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 753 break; 754 default: 755 assert(0); 756 break; 757 } 758 break; 759 case TGSI_EXTSWIZZLE_ZERO: 760 r = alloc_immd(pc, 0.0); 761 break; 762 case TGSI_EXTSWIZZLE_ONE: 763 r = alloc_immd(pc, 1.0); 764 break; 765 default: 766 assert(0); 767 break; 768 } 769 770 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 771 case TGSI_UTIL_SIGN_KEEP: 772 break; 773 case TGSI_UTIL_SIGN_CLEAR: 774 temp = temp_temp(pc); 775 emit_abs(pc, temp, r); 776 r = temp; 777 break; 778 default: 779 assert(0); 780 break; 781 } 782 783 return r; 784} 785 786static boolean 787nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 788{ 789 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 790 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 791 unsigned mask, sat; 792 int i, c; 793 794 NOUVEAU_ERR("insn %p\n", tok); 795 796 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 797 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 798 799 for (c = 0; c < 4; c++) { 800 if (mask & (1 << c)) 801 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 802 else 803 dst[c] = NULL; 804 } 805 806 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 807 for (c = 0; c < 4; c++) 808 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 809 } 810 811 if (sat) { 812 for (c = 0; c < 4; c++) { 813 rdst[c] = dst[c]; 814 dst[c] = temp_temp(pc); 815 } 816 } 817 818 switch (inst->Instruction.Opcode) { 819 case TGSI_OPCODE_ABS: 820 for (c = 0; c < 4; c++) { 821 if (!(mask & (1 << c))) 822 continue; 823 emit_abs(pc, dst[c], src[0][c]); 824 } 825 break; 826 case TGSI_OPCODE_ADD: 827 for (c = 0; c < 4; c++) { 828 if (!(mask & (1 << c))) 829 continue; 830 emit_add(pc, dst[c], src[0][c], src[1][c]); 831 } 832 break; 833 case TGSI_OPCODE_COS: 834 for (c = 0; c < 4; c++) { 835 if (!(mask & (1 << c))) 836 continue; 837 emit_flop(pc, 5, dst[c], src[0][c]); 838 } 839 break; 840 case TGSI_OPCODE_DP3: 841 temp = alloc_temp(pc, NULL); 842 emit_mul(pc, temp, src[0][0], src[1][0]); 843 emit_mad(pc, temp, src[0][1], src[1][1], temp); 844 emit_mad(pc, temp, src[0][2], src[1][2], temp); 845 for (c = 0; c < 4; c++) { 846 if (!(mask & (1 << c))) 847 continue; 848 emit_mov(pc, dst[c], temp); 849 } 850 free_temp(pc, temp); 851 break; 852 case TGSI_OPCODE_DP4: 853 temp = alloc_temp(pc, NULL); 854 emit_mul(pc, temp, src[0][0], src[1][0]); 855 emit_mad(pc, temp, src[0][1], src[1][1], temp); 856 emit_mad(pc, temp, src[0][2], src[1][2], temp); 857 emit_mad(pc, temp, src[0][3], src[1][3], temp); 858 for (c = 0; c < 4; c++) { 859 if (!(mask & (1 << c))) 860 continue; 861 emit_mov(pc, dst[c], temp); 862 } 863 free_temp(pc, temp); 864 break; 865 case TGSI_OPCODE_DPH: 866 temp = alloc_temp(pc, NULL); 867 emit_mul(pc, temp, src[0][0], src[1][0]); 868 emit_mad(pc, temp, src[0][1], src[1][1], temp); 869 emit_mad(pc, temp, src[0][2], src[1][2], temp); 870 emit_add(pc, temp, src[1][3], temp); 871 for (c = 0; c < 4; c++) { 872 if (!(mask & (1 << c))) 873 continue; 874 emit_mov(pc, dst[c], temp); 875 } 876 free_temp(pc, temp); 877 break; 878 case TGSI_OPCODE_DST: 879 { 880 struct nv50_reg *one = alloc_immd(pc, 1.0); 881 if (mask & (1 << 0)) 882 emit_mov(pc, dst[0], one); 883 if (mask & (1 << 1)) 884 emit_mul(pc, dst[1], src[0][1], src[1][1]); 885 if (mask & (1 << 2)) 886 emit_mov(pc, dst[2], src[0][2]); 887 if (mask & (1 << 3)) 888 emit_mov(pc, dst[3], src[1][3]); 889 FREE(one); 890 } 891 break; 892 case TGSI_OPCODE_EX2: 893 temp = alloc_temp(pc, NULL); 894 for (c = 0; c < 4; c++) { 895 if (!(mask & (1 << c))) 896 continue; 897 emit_preex2(pc, temp, src[0][c]); 898 emit_flop(pc, 6, dst[c], temp); 899 } 900 free_temp(pc, temp); 901 break; 902 case TGSI_OPCODE_FLR: 903 for (c = 0; c < 4; c++) { 904 if (!(mask & (1 << c))) 905 continue; 906 emit_flr(pc, dst[c], src[0][c]); 907 } 908 break; 909 case TGSI_OPCODE_FRC: 910 temp = alloc_temp(pc, NULL); 911 for (c = 0; c < 4; c++) { 912 if (!(mask & (1 << c))) 913 continue; 914 emit_flr(pc, temp, src[0][c]); 915 emit_sub(pc, dst[c], src[0][c], temp); 916 } 917 free_temp(pc, temp); 918 break; 919 case TGSI_OPCODE_LIT: 920 /*XXX: writemask */ 921 emit_lit(pc, &dst[0], &src[0][0]); 922 break; 923 case TGSI_OPCODE_LG2: 924 for (c = 0; c < 4; c++) { 925 if (!(mask & (1 << c))) 926 continue; 927 emit_flop(pc, 3, dst[c], src[0][c]); 928 } 929 break; 930 case TGSI_OPCODE_MAD: 931 for (c = 0; c < 4; c++) { 932 if (!(mask & (1 << c))) 933 continue; 934 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 935 } 936 break; 937 case TGSI_OPCODE_MAX: 938 for (c = 0; c < 4; c++) { 939 if (!(mask & (1 << c))) 940 continue; 941 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 942 } 943 break; 944 case TGSI_OPCODE_MIN: 945 for (c = 0; c < 4; c++) { 946 if (!(mask & (1 << c))) 947 continue; 948 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 949 } 950 break; 951 case TGSI_OPCODE_MOV: 952 for (c = 0; c < 4; c++) { 953 if (!(mask & (1 << c))) 954 continue; 955 emit_mov(pc, dst[c], src[0][c]); 956 } 957 break; 958 case TGSI_OPCODE_MUL: 959 for (c = 0; c < 4; c++) { 960 if (!(mask & (1 << c))) 961 continue; 962 emit_mul(pc, dst[c], src[0][c], src[1][c]); 963 } 964 break; 965 case TGSI_OPCODE_POW: 966 temp = alloc_temp(pc, NULL); 967 emit_pow(pc, temp, src[0][0], src[1][0]); 968 for (c = 0; c < 4; c++) { 969 if (!(mask & (1 << c))) 970 continue; 971 emit_mov(pc, dst[c], temp); 972 } 973 free_temp(pc, temp); 974 break; 975 case TGSI_OPCODE_RCP: 976 for (c = 0; c < 4; c++) { 977 if (!(mask & (1 << c))) 978 continue; 979 emit_flop(pc, 0, dst[c], src[0][c]); 980 } 981 break; 982 case TGSI_OPCODE_RSQ: 983 for (c = 0; c < 4; c++) { 984 if (!(mask & (1 << c))) 985 continue; 986 emit_flop(pc, 2, dst[c], src[0][c]); 987 } 988 break; 989 case TGSI_OPCODE_SGE: 990 for (c = 0; c < 4; c++) { 991 if (!(mask & (1 << c))) 992 continue; 993 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 994 } 995 break; 996 case TGSI_OPCODE_SIN: 997 for (c = 0; c < 4; c++) { 998 if (!(mask & (1 << c))) 999 continue; 1000 emit_flop(pc, 4, dst[c], src[0][c]); 1001 } 1002 break; 1003 case TGSI_OPCODE_SLT: 1004 for (c = 0; c < 4; c++) { 1005 if (!(mask & (1 << c))) 1006 continue; 1007 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 1008 } 1009 break; 1010 case TGSI_OPCODE_SUB: 1011 for (c = 0; c < 4; c++) { 1012 if (!(mask & (1 << c))) 1013 continue; 1014 emit_sub(pc, dst[c], src[0][c], src[1][c]); 1015 } 1016 break; 1017 case TGSI_OPCODE_XPD: 1018 temp = alloc_temp(pc, NULL); 1019 if (mask & (1 << 0)) { 1020 emit_mul(pc, temp, src[0][2], src[1][1]); 1021 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 1022 } 1023 if (mask & (1 << 1)) { 1024 emit_mul(pc, temp, src[0][0], src[1][2]); 1025 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 1026 } 1027 if (mask & (1 << 2)) { 1028 emit_mul(pc, temp, src[0][1], src[1][0]); 1029 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 1030 } 1031 free_temp(pc, temp); 1032 break; 1033 case TGSI_OPCODE_END: 1034 break; 1035 default: 1036 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 1037 return FALSE; 1038 } 1039 1040 if (sat) { 1041 for (c = 0; c < 4; c++) { 1042 unsigned inst[2] = { 0, 0 }; 1043 1044 if (!(mask & (1 << c))) 1045 continue; 1046 1047 inst[0] = 0xa0000000; /* cvt */ 1048 set_long(pc, inst); 1049 inst[1] |= (6 << 29); /* cvt */ 1050 inst[1] |= 0x04000000; /* 32 bit */ 1051 inst[1] |= (1 << 14); /* src .f32 */ 1052 inst[1] |= ((1 << 5) << 14); /* .sat */ 1053 set_dst(pc, rdst[c], inst); 1054 set_src_0(pc, dst[c], inst); 1055 emit(pc, inst); 1056 } 1057 } 1058 1059 kill_temp_temp(pc); 1060 return TRUE; 1061} 1062 1063static boolean 1064nv50_program_tx_prep(struct nv50_pc *pc) 1065{ 1066 struct tgsi_parse_context p; 1067 boolean ret = FALSE; 1068 unsigned i, c; 1069 1070 tgsi_parse_init(&p, pc->p->pipe.tokens); 1071 while (!tgsi_parse_end_of_tokens(&p)) { 1072 const union tgsi_full_token *tok = &p.FullToken; 1073 1074 tgsi_parse_token(&p); 1075 switch (tok->Token.Type) { 1076 case TGSI_TOKEN_TYPE_IMMEDIATE: 1077 { 1078 const struct tgsi_full_immediate *imm = 1079 &p.FullToken.FullImmediate; 1080 1081 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1082 imm->u.ImmediateFloat32[1].Float, 1083 imm->u.ImmediateFloat32[2].Float, 1084 imm->u.ImmediateFloat32[3].Float); 1085 } 1086 break; 1087 case TGSI_TOKEN_TYPE_DECLARATION: 1088 { 1089 const struct tgsi_full_declaration *d; 1090 unsigned last; 1091 1092 d = &p.FullToken.FullDeclaration; 1093 last = d->u.DeclarationRange.Last; 1094 1095 switch (d->Declaration.File) { 1096 case TGSI_FILE_TEMPORARY: 1097 if (pc->temp_nr < (last + 1)) 1098 pc->temp_nr = last + 1; 1099 break; 1100 case TGSI_FILE_OUTPUT: 1101 if (pc->result_nr < (last + 1)) 1102 pc->result_nr = last + 1; 1103 break; 1104 case TGSI_FILE_INPUT: 1105 if (pc->attr_nr < (last + 1)) 1106 pc->attr_nr = last + 1; 1107 break; 1108 case TGSI_FILE_CONSTANT: 1109 if (pc->param_nr < (last + 1)) 1110 pc->param_nr = last + 1; 1111 break; 1112 default: 1113 NOUVEAU_ERR("bad decl file %d\n", 1114 d->Declaration.File); 1115 goto out_err; 1116 } 1117 } 1118 break; 1119 case TGSI_TOKEN_TYPE_INSTRUCTION: 1120 break; 1121 default: 1122 break; 1123 } 1124 } 1125 1126 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 1127 if (pc->temp_nr) { 1128 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1129 if (!pc->temp) 1130 goto out_err; 1131 1132 for (i = 0; i < pc->temp_nr; i++) { 1133 for (c = 0; c < 4; c++) { 1134 pc->temp[i*4+c].type = P_TEMP; 1135 pc->temp[i*4+c].hw = -1; 1136 pc->temp[i*4+c].index = i; 1137 } 1138 } 1139 } 1140 1141 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1142 if (pc->attr_nr) { 1143 struct nv50_reg *iv = NULL, *tmp = NULL; 1144 int aid = 0; 1145 1146 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1147 if (!pc->attr) 1148 goto out_err; 1149 1150 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1151 iv = alloc_temp(pc, NULL); 1152 aid++; 1153 } 1154 1155 for (i = 0; i < pc->attr_nr; i++) { 1156 struct nv50_reg *a = &pc->attr[i*4]; 1157 1158 for (c = 0; c < 4; c++) { 1159 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1160 struct nv50_reg *at = 1161 alloc_temp(pc, NULL); 1162 pc->attr[i*4+c].type = at->type; 1163 pc->attr[i*4+c].hw = at->hw; 1164 pc->attr[i*4+c].index = at->index; 1165 } else { 1166 pc->p->cfg.vp.attr[aid/32] |= 1167 (1 << (aid % 32)); 1168 pc->attr[i*4+c].type = P_ATTR; 1169 pc->attr[i*4+c].hw = aid++; 1170 pc->attr[i*4+c].index = i; 1171 } 1172 } 1173 1174 if (pc->p->type != PIPE_SHADER_FRAGMENT) 1175 continue; 1176 1177 emit_interp(pc, iv, iv, iv, FALSE); 1178 tmp = alloc_temp(pc, NULL); 1179 { 1180 unsigned inst[2] = { 0, 0 }; 1181 inst[0] = 0x90000000; 1182 inst[0] |= (tmp->hw << 2); 1183 emit(pc, inst); 1184 } 1185 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1186 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1187 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1188 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1189 free_temp(pc, tmp); 1190 } 1191 1192 if (iv) 1193 free_temp(pc, iv); 1194 } 1195 1196 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1197 if (pc->result_nr) { 1198 int rid = 0; 1199 1200 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1201 if (!pc->result) 1202 goto out_err; 1203 1204 for (i = 0; i < pc->result_nr; i++) { 1205 for (c = 0; c < 4; c++) { 1206 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1207 pc->result[i*4+c].type = P_TEMP; 1208 pc->result[i*4+c].hw = -1; 1209 } else { 1210 pc->result[i*4+c].type = P_RESULT; 1211 pc->result[i*4+c].hw = rid++; 1212 } 1213 pc->result[i*4+c].index = i; 1214 } 1215 } 1216 } 1217 1218 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1219 if (pc->param_nr) { 1220 int rid = 0; 1221 1222 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1223 if (!pc->param) 1224 goto out_err; 1225 1226 for (i = 0; i < pc->param_nr; i++) { 1227 for (c = 0; c < 4; c++) { 1228 pc->param[i*4+c].type = P_CONST; 1229 pc->param[i*4+c].hw = rid++; 1230 pc->param[i*4+c].index = i; 1231 } 1232 } 1233 } 1234 1235 if (pc->immd_nr) { 1236 int rid = 0; 1237 1238 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1239 if (!pc->immd) 1240 goto out_err; 1241 1242 for (i = 0; i < pc->immd_nr; i++) { 1243 for (c = 0; c < 4; c++) { 1244 pc->immd[i*4+c].type = P_IMMD; 1245 pc->immd[i*4+c].hw = rid++; 1246 pc->immd[i*4+c].index = i; 1247 } 1248 } 1249 } 1250 1251 ret = TRUE; 1252out_err: 1253 tgsi_parse_free(&p); 1254 return ret; 1255} 1256 1257static boolean 1258nv50_program_tx(struct nv50_program *p) 1259{ 1260 struct tgsi_parse_context parse; 1261 struct nv50_pc *pc; 1262 boolean ret; 1263 1264 pc = CALLOC_STRUCT(nv50_pc); 1265 if (!pc) 1266 return FALSE; 1267 pc->p = p; 1268 pc->p->cfg.high_temp = 4; 1269 1270 ret = nv50_program_tx_prep(pc); 1271 if (ret == FALSE) 1272 goto out_cleanup; 1273 1274 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1275 while (!tgsi_parse_end_of_tokens(&parse)) { 1276 const union tgsi_full_token *tok = &parse.FullToken; 1277 1278 tgsi_parse_token(&parse); 1279 1280 switch (tok->Token.Type) { 1281 case TGSI_TOKEN_TYPE_INSTRUCTION: 1282 ret = nv50_program_tx_insn(pc, tok); 1283 if (ret == FALSE) 1284 goto out_err; 1285 break; 1286 default: 1287 break; 1288 } 1289 } 1290 1291 if (p->type == PIPE_SHADER_FRAGMENT) { 1292 struct nv50_reg out; 1293 1294 out.type = P_TEMP; 1295 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) 1296 emit_mov(pc, &out, &pc->result[out.hw]); 1297 } 1298 1299 p->immd_nr = pc->immd_nr * 4; 1300 p->immd = pc->immd_buf; 1301 1302out_err: 1303 tgsi_parse_free(&parse); 1304 1305out_cleanup: 1306 return ret; 1307} 1308 1309static void 1310nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1311{ 1312 int i; 1313 1314 if (nv50_program_tx(p) == FALSE) 1315 assert(0); 1316 /* *not* sufficient, it's fine if last inst is long and 1317 * NOT immd - otherwise it's fucked fucked fucked */ 1318 p->insns[p->insns_nr - 1] |= 0x00000001; 1319 1320 if (p->type == PIPE_SHADER_VERTEX) { 1321 for (i = 0; i < p->insns_nr; i++) 1322 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1323 } else { 1324 for (i = 0; i < p->insns_nr; i++) 1325 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1326 } 1327 1328 p->translated = TRUE; 1329} 1330 1331static void 1332nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1333{ 1334 int i; 1335 1336 for (i = 0; i < p->immd_nr; i++) { 1337 BEGIN_RING(tesla, 0x0f00, 2); 1338 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 1339 OUT_RING (fui(p->immd[i])); 1340 } 1341} 1342 1343static void 1344nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1345{ 1346 struct pipe_winsys *ws = nv50->pipe.winsys; 1347 void *map; 1348 1349 if (!p->buffer) 1350 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1351 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1352 memcpy(map, p->insns, p->insns_nr * 4); 1353 ws->buffer_unmap(ws, p->buffer); 1354} 1355 1356void 1357nv50_vertprog_validate(struct nv50_context *nv50) 1358{ 1359 struct nouveau_grobj *tesla = nv50->screen->tesla; 1360 struct nv50_program *p = nv50->vertprog; 1361 struct nouveau_stateobj *so; 1362 1363 if (!p->translated) { 1364 nv50_program_validate(nv50, p); 1365 if (!p->translated) 1366 assert(0); 1367 } 1368 1369 nv50_program_validate_data(nv50, p); 1370 nv50_program_validate_code(nv50, p); 1371 1372 so = so_new(11, 2); 1373 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1374 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1375 NOUVEAU_BO_HIGH, 0, 0); 1376 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1377 NOUVEAU_BO_LOW, 0, 0); 1378 so_method(so, tesla, 0x1650, 2); 1379 so_data (so, p->cfg.vp.attr[0]); 1380 so_data (so, p->cfg.vp.attr[1]); 1381 so_method(so, tesla, 0x16ac, 2); 1382 so_data (so, 8); 1383 so_data (so, p->cfg.high_temp); 1384 so_method(so, tesla, 0x140c, 1); 1385 so_data (so, 0); /* program start offset */ 1386 so_emit(nv50->screen->nvws, so); 1387 so_ref(NULL, &so); 1388} 1389 1390void 1391nv50_fragprog_validate(struct nv50_context *nv50) 1392{ 1393 struct nouveau_grobj *tesla = nv50->screen->tesla; 1394 struct nv50_program *p = nv50->fragprog; 1395 struct nouveau_stateobj *so; 1396 1397 if (!p->translated) { 1398 nv50_program_validate(nv50, p); 1399 if (!p->translated) 1400 assert(0); 1401 } 1402 1403 nv50_program_validate_data(nv50, p); 1404 nv50_program_validate_code(nv50, p); 1405 1406 so = so_new(7, 2); 1407 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1408 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1409 NOUVEAU_BO_HIGH, 0, 0); 1410 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1411 NOUVEAU_BO_LOW, 0, 0); 1412 so_method(so, tesla, 0x198c, 1); 1413 so_data (so, p->cfg.high_temp); 1414 so_method(so, tesla, 0x1414, 1); 1415 so_data (so, 0); /* program start offset */ 1416 so_emit(nv50->screen->nvws, so); 1417 so_ref(NULL, &so); 1418} 1419 1420void 1421nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1422{ 1423 struct pipe_winsys *ws = nv50->pipe.winsys; 1424 1425 if (p->insns_nr) { 1426 if (p->insns) 1427 FREE(p->insns); 1428 p->insns_nr = 0; 1429 } 1430 1431 if (p->buffer) 1432 pipe_buffer_reference(ws, &p->buffer, NULL); 1433 1434 p->translated = 0; 1435} 1436 1437