nv50_program.c revision 21e688e0a3faeef18b07c4d860bd71cc6e3ddf4a
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL - gallium craps itself on progs/vp/arl.txt 16 * 17 * MSB - Like MAD, but MUL+SUB 18 * - Fuck it off, introduce a way to negate args for ops that 19 * support it. 20 * 21 * Look into inlining IMMD for ops other than MOV (make it general?) 22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 24 * 25 * Verify half-insns work where expected - and force disable them where they 26 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 27 * 28 * FUCK! watch dst==src vectors, can overwrite components that are needed. 29 * ie. SUB R0, R0.yzxw, R0 30 * 31 * MOV dst, -src 32 * "delta" tmp, -src (0xa0000204,0xe4004780 - delta r0, -r0) 33 * mov dst, tmp 34 * 35 * Things to check with renouveau: 36 * FP attr/result assignment - how? 37 * attrib 38 * - 0x16bc maps vp output onto fp hpos 39 * - 0x16c0 maps vp output onto fp col0 40 * result 41 * - colr always 0-3 42 * - depr always 4 43 * 0x16bc->0x16e8 --> some binding between vp/fp regs 44 * 0x16b8 --> VP output count 45 * 46 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 47 * "MOV rcol.x, fcol.y" = 0x00000004 48 * 0x19a8 --> as above but 0x00000100 and 0x00000000 49 * - 0x00100000 used when KIL used 50 * 0x196c --> as above but 0x00000011 and 0x00000000 51 * 52 * 0x1988 --> 0xXXNNNNNN 53 * - XX == FP high something 54 */ 55struct nv50_reg { 56 enum { 57 P_TEMP, 58 P_ATTR, 59 P_RESULT, 60 P_CONST, 61 P_IMMD 62 } type; 63 int index; 64 65 int hw; 66 int neg; 67}; 68 69struct nv50_pc { 70 struct nv50_program *p; 71 72 /* hw resources */ 73 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 74 75 /* tgsi resources */ 76 struct nv50_reg *temp; 77 int temp_nr; 78 struct nv50_reg *attr; 79 int attr_nr; 80 struct nv50_reg *result; 81 int result_nr; 82 struct nv50_reg *param; 83 int param_nr; 84 struct nv50_reg *immd; 85 float *immd_buf; 86 int immd_nr; 87 88 struct nv50_reg *temp_temp[16]; 89 unsigned temp_temp_nr; 90}; 91 92static void 93alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 94{ 95 int i; 96 97 if (reg->type != P_TEMP) 98 return; 99 100 if (reg->hw >= 0) { 101 /*XXX: do this here too to catch FP temp-as-attr usage.. 102 * not clean, but works */ 103 if (pc->p->cfg.high_temp < (reg->hw + 1)) 104 pc->p->cfg.high_temp = reg->hw + 1; 105 return; 106 } 107 108 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 109 if (!(pc->r_temp[i])) { 110 pc->r_temp[i] = reg; 111 reg->hw = i; 112 if (pc->p->cfg.high_temp < (i + 1)) 113 pc->p->cfg.high_temp = i + 1; 114 return; 115 } 116 } 117 118 assert(0); 119} 120 121static struct nv50_reg * 122alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 123{ 124 struct nv50_reg *r; 125 int i; 126 127 if (dst && dst->type == P_TEMP && dst->hw == -1) 128 return dst; 129 130 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 131 if (!pc->r_temp[i]) { 132 r = CALLOC_STRUCT(nv50_reg); 133 r->type = P_TEMP; 134 r->index = -1; 135 r->hw = i; 136 pc->r_temp[i] = r; 137 return r; 138 } 139 } 140 141 assert(0); 142 return NULL; 143} 144 145static void 146free_temp(struct nv50_pc *pc, struct nv50_reg *r) 147{ 148 if (r->index == -1) { 149 FREE(pc->r_temp[r->hw]); 150 pc->r_temp[r->hw] = NULL; 151 } 152} 153 154static struct nv50_reg * 155temp_temp(struct nv50_pc *pc) 156{ 157 if (pc->temp_temp_nr >= 16) 158 assert(0); 159 160 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 161 return pc->temp_temp[pc->temp_temp_nr++]; 162} 163 164static void 165kill_temp_temp(struct nv50_pc *pc) 166{ 167 int i; 168 169 for (i = 0; i < pc->temp_temp_nr; i++) 170 free_temp(pc, pc->temp_temp[i]); 171 pc->temp_temp_nr = 0; 172} 173 174static int 175ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 176{ 177 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 178 sizeof(float)); 179 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 180 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 181 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 182 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 183 184 return pc->immd_nr++; 185} 186 187static struct nv50_reg * 188alloc_immd(struct nv50_pc *pc, float f) 189{ 190 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 191 unsigned hw; 192 193 hw = ctor_immd(pc, f, 0, 0, 0) * 4; 194 r->type = P_IMMD; 195 r->hw = hw; 196 r->index = -1; 197 return r; 198} 199 200static void 201emit(struct nv50_pc *pc, unsigned *inst) 202{ 203 struct nv50_program *p = pc->p; 204 205 if (inst[0] & 1) { 206 p->insns_nr += 2; 207 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 208 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 209 } else { 210 p->insns_nr += 1; 211 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 212 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 213 } 214} 215 216static INLINE void set_long(struct nv50_pc *, unsigned *); 217 218static boolean 219is_long(unsigned *inst) 220{ 221 if (inst[0] & 1) 222 return TRUE; 223 return FALSE; 224} 225 226static boolean 227is_immd(unsigned *inst) 228{ 229 if (is_long(inst) && (inst[1] & 3) == 3) 230 return TRUE; 231 return FALSE; 232} 233 234static INLINE void 235set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 236{ 237 set_long(pc, inst); 238 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 239 inst[1] |= (pred << 7) | (idx << 12); 240} 241 242static INLINE void 243set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 244{ 245 set_long(pc, inst); 246 inst[1] &= ~((0x3 << 4) | (1 << 6)); 247 inst[1] |= (idx << 4) | (on << 6); 248} 249 250static INLINE void 251set_long(struct nv50_pc *pc, unsigned *inst) 252{ 253 if (is_long(inst)) 254 return; 255 256 inst[0] |= 1; 257 set_pred(pc, 0xf, 0, inst); 258 set_pred_wr(pc, 0, 0, inst); 259} 260 261static INLINE void 262set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 263{ 264 if (dst->type == P_RESULT) { 265 set_long(pc, inst); 266 inst[1] |= 0x00000008; 267 } 268 269 alloc_reg(pc, dst); 270 inst[0] |= (dst->hw << 2); 271} 272 273static INLINE void 274set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 275{ 276 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 277 278 set_long(pc, inst); 279 /*XXX: can't be predicated - bits overlap.. catch cases where both 280 * are required and avoid them. */ 281 set_pred(pc, 0, 0, inst); 282 set_pred_wr(pc, 0, 0, inst); 283 284 inst[1] |= 0x00000002 | 0x00000001; 285 inst[0] |= (val & 0x3f) << 16; 286 inst[1] |= (val >> 6) << 2; 287} 288 289static void 290emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 291 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 292{ 293 unsigned inst[2] = { 0, 0 }; 294 295 inst[0] |= 0x80000000; 296 set_dst(pc, dst, inst); 297 alloc_reg(pc, iv); 298 inst[0] |= (iv->hw << 9); 299 alloc_reg(pc, src); 300 inst[0] |= (src->hw << 16); 301 if (noperspective) 302 inst[0] |= (1 << 25); 303 304 emit(pc, inst); 305} 306 307static void 308set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 309{ 310 set_long(pc, inst); 311 if (src->type == P_IMMD) { 312 inst[1] |= (NV50_CB_PMISC << 22); 313 } else { 314 if (pc->p->type == PIPE_SHADER_VERTEX) 315 inst[1] |= (NV50_CB_PVP << 22); 316 else 317 inst[1] |= (NV50_CB_PFP << 22); 318 } 319} 320 321static void 322emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 323{ 324 unsigned inst[2] = { 0, 0 }; 325 326 inst[0] |= 0x10000000; 327 328 set_dst(pc, dst, inst); 329 330 if (dst->type != P_RESULT && src->type == P_IMMD) { 331 set_immd(pc, src, inst); 332 /*XXX: 32-bit, but steals part of "half" reg space - need to 333 * catch and handle this case if/when we do half-regs 334 */ 335 inst[0] |= 0x00008000; 336 } else 337 if (src->type == P_IMMD || src->type == P_CONST) { 338 set_long(pc, inst); 339 set_cseg(pc, src, inst); 340 inst[0] |= (src->hw << 9); 341 inst[1] |= 0x20000000; /* src0 const? */ 342 } else { 343 if (src->type == P_ATTR) { 344 set_long(pc, inst); 345 inst[1] |= 0x00200000; 346 } 347 348 alloc_reg(pc, src); 349 inst[0] |= (src->hw << 9); 350 } 351 352 /* We really should support "half" instructions here at some point, 353 * but I don't feel confident enough about them yet. 354 */ 355 set_long(pc, inst); 356 if (is_long(inst) && !is_immd(inst)) { 357 inst[1] |= 0x04000000; /* 32-bit */ 358 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 359 } 360 361 emit(pc, inst); 362} 363 364static boolean 365check_swap_src_0_1(struct nv50_pc *pc, 366 struct nv50_reg **s0, struct nv50_reg **s1) 367{ 368 struct nv50_reg *src0 = *s0, *src1 = *s1; 369 370 if (src0->type == P_CONST) { 371 if (src1->type != P_CONST) { 372 *s0 = src1; 373 *s1 = src0; 374 return TRUE; 375 } 376 } else 377 if (src1->type == P_ATTR) { 378 if (src0->type != P_ATTR) { 379 *s0 = src1; 380 *s1 = src0; 381 return TRUE; 382 } 383 } 384 385 return FALSE; 386} 387 388static void 389set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 390{ 391 if (src->type == P_ATTR) { 392 set_long(pc, inst); 393 inst[1] |= 0x00200000; 394 } else 395 if (src->type == P_CONST || src->type == P_IMMD) { 396 struct nv50_reg *temp = temp_temp(pc); 397 398 emit_mov(pc, temp, src); 399 src = temp; 400 } 401 402 alloc_reg(pc, src); 403 inst[0] |= (src->hw << 9); 404} 405 406static void 407set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 408{ 409 if (src->type == P_ATTR) { 410 struct nv50_reg *temp = temp_temp(pc); 411 412 emit_mov(pc, temp, src); 413 src = temp; 414 } else 415 if (src->type == P_CONST || src->type == P_IMMD) { 416 assert(!(inst[0] & 0x00800000)); 417 if (inst[0] & 0x01000000) { 418 struct nv50_reg *temp = temp_temp(pc); 419 420 emit_mov(pc, temp, src); 421 src = temp; 422 } else { 423 set_cseg(pc, src, inst); 424 inst[0] |= 0x00800000; 425 } 426 } 427 428 alloc_reg(pc, src); 429 inst[0] |= (src->hw << 16); 430} 431 432static void 433set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 434{ 435 set_long(pc, inst); 436 437 if (src->type == P_ATTR) { 438 struct nv50_reg *temp = temp_temp(pc); 439 440 emit_mov(pc, temp, src); 441 src = temp; 442 } else 443 if (src->type == P_CONST || src->type == P_IMMD) { 444 assert(!(inst[0] & 0x01000000)); 445 if (inst[0] & 0x00800000) { 446 struct nv50_reg *temp = temp_temp(pc); 447 448 emit_mov(pc, temp, src); 449 src = temp; 450 } else { 451 set_cseg(pc, src, inst); 452 inst[0] |= 0x01000000; 453 } 454 } 455 456 alloc_reg(pc, src); 457 inst[1] |= (src->hw << 14); 458} 459 460static void 461emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 462 struct nv50_reg *src1) 463{ 464 unsigned inst[2] = { 0, 0 }; 465 466 inst[0] |= 0xc0000000; 467 set_long(pc, inst); 468 469 check_swap_src_0_1(pc, &src0, &src1); 470 set_dst(pc, dst, inst); 471 set_src_0(pc, src0, inst); 472 set_src_1(pc, src1, inst); 473 474 emit(pc, inst); 475} 476 477static void 478emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 479 struct nv50_reg *src0, struct nv50_reg *src1) 480{ 481 unsigned inst[2] = { 0, 0 }; 482 483 inst[0] |= 0xb0000000; 484 485 check_swap_src_0_1(pc, &src0, &src1); 486 set_dst(pc, dst, inst); 487 set_src_0(pc, src0, inst); 488 if (is_long(inst)) 489 set_src_2(pc, src1, inst); 490 else 491 set_src_1(pc, src1, inst); 492 493 emit(pc, inst); 494} 495 496static void 497emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 498 struct nv50_reg *src0, struct nv50_reg *src1) 499{ 500 unsigned inst[2] = { 0, 0 }; 501 502 set_long(pc, inst); 503 inst[0] |= 0xb0000000; 504 inst[1] |= (sub << 29); 505 506 check_swap_src_0_1(pc, &src0, &src1); 507 set_dst(pc, dst, inst); 508 set_src_0(pc, src0, inst); 509 set_src_1(pc, src1, inst); 510 511 emit(pc, inst); 512} 513 514static void 515emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 516 struct nv50_reg *src1) 517{ 518 unsigned inst[2] = { 0, 0 }; 519 520 inst[0] |= 0xb0000000; 521 522 set_long(pc, inst); 523 if (check_swap_src_0_1(pc, &src0, &src1)) 524 inst[1] |= 0x04000000; 525 else 526 inst[1] |= 0x08000000; 527 528 set_dst(pc, dst, inst); 529 set_src_0(pc, src0, inst); 530 set_src_2(pc, src1, inst); 531 532 emit(pc, inst); 533} 534 535static void 536emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 537 struct nv50_reg *src1, struct nv50_reg *src2) 538{ 539 unsigned inst[2] = { 0, 0 }; 540 541 inst[0] |= 0xe0000000; 542 543 check_swap_src_0_1(pc, &src0, &src1); 544 set_dst(pc, dst, inst); 545 set_src_0(pc, src0, inst); 546 set_src_1(pc, src1, inst); 547 set_src_2(pc, src2, inst); 548 549 emit(pc, inst); 550} 551 552static void 553emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 554 struct nv50_reg *src1, struct nv50_reg *src2) 555{ 556 unsigned inst[2] = { 0, 0 }; 557 558 inst[0] |= 0xe0000000; 559 set_long(pc, inst); 560 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 561 562 check_swap_src_0_1(pc, &src0, &src1); 563 set_dst(pc, dst, inst); 564 set_src_0(pc, src0, inst); 565 set_src_1(pc, src1, inst); 566 set_src_2(pc, src2, inst); 567 568 emit(pc, inst); 569} 570 571static void 572emit_flop(struct nv50_pc *pc, unsigned sub, 573 struct nv50_reg *dst, struct nv50_reg *src) 574{ 575 unsigned inst[2] = { 0, 0 }; 576 577 inst[0] |= 0x90000000; 578 if (sub) { 579 set_long(pc, inst); 580 inst[1] |= (sub << 29); 581 } 582 583 set_dst(pc, dst, inst); 584 set_src_0(pc, src, inst); 585 586 emit(pc, inst); 587} 588 589static void 590emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 591{ 592 unsigned inst[2] = { 0, 0 }; 593 594 inst[0] |= 0xb0000000; 595 596 set_dst(pc, dst, inst); 597 set_src_0(pc, src, inst); 598 set_long(pc, inst); 599 inst[1] |= (6 << 29) | 0x00004000; 600 601 emit(pc, inst); 602} 603 604static void 605emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 606 struct nv50_reg *src0, struct nv50_reg *src1) 607{ 608 unsigned inst[2] = { 0, 0 }; 609 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; 610 struct nv50_reg *rdst; 611 612 assert(c_op <= 7); 613 if (check_swap_src_0_1(pc, &src0, &src1)) 614 c_op = inv_cop[c_op]; 615 616 rdst = dst; 617 if (dst->type != P_TEMP) 618 dst = alloc_temp(pc, NULL); 619 620 /* set.u32 */ 621 set_long(pc, inst); 622 inst[0] |= 0xb0000000; 623 inst[1] |= (3 << 29); 624 inst[1] |= (c_op << 14); 625 /*XXX: breaks things, .u32 by default? 626 * decuda will disasm as .u16 and use .lo/.hi regs, but this 627 * doesn't seem to match what the hw actually does. 628 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 629 */ 630 set_dst(pc, dst, inst); 631 set_src_0(pc, src0, inst); 632 set_src_1(pc, src1, inst); 633 emit(pc, inst); 634 635 /* cvt.f32.u32 */ 636 inst[0] = 0xa0000001; 637 inst[1] = 0x64014780; 638 set_dst(pc, rdst, inst); 639 set_src_0(pc, dst, inst); 640 emit(pc, inst); 641 642 if (dst != rdst) 643 free_temp(pc, dst); 644} 645 646static void 647emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 648{ 649 unsigned inst[2] = { 0, 0 }; 650 651 inst[0] = 0xa0000000; /* cvt */ 652 set_long(pc, inst); 653 inst[1] |= (6 << 29); /* cvt */ 654 inst[1] |= 0x08000000; /* integer mode */ 655 inst[1] |= 0x04000000; /* 32 bit */ 656 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 657 inst[1] |= (1 << 14); /* src .f32 */ 658 set_dst(pc, dst, inst); 659 set_src_0(pc, src, inst); 660 661 emit(pc, inst); 662} 663 664static void 665emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 666 struct nv50_reg *v, struct nv50_reg *e) 667{ 668 struct nv50_reg *temp = alloc_temp(pc, NULL); 669 670 emit_flop(pc, 3, temp, v); 671 emit_mul(pc, temp, temp, e); 672 emit_preex2(pc, temp, temp); 673 emit_flop(pc, 6, dst, temp); 674 675 free_temp(pc, temp); 676} 677 678static void 679emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 680{ 681 unsigned inst[2] = { 0, 0 }; 682 683 inst[0] = 0xa0000000; /* cvt */ 684 set_long(pc, inst); 685 inst[1] |= (6 << 29); /* cvt */ 686 inst[1] |= 0x04000000; /* 32 bit */ 687 inst[1] |= (1 << 14); /* src .f32 */ 688 inst[1] |= ((1 << 6) << 14); /* .abs */ 689 set_dst(pc, dst, inst); 690 set_src_0(pc, src, inst); 691 692 emit(pc, inst); 693} 694 695static void 696emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) 697{ 698 struct nv50_reg *one = alloc_immd(pc, 1.0); 699 struct nv50_reg *zero = alloc_immd(pc, 0.0); 700 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 701 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 702 struct nv50_reg *tmp[4]; 703 704 emit_mov(pc, dst[0], one); 705 emit_mov(pc, dst[3], one); 706 707 tmp[0] = temp_temp(pc); 708 emit_minmax(pc, 4, dst[1], src[0], zero); 709 set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); 710 711 tmp[1] = temp_temp(pc); 712 emit_minmax(pc, 4, tmp[1], src[1], zero); 713 714 tmp[3] = temp_temp(pc); 715 emit_minmax(pc, 4, tmp[3], src[3], neg128); 716 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 717 718 emit_pow(pc, dst[2], tmp[1], tmp[3]); 719 emit_mov(pc, dst[2], zero); 720 set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); 721} 722 723static void 724emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 725{ 726 unsigned inst[2] = { 0, 0 }; 727 728 set_long(pc, inst); 729 inst[0] |= 0xa0000000; /* delta */ 730 inst[1] |= (7 << 29); /* delta */ 731 inst[1] |= 0x04000000; /* negate arg0? probably not */ 732 inst[1] |= (1 << 14); /* src .f32 */ 733 set_dst(pc, dst, inst); 734 set_src_0(pc, src, inst); 735 736 emit(pc, inst); 737} 738 739static struct nv50_reg * 740tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 741{ 742 switch (dst->DstRegister.File) { 743 case TGSI_FILE_TEMPORARY: 744 return &pc->temp[dst->DstRegister.Index * 4 + c]; 745 case TGSI_FILE_OUTPUT: 746 return &pc->result[dst->DstRegister.Index * 4 + c]; 747 case TGSI_FILE_NULL: 748 return NULL; 749 default: 750 break; 751 } 752 753 return NULL; 754} 755 756static struct nv50_reg * 757tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 758{ 759 struct nv50_reg *r = NULL; 760 struct nv50_reg *temp; 761 unsigned c; 762 763 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 764 switch (c) { 765 case TGSI_EXTSWIZZLE_X: 766 case TGSI_EXTSWIZZLE_Y: 767 case TGSI_EXTSWIZZLE_Z: 768 case TGSI_EXTSWIZZLE_W: 769 switch (src->SrcRegister.File) { 770 case TGSI_FILE_INPUT: 771 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 772 break; 773 case TGSI_FILE_TEMPORARY: 774 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 775 break; 776 case TGSI_FILE_CONSTANT: 777 r = &pc->param[src->SrcRegister.Index * 4 + c]; 778 break; 779 case TGSI_FILE_IMMEDIATE: 780 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 781 break; 782 default: 783 assert(0); 784 break; 785 } 786 break; 787 case TGSI_EXTSWIZZLE_ZERO: 788 r = alloc_immd(pc, 0.0); 789 break; 790 case TGSI_EXTSWIZZLE_ONE: 791 r = alloc_immd(pc, 1.0); 792 break; 793 default: 794 assert(0); 795 break; 796 } 797 798 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 799 case TGSI_UTIL_SIGN_KEEP: 800 break; 801 case TGSI_UTIL_SIGN_CLEAR: 802 temp = temp_temp(pc); 803 emit_abs(pc, temp, r); 804 r = temp; 805 break; 806 case TGSI_UTIL_SIGN_TOGGLE: 807 temp = temp_temp(pc); 808 emit_neg(pc, temp, r); 809 r = temp; 810 break; 811 case TGSI_UTIL_SIGN_SET: 812 temp = temp_temp(pc); 813 emit_abs(pc, temp, r); 814 emit_neg(pc, temp, r); 815 r = temp; 816 break; 817 default: 818 assert(0); 819 break; 820 } 821 822 return r; 823} 824 825static boolean 826nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 827{ 828 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 829 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 830 unsigned mask, sat; 831 int i, c; 832 833 NOUVEAU_ERR("insn %p\n", tok); 834 835 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 836 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 837 838 for (c = 0; c < 4; c++) { 839 if (mask & (1 << c)) 840 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 841 else 842 dst[c] = NULL; 843 } 844 845 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 846 for (c = 0; c < 4; c++) 847 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 848 } 849 850 if (sat) { 851 for (c = 0; c < 4; c++) { 852 rdst[c] = dst[c]; 853 dst[c] = temp_temp(pc); 854 } 855 } 856 857 switch (inst->Instruction.Opcode) { 858 case TGSI_OPCODE_ABS: 859 for (c = 0; c < 4; c++) { 860 if (!(mask & (1 << c))) 861 continue; 862 emit_abs(pc, dst[c], src[0][c]); 863 } 864 break; 865 case TGSI_OPCODE_ADD: 866 for (c = 0; c < 4; c++) { 867 if (!(mask & (1 << c))) 868 continue; 869 emit_add(pc, dst[c], src[0][c], src[1][c]); 870 } 871 break; 872 case TGSI_OPCODE_COS: 873 for (c = 0; c < 4; c++) { 874 if (!(mask & (1 << c))) 875 continue; 876 emit_flop(pc, 5, dst[c], src[0][c]); 877 } 878 break; 879 case TGSI_OPCODE_DP3: 880 temp = alloc_temp(pc, NULL); 881 emit_mul(pc, temp, src[0][0], src[1][0]); 882 emit_mad(pc, temp, src[0][1], src[1][1], temp); 883 emit_mad(pc, temp, src[0][2], src[1][2], temp); 884 for (c = 0; c < 4; c++) { 885 if (!(mask & (1 << c))) 886 continue; 887 emit_mov(pc, dst[c], temp); 888 } 889 free_temp(pc, temp); 890 break; 891 case TGSI_OPCODE_DP4: 892 temp = alloc_temp(pc, NULL); 893 emit_mul(pc, temp, src[0][0], src[1][0]); 894 emit_mad(pc, temp, src[0][1], src[1][1], temp); 895 emit_mad(pc, temp, src[0][2], src[1][2], temp); 896 emit_mad(pc, temp, src[0][3], src[1][3], temp); 897 for (c = 0; c < 4; c++) { 898 if (!(mask & (1 << c))) 899 continue; 900 emit_mov(pc, dst[c], temp); 901 } 902 free_temp(pc, temp); 903 break; 904 case TGSI_OPCODE_DPH: 905 temp = alloc_temp(pc, NULL); 906 emit_mul(pc, temp, src[0][0], src[1][0]); 907 emit_mad(pc, temp, src[0][1], src[1][1], temp); 908 emit_mad(pc, temp, src[0][2], src[1][2], temp); 909 emit_add(pc, temp, src[1][3], temp); 910 for (c = 0; c < 4; c++) { 911 if (!(mask & (1 << c))) 912 continue; 913 emit_mov(pc, dst[c], temp); 914 } 915 free_temp(pc, temp); 916 break; 917 case TGSI_OPCODE_DST: 918 { 919 struct nv50_reg *one = alloc_immd(pc, 1.0); 920 if (mask & (1 << 0)) 921 emit_mov(pc, dst[0], one); 922 if (mask & (1 << 1)) 923 emit_mul(pc, dst[1], src[0][1], src[1][1]); 924 if (mask & (1 << 2)) 925 emit_mov(pc, dst[2], src[0][2]); 926 if (mask & (1 << 3)) 927 emit_mov(pc, dst[3], src[1][3]); 928 FREE(one); 929 } 930 break; 931 case TGSI_OPCODE_EX2: 932 temp = alloc_temp(pc, NULL); 933 for (c = 0; c < 4; c++) { 934 if (!(mask & (1 << c))) 935 continue; 936 emit_preex2(pc, temp, src[0][c]); 937 emit_flop(pc, 6, dst[c], temp); 938 } 939 free_temp(pc, temp); 940 break; 941 case TGSI_OPCODE_FLR: 942 for (c = 0; c < 4; c++) { 943 if (!(mask & (1 << c))) 944 continue; 945 emit_flr(pc, dst[c], src[0][c]); 946 } 947 break; 948 case TGSI_OPCODE_FRC: 949 temp = alloc_temp(pc, NULL); 950 for (c = 0; c < 4; c++) { 951 if (!(mask & (1 << c))) 952 continue; 953 emit_flr(pc, temp, src[0][c]); 954 emit_sub(pc, dst[c], src[0][c], temp); 955 } 956 free_temp(pc, temp); 957 break; 958 case TGSI_OPCODE_LIT: 959 /*XXX: writemask */ 960 emit_lit(pc, &dst[0], &src[0][0]); 961 break; 962 case TGSI_OPCODE_LG2: 963 for (c = 0; c < 4; c++) { 964 if (!(mask & (1 << c))) 965 continue; 966 emit_flop(pc, 3, dst[c], src[0][c]); 967 } 968 break; 969 case TGSI_OPCODE_LRP: 970 for (c = 0; c < 4; c++) { 971 if (!(mask & (1 << c))) 972 continue; 973 /*XXX: we can do better than this */ 974 temp = alloc_temp(pc, NULL); 975 emit_neg(pc, temp, src[0][c]); 976 emit_mad(pc, temp, temp, src[2][c], src[2][c]); 977 emit_mad(pc, dst[c], src[0][c], src[1][c], temp); 978 free_temp(pc, temp); 979 } 980 break; 981 case TGSI_OPCODE_MAD: 982 for (c = 0; c < 4; c++) { 983 if (!(mask & (1 << c))) 984 continue; 985 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 986 } 987 break; 988 case TGSI_OPCODE_MAX: 989 for (c = 0; c < 4; c++) { 990 if (!(mask & (1 << c))) 991 continue; 992 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 993 } 994 break; 995 case TGSI_OPCODE_MIN: 996 for (c = 0; c < 4; c++) { 997 if (!(mask & (1 << c))) 998 continue; 999 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 1000 } 1001 break; 1002 case TGSI_OPCODE_MOV: 1003 for (c = 0; c < 4; c++) { 1004 if (!(mask & (1 << c))) 1005 continue; 1006 emit_mov(pc, dst[c], src[0][c]); 1007 } 1008 break; 1009 case TGSI_OPCODE_MUL: 1010 for (c = 0; c < 4; c++) { 1011 if (!(mask & (1 << c))) 1012 continue; 1013 emit_mul(pc, dst[c], src[0][c], src[1][c]); 1014 } 1015 break; 1016 case TGSI_OPCODE_POW: 1017 temp = alloc_temp(pc, NULL); 1018 emit_pow(pc, temp, src[0][0], src[1][0]); 1019 for (c = 0; c < 4; c++) { 1020 if (!(mask & (1 << c))) 1021 continue; 1022 emit_mov(pc, dst[c], temp); 1023 } 1024 free_temp(pc, temp); 1025 break; 1026 case TGSI_OPCODE_RCP: 1027 for (c = 0; c < 4; c++) { 1028 if (!(mask & (1 << c))) 1029 continue; 1030 emit_flop(pc, 0, dst[c], src[0][c]); 1031 } 1032 break; 1033 case TGSI_OPCODE_RSQ: 1034 for (c = 0; c < 4; c++) { 1035 if (!(mask & (1 << c))) 1036 continue; 1037 emit_flop(pc, 2, dst[c], src[0][c]); 1038 } 1039 break; 1040 case TGSI_OPCODE_SGE: 1041 for (c = 0; c < 4; c++) { 1042 if (!(mask & (1 << c))) 1043 continue; 1044 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 1045 } 1046 break; 1047 case TGSI_OPCODE_SIN: 1048 for (c = 0; c < 4; c++) { 1049 if (!(mask & (1 << c))) 1050 continue; 1051 emit_flop(pc, 4, dst[c], src[0][c]); 1052 } 1053 break; 1054 case TGSI_OPCODE_SLT: 1055 for (c = 0; c < 4; c++) { 1056 if (!(mask & (1 << c))) 1057 continue; 1058 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 1059 } 1060 break; 1061 case TGSI_OPCODE_SUB: 1062 for (c = 0; c < 4; c++) { 1063 if (!(mask & (1 << c))) 1064 continue; 1065 emit_sub(pc, dst[c], src[0][c], src[1][c]); 1066 } 1067 break; 1068 case TGSI_OPCODE_XPD: 1069 temp = alloc_temp(pc, NULL); 1070 if (mask & (1 << 0)) { 1071 emit_mul(pc, temp, src[0][2], src[1][1]); 1072 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 1073 } 1074 if (mask & (1 << 1)) { 1075 emit_mul(pc, temp, src[0][0], src[1][2]); 1076 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 1077 } 1078 if (mask & (1 << 2)) { 1079 emit_mul(pc, temp, src[0][1], src[1][0]); 1080 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 1081 } 1082 free_temp(pc, temp); 1083 break; 1084 case TGSI_OPCODE_END: 1085 break; 1086 default: 1087 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 1088 return FALSE; 1089 } 1090 1091 if (sat) { 1092 for (c = 0; c < 4; c++) { 1093 unsigned inst[2] = { 0, 0 }; 1094 1095 if (!(mask & (1 << c))) 1096 continue; 1097 1098 inst[0] = 0xa0000000; /* cvt */ 1099 set_long(pc, inst); 1100 inst[1] |= (6 << 29); /* cvt */ 1101 inst[1] |= 0x04000000; /* 32 bit */ 1102 inst[1] |= (1 << 14); /* src .f32 */ 1103 inst[1] |= ((1 << 5) << 14); /* .sat */ 1104 set_dst(pc, rdst[c], inst); 1105 set_src_0(pc, dst[c], inst); 1106 emit(pc, inst); 1107 } 1108 } 1109 1110 kill_temp_temp(pc); 1111 return TRUE; 1112} 1113 1114static boolean 1115nv50_program_tx_prep(struct nv50_pc *pc) 1116{ 1117 struct tgsi_parse_context p; 1118 boolean ret = FALSE; 1119 unsigned i, c; 1120 1121 tgsi_parse_init(&p, pc->p->pipe.tokens); 1122 while (!tgsi_parse_end_of_tokens(&p)) { 1123 const union tgsi_full_token *tok = &p.FullToken; 1124 1125 tgsi_parse_token(&p); 1126 switch (tok->Token.Type) { 1127 case TGSI_TOKEN_TYPE_IMMEDIATE: 1128 { 1129 const struct tgsi_full_immediate *imm = 1130 &p.FullToken.FullImmediate; 1131 1132 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1133 imm->u.ImmediateFloat32[1].Float, 1134 imm->u.ImmediateFloat32[2].Float, 1135 imm->u.ImmediateFloat32[3].Float); 1136 } 1137 break; 1138 case TGSI_TOKEN_TYPE_DECLARATION: 1139 { 1140 const struct tgsi_full_declaration *d; 1141 unsigned last; 1142 1143 d = &p.FullToken.FullDeclaration; 1144 last = d->u.DeclarationRange.Last; 1145 1146 switch (d->Declaration.File) { 1147 case TGSI_FILE_TEMPORARY: 1148 if (pc->temp_nr < (last + 1)) 1149 pc->temp_nr = last + 1; 1150 break; 1151 case TGSI_FILE_OUTPUT: 1152 if (pc->result_nr < (last + 1)) 1153 pc->result_nr = last + 1; 1154 break; 1155 case TGSI_FILE_INPUT: 1156 if (pc->attr_nr < (last + 1)) 1157 pc->attr_nr = last + 1; 1158 break; 1159 case TGSI_FILE_CONSTANT: 1160 if (pc->param_nr < (last + 1)) 1161 pc->param_nr = last + 1; 1162 break; 1163 default: 1164 NOUVEAU_ERR("bad decl file %d\n", 1165 d->Declaration.File); 1166 goto out_err; 1167 } 1168 } 1169 break; 1170 case TGSI_TOKEN_TYPE_INSTRUCTION: 1171 break; 1172 default: 1173 break; 1174 } 1175 } 1176 1177 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 1178 if (pc->temp_nr) { 1179 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1180 if (!pc->temp) 1181 goto out_err; 1182 1183 for (i = 0; i < pc->temp_nr; i++) { 1184 for (c = 0; c < 4; c++) { 1185 pc->temp[i*4+c].type = P_TEMP; 1186 pc->temp[i*4+c].hw = -1; 1187 pc->temp[i*4+c].index = i; 1188 } 1189 } 1190 } 1191 1192 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1193 if (pc->attr_nr) { 1194 struct nv50_reg *iv = NULL, *tmp = NULL; 1195 int aid = 0; 1196 1197 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1198 if (!pc->attr) 1199 goto out_err; 1200 1201 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1202 iv = alloc_temp(pc, NULL); 1203 aid++; 1204 } 1205 1206 for (i = 0; i < pc->attr_nr; i++) { 1207 struct nv50_reg *a = &pc->attr[i*4]; 1208 1209 for (c = 0; c < 4; c++) { 1210 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1211 struct nv50_reg *at = 1212 alloc_temp(pc, NULL); 1213 pc->attr[i*4+c].type = at->type; 1214 pc->attr[i*4+c].hw = at->hw; 1215 pc->attr[i*4+c].index = at->index; 1216 } else { 1217 pc->p->cfg.vp.attr[aid/32] |= 1218 (1 << (aid % 32)); 1219 pc->attr[i*4+c].type = P_ATTR; 1220 pc->attr[i*4+c].hw = aid++; 1221 pc->attr[i*4+c].index = i; 1222 } 1223 } 1224 1225 if (pc->p->type != PIPE_SHADER_FRAGMENT) 1226 continue; 1227 1228 emit_interp(pc, iv, iv, iv, FALSE); 1229 tmp = alloc_temp(pc, NULL); 1230 emit_flop(pc, 0, tmp, iv); 1231 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1232 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1233 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1234 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1235 free_temp(pc, tmp); 1236 } 1237 1238 if (iv) 1239 free_temp(pc, iv); 1240 } 1241 1242 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1243 if (pc->result_nr) { 1244 int rid = 0; 1245 1246 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1247 if (!pc->result) 1248 goto out_err; 1249 1250 for (i = 0; i < pc->result_nr; i++) { 1251 for (c = 0; c < 4; c++) { 1252 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1253 pc->result[i*4+c].type = P_TEMP; 1254 pc->result[i*4+c].hw = -1; 1255 } else { 1256 pc->result[i*4+c].type = P_RESULT; 1257 pc->result[i*4+c].hw = rid++; 1258 } 1259 pc->result[i*4+c].index = i; 1260 } 1261 } 1262 } 1263 1264 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1265 if (pc->param_nr) { 1266 int rid = 0; 1267 1268 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1269 if (!pc->param) 1270 goto out_err; 1271 1272 for (i = 0; i < pc->param_nr; i++) { 1273 for (c = 0; c < 4; c++) { 1274 pc->param[i*4+c].type = P_CONST; 1275 pc->param[i*4+c].hw = rid++; 1276 pc->param[i*4+c].index = i; 1277 } 1278 } 1279 } 1280 1281 if (pc->immd_nr) { 1282 int rid = 0; 1283 1284 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1285 if (!pc->immd) 1286 goto out_err; 1287 1288 for (i = 0; i < pc->immd_nr; i++) { 1289 for (c = 0; c < 4; c++) { 1290 pc->immd[i*4+c].type = P_IMMD; 1291 pc->immd[i*4+c].hw = rid++; 1292 pc->immd[i*4+c].index = i; 1293 } 1294 } 1295 } 1296 1297 ret = TRUE; 1298out_err: 1299 tgsi_parse_free(&p); 1300 return ret; 1301} 1302 1303static boolean 1304nv50_program_tx(struct nv50_program *p) 1305{ 1306 struct tgsi_parse_context parse; 1307 struct nv50_pc *pc; 1308 boolean ret; 1309 1310 pc = CALLOC_STRUCT(nv50_pc); 1311 if (!pc) 1312 return FALSE; 1313 pc->p = p; 1314 pc->p->cfg.high_temp = 4; 1315 1316 ret = nv50_program_tx_prep(pc); 1317 if (ret == FALSE) 1318 goto out_cleanup; 1319 1320 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1321 while (!tgsi_parse_end_of_tokens(&parse)) { 1322 const union tgsi_full_token *tok = &parse.FullToken; 1323 1324 tgsi_parse_token(&parse); 1325 1326 switch (tok->Token.Type) { 1327 case TGSI_TOKEN_TYPE_INSTRUCTION: 1328 ret = nv50_program_tx_insn(pc, tok); 1329 if (ret == FALSE) 1330 goto out_err; 1331 break; 1332 default: 1333 break; 1334 } 1335 } 1336 1337 if (p->type == PIPE_SHADER_FRAGMENT) { 1338 struct nv50_reg out; 1339 1340 out.type = P_TEMP; 1341 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) 1342 emit_mov(pc, &out, &pc->result[out.hw]); 1343 } 1344 1345 p->immd_nr = pc->immd_nr * 4; 1346 p->immd = pc->immd_buf; 1347 1348out_err: 1349 tgsi_parse_free(&parse); 1350 1351out_cleanup: 1352 return ret; 1353} 1354 1355static void 1356nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1357{ 1358 int i; 1359 1360 if (nv50_program_tx(p) == FALSE) 1361 assert(0); 1362 /* *not* sufficient, it's fine if last inst is long and 1363 * NOT immd - otherwise it's fucked fucked fucked */ 1364 p->insns[p->insns_nr - 1] |= 0x00000001; 1365 1366 if (p->type == PIPE_SHADER_VERTEX) { 1367 for (i = 0; i < p->insns_nr; i++) 1368 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1369 } else { 1370 for (i = 0; i < p->insns_nr; i++) 1371 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1372 } 1373 1374 p->translated = TRUE; 1375} 1376 1377static void 1378nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1379{ 1380 int i; 1381 1382 for (i = 0; i < p->immd_nr; i++) { 1383 BEGIN_RING(tesla, 0x0f00, 2); 1384 OUT_RING ((NV50_CB_PMISC << 0) | (i << 8)); 1385 OUT_RING (fui(p->immd[i])); 1386 } 1387} 1388 1389static void 1390nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1391{ 1392 struct pipe_winsys *ws = nv50->pipe.winsys; 1393 void *map; 1394 1395 if (!p->buffer) 1396 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1397 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1398 memcpy(map, p->insns, p->insns_nr * 4); 1399 ws->buffer_unmap(ws, p->buffer); 1400} 1401 1402void 1403nv50_vertprog_validate(struct nv50_context *nv50) 1404{ 1405 struct nouveau_grobj *tesla = nv50->screen->tesla; 1406 struct nv50_program *p = nv50->vertprog; 1407 struct nouveau_stateobj *so; 1408 1409 if (!p->translated) { 1410 nv50_program_validate(nv50, p); 1411 if (!p->translated) 1412 assert(0); 1413 } 1414 1415 nv50_program_validate_data(nv50, p); 1416 nv50_program_validate_code(nv50, p); 1417 1418 so = so_new(11, 2); 1419 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1420 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1421 NOUVEAU_BO_HIGH, 0, 0); 1422 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1423 NOUVEAU_BO_LOW, 0, 0); 1424 so_method(so, tesla, 0x1650, 2); 1425 so_data (so, p->cfg.vp.attr[0]); 1426 so_data (so, p->cfg.vp.attr[1]); 1427 so_method(so, tesla, 0x16ac, 2); 1428 so_data (so, 8); 1429 so_data (so, p->cfg.high_temp); 1430 so_method(so, tesla, 0x140c, 1); 1431 so_data (so, 0); /* program start offset */ 1432 so_emit(nv50->screen->nvws, so); 1433 so_ref(NULL, &so); 1434} 1435 1436void 1437nv50_fragprog_validate(struct nv50_context *nv50) 1438{ 1439 struct nouveau_grobj *tesla = nv50->screen->tesla; 1440 struct nv50_program *p = nv50->fragprog; 1441 struct nouveau_stateobj *so; 1442 1443 if (!p->translated) { 1444 nv50_program_validate(nv50, p); 1445 if (!p->translated) 1446 assert(0); 1447 } 1448 1449 nv50_program_validate_data(nv50, p); 1450 nv50_program_validate_code(nv50, p); 1451 1452 so = so_new(7, 2); 1453 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1454 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1455 NOUVEAU_BO_HIGH, 0, 0); 1456 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1457 NOUVEAU_BO_LOW, 0, 0); 1458 so_method(so, tesla, 0x198c, 1); 1459 so_data (so, p->cfg.high_temp); 1460 so_method(so, tesla, 0x1414, 1); 1461 so_data (so, 0); /* program start offset */ 1462 so_emit(nv50->screen->nvws, so); 1463 so_ref(NULL, &so); 1464} 1465 1466void 1467nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1468{ 1469 struct pipe_winsys *ws = nv50->pipe.winsys; 1470 1471 if (p->insns_nr) { 1472 if (p->insns) 1473 FREE(p->insns); 1474 p->insns_nr = 0; 1475 } 1476 1477 if (p->buffer) 1478 pipe_buffer_reference(ws, &p->buffer, NULL); 1479 1480 p->translated = 0; 1481} 1482 1483