nv50_program.c revision fda01b584715c05696a0e6768fda669ef1eb5f3b
1/* 2 * Copyright 2008 Ben Skeggs 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23#include "pipe/p_context.h" 24#include "pipe/p_defines.h" 25#include "pipe/p_state.h" 26#include "pipe/p_inlines.h" 27 28#include "pipe/p_shader_tokens.h" 29#include "tgsi/tgsi_parse.h" 30#include "tgsi/tgsi_util.h" 31 32#include "nv50_context.h" 33 34#define NV50_SU_MAX_TEMP 64 35#define NV50_PROGRAM_DUMP 36 37/* ARL - gallium craps itself on progs/vp/arl.txt 38 * 39 * MSB - Like MAD, but MUL+SUB 40 * - Fuck it off, introduce a way to negate args for ops that 41 * support it. 42 * 43 * Look into inlining IMMD for ops other than MOV (make it general?) 44 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 45 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 46 * 47 * In ops such as ADD it's possible to construct a bad opcode in the !is_long() 48 * case, if the emit_src() causes the inst to suddenly become long. 49 * 50 * Verify half-insns work where expected - and force disable them where they 51 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 52 * 53 * FUCK! watch dst==src vectors, can overwrite components that are needed. 54 * ie. SUB R0, R0.yzxw, R0 55 * 56 * Things to check with renouveau: 57 * FP attr/result assignment - how? 58 * attrib 59 * - 0x16bc maps vp output onto fp hpos 60 * - 0x16c0 maps vp output onto fp col0 61 * result 62 * - colr always 0-3 63 * - depr always 4 64 * 0x16bc->0x16e8 --> some binding between vp/fp regs 65 * 0x16b8 --> VP output count 66 * 67 * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 68 * "MOV rcol.x, fcol.y" = 0x00000004 69 * 0x19a8 --> as above but 0x00000100 and 0x00000000 70 * - 0x00100000 used when KIL used 71 * 0x196c --> as above but 0x00000011 and 0x00000000 72 * 73 * 0x1988 --> 0xXXNNNNNN 74 * - XX == FP high something 75 */ 76struct nv50_reg { 77 enum { 78 P_TEMP, 79 P_ATTR, 80 P_RESULT, 81 P_CONST, 82 P_IMMD 83 } type; 84 int index; 85 86 int hw; 87 int neg; 88}; 89 90struct nv50_pc { 91 struct nv50_program *p; 92 93 /* hw resources */ 94 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 95 96 /* tgsi resources */ 97 struct nv50_reg *temp; 98 int temp_nr; 99 struct nv50_reg *attr; 100 int attr_nr; 101 struct nv50_reg *result; 102 int result_nr; 103 struct nv50_reg *param; 104 int param_nr; 105 struct nv50_reg *immd; 106 float *immd_buf; 107 int immd_nr; 108 109 struct nv50_reg *temp_temp[16]; 110 unsigned temp_temp_nr; 111}; 112 113static void 114alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 115{ 116 int i; 117 118 if (reg->type == P_RESULT) { 119 if (pc->p->cfg.high_result < (reg->hw + 1)) 120 pc->p->cfg.high_result = reg->hw + 1; 121 } 122 123 if (reg->type != P_TEMP) 124 return; 125 126 if (reg->hw >= 0) { 127 /*XXX: do this here too to catch FP temp-as-attr usage.. 128 * not clean, but works */ 129 if (pc->p->cfg.high_temp < (reg->hw + 1)) 130 pc->p->cfg.high_temp = reg->hw + 1; 131 return; 132 } 133 134 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 135 if (!(pc->r_temp[i])) { 136 pc->r_temp[i] = reg; 137 reg->hw = i; 138 if (pc->p->cfg.high_temp < (i + 1)) 139 pc->p->cfg.high_temp = i + 1; 140 return; 141 } 142 } 143 144 assert(0); 145} 146 147static struct nv50_reg * 148alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 149{ 150 struct nv50_reg *r; 151 int i; 152 153 if (dst && dst->type == P_TEMP && dst->hw == -1) 154 return dst; 155 156 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 157 if (!pc->r_temp[i]) { 158 r = CALLOC_STRUCT(nv50_reg); 159 r->type = P_TEMP; 160 r->index = -1; 161 r->hw = i; 162 pc->r_temp[i] = r; 163 return r; 164 } 165 } 166 167 assert(0); 168 return NULL; 169} 170 171static void 172free_temp(struct nv50_pc *pc, struct nv50_reg *r) 173{ 174 if (r->index == -1) { 175 unsigned hw = r->hw; 176 177 FREE(pc->r_temp[hw]); 178 pc->r_temp[hw] = NULL; 179 } 180} 181 182static struct nv50_reg * 183temp_temp(struct nv50_pc *pc) 184{ 185 if (pc->temp_temp_nr >= 16) 186 assert(0); 187 188 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 189 return pc->temp_temp[pc->temp_temp_nr++]; 190} 191 192static void 193kill_temp_temp(struct nv50_pc *pc) 194{ 195 int i; 196 197 for (i = 0; i < pc->temp_temp_nr; i++) 198 free_temp(pc, pc->temp_temp[i]); 199 pc->temp_temp_nr = 0; 200} 201 202static int 203ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 204{ 205 pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)), 206 (pc->immd_nr + 1) * 4 * sizeof(float)); 207 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 208 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 209 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 210 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 211 212 return pc->immd_nr++; 213} 214 215static struct nv50_reg * 216alloc_immd(struct nv50_pc *pc, float f) 217{ 218 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 219 unsigned hw; 220 221 hw = ctor_immd(pc, f, 0, 0, 0) * 4; 222 r->type = P_IMMD; 223 r->hw = hw; 224 r->index = -1; 225 return r; 226} 227 228static struct nv50_program_exec * 229exec(struct nv50_pc *pc) 230{ 231 struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); 232 233 e->param.index = -1; 234 return e; 235} 236 237static void 238emit(struct nv50_pc *pc, struct nv50_program_exec *e) 239{ 240 struct nv50_program *p = pc->p; 241 242 if (p->exec_tail) 243 p->exec_tail->next = e; 244 if (!p->exec_head) 245 p->exec_head = e; 246 p->exec_tail = e; 247 p->exec_size += (e->inst[0] & 1) ? 2 : 1; 248} 249 250static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); 251 252static boolean 253is_long(struct nv50_program_exec *e) 254{ 255 if (e->inst[0] & 1) 256 return TRUE; 257 return FALSE; 258} 259 260static boolean 261is_immd(struct nv50_program_exec *e) 262{ 263 if (is_long(e) && (e->inst[1] & 3) == 3) 264 return TRUE; 265 return FALSE; 266} 267 268static INLINE void 269set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, 270 struct nv50_program_exec *e) 271{ 272 set_long(pc, e); 273 e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 274 e->inst[1] |= (pred << 7) | (idx << 12); 275} 276 277static INLINE void 278set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, 279 struct nv50_program_exec *e) 280{ 281 set_long(pc, e); 282 e->inst[1] &= ~((0x3 << 4) | (1 << 6)); 283 e->inst[1] |= (idx << 4) | (on << 6); 284} 285 286static INLINE void 287set_long(struct nv50_pc *pc, struct nv50_program_exec *e) 288{ 289 if (is_long(e)) 290 return; 291 292 e->inst[0] |= 1; 293 set_pred(pc, 0xf, 0, e); 294 set_pred_wr(pc, 0, 0, e); 295} 296 297static INLINE void 298set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) 299{ 300 if (dst->type == P_RESULT) { 301 set_long(pc, e); 302 e->inst[1] |= 0x00000008; 303 } 304 305 alloc_reg(pc, dst); 306 e->inst[0] |= (dst->hw << 2); 307} 308 309static INLINE void 310set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) 311{ 312 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 313 314 set_long(pc, e); 315 /*XXX: can't be predicated - bits overlap.. catch cases where both 316 * are required and avoid them. */ 317 set_pred(pc, 0, 0, e); 318 set_pred_wr(pc, 0, 0, e); 319 320 e->inst[1] |= 0x00000002 | 0x00000001; 321 e->inst[0] |= (val & 0x3f) << 16; 322 e->inst[1] |= (val >> 6) << 2; 323} 324 325static void 326emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 327 struct nv50_reg *src, struct nv50_reg *iv) 328{ 329 struct nv50_program_exec *e = exec(pc); 330 331 e->inst[0] |= 0x80000000; 332 set_dst(pc, dst, e); 333 alloc_reg(pc, src); 334 e->inst[0] |= (src->hw << 16); 335 if (iv) { 336 e->inst[0] |= (1 << 25); 337 alloc_reg(pc, iv); 338 e->inst[0] |= (iv->hw << 9); 339 } 340 341 emit(pc, e); 342} 343 344static void 345set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, 346 struct nv50_program_exec *e) 347{ 348 set_long(pc, e); 349#if 1 350 e->inst[1] |= (1 << 22); 351#else 352 if (src->type == P_IMMD) { 353 e->inst[1] |= (NV50_CB_PMISC << 22); 354 } else { 355 if (pc->p->type == PIPE_SHADER_VERTEX) 356 e->inst[1] |= (NV50_CB_PVP << 22); 357 else 358 e->inst[1] |= (NV50_CB_PFP << 22); 359 } 360#endif 361 362 e->param.index = src->hw; 363 e->param.shift = s; 364 e->param.mask = m << (s % 32); 365} 366 367static void 368emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 369{ 370 struct nv50_program_exec *e = exec(pc); 371 372 e->inst[0] |= 0x10000000; 373 374 set_dst(pc, dst, e); 375 376 if (0 && dst->type != P_RESULT && src->type == P_IMMD) { 377 set_immd(pc, src, e); 378 /*XXX: 32-bit, but steals part of "half" reg space - need to 379 * catch and handle this case if/when we do half-regs 380 */ 381 e->inst[0] |= 0x00008000; 382 } else 383 if (src->type == P_IMMD || src->type == P_CONST) { 384 set_long(pc, e); 385 set_data(pc, src, 0x7f, 9, e); 386 e->inst[1] |= 0x20000000; /* src0 const? */ 387 } else { 388 if (src->type == P_ATTR) { 389 set_long(pc, e); 390 e->inst[1] |= 0x00200000; 391 } 392 393 alloc_reg(pc, src); 394 e->inst[0] |= (src->hw << 9); 395 } 396 397 /* We really should support "half" instructions here at some point, 398 * but I don't feel confident enough about them yet. 399 */ 400 set_long(pc, e); 401 if (is_long(e) && !is_immd(e)) { 402 e->inst[1] |= 0x04000000; /* 32-bit */ 403 e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 404 } 405 406 emit(pc, e); 407} 408 409static boolean 410check_swap_src_0_1(struct nv50_pc *pc, 411 struct nv50_reg **s0, struct nv50_reg **s1) 412{ 413 struct nv50_reg *src0 = *s0, *src1 = *s1; 414 415 if (src0->type == P_CONST) { 416 if (src1->type != P_CONST) { 417 *s0 = src1; 418 *s1 = src0; 419 return TRUE; 420 } 421 } else 422 if (src1->type == P_ATTR) { 423 if (src0->type != P_ATTR) { 424 *s0 = src1; 425 *s1 = src0; 426 return TRUE; 427 } 428 } 429 430 return FALSE; 431} 432 433static void 434set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 435{ 436 if (src->type == P_ATTR) { 437 set_long(pc, e); 438 e->inst[1] |= 0x00200000; 439 } else 440 if (src->type == P_CONST || src->type == P_IMMD) { 441 struct nv50_reg *temp = temp_temp(pc); 442 443 emit_mov(pc, temp, src); 444 src = temp; 445 } 446 447 alloc_reg(pc, src); 448 e->inst[0] |= (src->hw << 9); 449} 450 451static void 452set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 453{ 454 if (src->type == P_ATTR) { 455 struct nv50_reg *temp = temp_temp(pc); 456 457 emit_mov(pc, temp, src); 458 src = temp; 459 } else 460 if (src->type == P_CONST || src->type == P_IMMD) { 461 assert(!(e->inst[0] & 0x00800000)); 462 if (e->inst[0] & 0x01000000) { 463 struct nv50_reg *temp = temp_temp(pc); 464 465 emit_mov(pc, temp, src); 466 src = temp; 467 } else { 468 set_data(pc, src, 0x7f, 16, e); 469 e->inst[0] |= 0x00800000; 470 } 471 } 472 473 alloc_reg(pc, src); 474 e->inst[0] |= (src->hw << 16); 475} 476 477static void 478set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) 479{ 480 set_long(pc, e); 481 482 if (src->type == P_ATTR) { 483 struct nv50_reg *temp = temp_temp(pc); 484 485 emit_mov(pc, temp, src); 486 src = temp; 487 } else 488 if (src->type == P_CONST || src->type == P_IMMD) { 489 assert(!(e->inst[0] & 0x01000000)); 490 if (e->inst[0] & 0x00800000) { 491 struct nv50_reg *temp = temp_temp(pc); 492 493 emit_mov(pc, temp, src); 494 src = temp; 495 } else { 496 set_data(pc, src, 0x7f, 32+14, e); 497 e->inst[0] |= 0x01000000; 498 } 499 } 500 501 alloc_reg(pc, src); 502 e->inst[1] |= (src->hw << 14); 503} 504 505static void 506emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 507 struct nv50_reg *src1) 508{ 509 struct nv50_program_exec *e = exec(pc); 510 511 e->inst[0] |= 0xc0000000; 512 set_long(pc, e); 513 514 check_swap_src_0_1(pc, &src0, &src1); 515 set_dst(pc, dst, e); 516 set_src_0(pc, src0, e); 517 set_src_1(pc, src1, e); 518 519 emit(pc, e); 520} 521 522static void 523emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 524 struct nv50_reg *src0, struct nv50_reg *src1) 525{ 526 struct nv50_program_exec *e = exec(pc); 527 528 e->inst[0] |= 0xb0000000; 529 530 check_swap_src_0_1(pc, &src0, &src1); 531 set_dst(pc, dst, e); 532 set_src_0(pc, src0, e); 533 if (is_long(e)) 534 set_src_2(pc, src1, e); 535 else 536 set_src_1(pc, src1, e); 537 538 emit(pc, e); 539} 540 541static void 542emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 543 struct nv50_reg *src0, struct nv50_reg *src1) 544{ 545 struct nv50_program_exec *e = exec(pc); 546 547 set_long(pc, e); 548 e->inst[0] |= 0xb0000000; 549 e->inst[1] |= (sub << 29); 550 551 check_swap_src_0_1(pc, &src0, &src1); 552 set_dst(pc, dst, e); 553 set_src_0(pc, src0, e); 554 set_src_1(pc, src1, e); 555 556 emit(pc, e); 557} 558 559static void 560emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 561 struct nv50_reg *src1) 562{ 563 struct nv50_program_exec *e = exec(pc); 564 565 e->inst[0] |= 0xb0000000; 566 567 set_long(pc, e); 568 if (check_swap_src_0_1(pc, &src0, &src1)) 569 e->inst[1] |= 0x04000000; 570 else 571 e->inst[1] |= 0x08000000; 572 573 set_dst(pc, dst, e); 574 set_src_0(pc, src0, e); 575 set_src_2(pc, src1, e); 576 577 emit(pc, e); 578} 579 580static void 581emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 582 struct nv50_reg *src1, struct nv50_reg *src2) 583{ 584 struct nv50_program_exec *e = exec(pc); 585 586 e->inst[0] |= 0xe0000000; 587 588 check_swap_src_0_1(pc, &src0, &src1); 589 set_dst(pc, dst, e); 590 set_src_0(pc, src0, e); 591 set_src_1(pc, src1, e); 592 set_src_2(pc, src2, e); 593 594 emit(pc, e); 595} 596 597static void 598emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 599 struct nv50_reg *src1, struct nv50_reg *src2) 600{ 601 struct nv50_program_exec *e = exec(pc); 602 603 e->inst[0] |= 0xe0000000; 604 set_long(pc, e); 605 e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 606 607 check_swap_src_0_1(pc, &src0, &src1); 608 set_dst(pc, dst, e); 609 set_src_0(pc, src0, e); 610 set_src_1(pc, src1, e); 611 set_src_2(pc, src2, e); 612 613 emit(pc, e); 614} 615 616static void 617emit_flop(struct nv50_pc *pc, unsigned sub, 618 struct nv50_reg *dst, struct nv50_reg *src) 619{ 620 struct nv50_program_exec *e = exec(pc); 621 622 e->inst[0] |= 0x90000000; 623 if (sub) { 624 set_long(pc, e); 625 e->inst[1] |= (sub << 29); 626 } 627 628 set_dst(pc, dst, e); 629 set_src_0(pc, src, e); 630 631 emit(pc, e); 632} 633 634static void 635emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 636{ 637 struct nv50_program_exec *e = exec(pc); 638 639 e->inst[0] |= 0xb0000000; 640 641 set_dst(pc, dst, e); 642 set_src_0(pc, src, e); 643 set_long(pc, e); 644 e->inst[1] |= (6 << 29) | 0x00004000; 645 646 emit(pc, e); 647} 648 649static void 650emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 651{ 652 struct nv50_program_exec *e = exec(pc); 653 654 e->inst[0] |= 0xb0000000; 655 656 set_dst(pc, dst, e); 657 set_src_0(pc, src, e); 658 set_long(pc, e); 659 e->inst[1] |= (6 << 29); 660 661 emit(pc, e); 662} 663 664static void 665emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 666 struct nv50_reg *src0, struct nv50_reg *src1) 667{ 668 struct nv50_program_exec *e = exec(pc); 669 unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; 670 struct nv50_reg *rdst; 671 672 assert(c_op <= 7); 673 if (check_swap_src_0_1(pc, &src0, &src1)) 674 c_op = inv_cop[c_op]; 675 676 rdst = dst; 677 if (dst->type != P_TEMP) 678 dst = alloc_temp(pc, NULL); 679 680 /* set.u32 */ 681 set_long(pc, e); 682 e->inst[0] |= 0xb0000000; 683 e->inst[1] |= (3 << 29); 684 e->inst[1] |= (c_op << 14); 685 /*XXX: breaks things, .u32 by default? 686 * decuda will disasm as .u16 and use .lo/.hi regs, but this 687 * doesn't seem to match what the hw actually does. 688 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 689 */ 690 set_dst(pc, dst, e); 691 set_src_0(pc, src0, e); 692 set_src_1(pc, src1, e); 693 emit(pc, e); 694 695 /* cvt.f32.u32 */ 696 e = exec(pc); 697 e->inst[0] = 0xa0000001; 698 e->inst[1] = 0x64014780; 699 set_dst(pc, rdst, e); 700 set_src_0(pc, dst, e); 701 emit(pc, e); 702 703 if (dst != rdst) 704 free_temp(pc, dst); 705} 706 707static void 708emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 709{ 710 struct nv50_program_exec *e = exec(pc); 711 712 e->inst[0] = 0xa0000000; /* cvt */ 713 set_long(pc, e); 714 e->inst[1] |= (6 << 29); /* cvt */ 715 e->inst[1] |= 0x08000000; /* integer mode */ 716 e->inst[1] |= 0x04000000; /* 32 bit */ 717 e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 718 e->inst[1] |= (1 << 14); /* src .f32 */ 719 set_dst(pc, dst, e); 720 set_src_0(pc, src, e); 721 722 emit(pc, e); 723} 724 725static void 726emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 727 struct nv50_reg *v, struct nv50_reg *e) 728{ 729 struct nv50_reg *temp = alloc_temp(pc, NULL); 730 731 emit_flop(pc, 3, temp, v); 732 emit_mul(pc, temp, temp, e); 733 emit_preex2(pc, temp, temp); 734 emit_flop(pc, 6, dst, temp); 735 736 free_temp(pc, temp); 737} 738 739static void 740emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 741{ 742 struct nv50_program_exec *e = exec(pc); 743 744 e->inst[0] = 0xa0000000; /* cvt */ 745 set_long(pc, e); 746 e->inst[1] |= (6 << 29); /* cvt */ 747 e->inst[1] |= 0x04000000; /* 32 bit */ 748 e->inst[1] |= (1 << 14); /* src .f32 */ 749 e->inst[1] |= ((1 << 6) << 14); /* .abs */ 750 set_dst(pc, dst, e); 751 set_src_0(pc, src, e); 752 753 emit(pc, e); 754} 755 756static void 757emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, 758 struct nv50_reg **src) 759{ 760 struct nv50_reg *one = alloc_immd(pc, 1.0); 761 struct nv50_reg *zero = alloc_immd(pc, 0.0); 762 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 763 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 764 struct nv50_reg *tmp[4]; 765 766 if (mask & (1 << 0)) 767 emit_mov(pc, dst[0], one); 768 769 if (mask & (1 << 3)) 770 emit_mov(pc, dst[3], one); 771 772 if (mask & (3 << 1)) { 773 if (mask & (1 << 1)) 774 tmp[0] = dst[1]; 775 else 776 tmp[0] = temp_temp(pc); 777 emit_minmax(pc, 4, tmp[0], src[0], zero); 778 } 779 780 if (mask & (1 << 2)) { 781 set_pred_wr(pc, 1, 0, pc->p->exec_tail); 782 783 tmp[1] = temp_temp(pc); 784 emit_minmax(pc, 4, tmp[1], src[1], zero); 785 786 tmp[3] = temp_temp(pc); 787 emit_minmax(pc, 4, tmp[3], src[3], neg128); 788 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 789 790 emit_pow(pc, dst[2], tmp[1], tmp[3]); 791 emit_mov(pc, dst[2], zero); 792 set_pred(pc, 3, 0, pc->p->exec_tail); 793 } 794} 795 796static void 797emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 798{ 799 struct nv50_program_exec *e = exec(pc); 800 801 set_long(pc, e); 802 e->inst[0] |= 0xa0000000; /* delta */ 803 e->inst[1] |= (7 << 29); /* delta */ 804 e->inst[1] |= 0x04000000; /* negate arg0? probably not */ 805 e->inst[1] |= (1 << 14); /* src .f32 */ 806 set_dst(pc, dst, e); 807 set_src_0(pc, src, e); 808 809 emit(pc, e); 810} 811 812static struct nv50_reg * 813tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 814{ 815 switch (dst->DstRegister.File) { 816 case TGSI_FILE_TEMPORARY: 817 return &pc->temp[dst->DstRegister.Index * 4 + c]; 818 case TGSI_FILE_OUTPUT: 819 return &pc->result[dst->DstRegister.Index * 4 + c]; 820 case TGSI_FILE_NULL: 821 return NULL; 822 default: 823 break; 824 } 825 826 return NULL; 827} 828 829static struct nv50_reg * 830tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 831{ 832 struct nv50_reg *r = NULL; 833 struct nv50_reg *temp; 834 unsigned c; 835 836 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 837 switch (c) { 838 case TGSI_EXTSWIZZLE_X: 839 case TGSI_EXTSWIZZLE_Y: 840 case TGSI_EXTSWIZZLE_Z: 841 case TGSI_EXTSWIZZLE_W: 842 switch (src->SrcRegister.File) { 843 case TGSI_FILE_INPUT: 844 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 845 break; 846 case TGSI_FILE_TEMPORARY: 847 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 848 break; 849 case TGSI_FILE_CONSTANT: 850 r = &pc->param[src->SrcRegister.Index * 4 + c]; 851 break; 852 case TGSI_FILE_IMMEDIATE: 853 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 854 break; 855 case TGSI_FILE_SAMPLER: 856 break; 857 default: 858 assert(0); 859 break; 860 } 861 break; 862 case TGSI_EXTSWIZZLE_ZERO: 863 r = alloc_immd(pc, 0.0); 864 break; 865 case TGSI_EXTSWIZZLE_ONE: 866 r = alloc_immd(pc, 1.0); 867 break; 868 default: 869 assert(0); 870 break; 871 } 872 873 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 874 case TGSI_UTIL_SIGN_KEEP: 875 break; 876 case TGSI_UTIL_SIGN_CLEAR: 877 temp = temp_temp(pc); 878 emit_abs(pc, temp, r); 879 r = temp; 880 break; 881 case TGSI_UTIL_SIGN_TOGGLE: 882 temp = temp_temp(pc); 883 emit_neg(pc, temp, r); 884 r = temp; 885 break; 886 case TGSI_UTIL_SIGN_SET: 887 temp = temp_temp(pc); 888 emit_abs(pc, temp, r); 889 emit_neg(pc, temp, r); 890 r = temp; 891 break; 892 default: 893 assert(0); 894 break; 895 } 896 897 return r; 898} 899 900static boolean 901nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 902{ 903 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 904 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 905 unsigned mask, sat; 906 int i, c; 907 908 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 909 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 910 911 for (c = 0; c < 4; c++) { 912 if (mask & (1 << c)) 913 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 914 else 915 dst[c] = NULL; 916 } 917 918 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 919 for (c = 0; c < 4; c++) 920 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 921 } 922 923 if (sat) { 924 for (c = 0; c < 4; c++) { 925 rdst[c] = dst[c]; 926 dst[c] = temp_temp(pc); 927 } 928 } 929 930 switch (inst->Instruction.Opcode) { 931 case TGSI_OPCODE_ABS: 932 for (c = 0; c < 4; c++) { 933 if (!(mask & (1 << c))) 934 continue; 935 emit_abs(pc, dst[c], src[0][c]); 936 } 937 break; 938 case TGSI_OPCODE_ADD: 939 for (c = 0; c < 4; c++) { 940 if (!(mask & (1 << c))) 941 continue; 942 emit_add(pc, dst[c], src[0][c], src[1][c]); 943 } 944 break; 945 case TGSI_OPCODE_COS: 946 temp = alloc_temp(pc, NULL); 947 emit_precossin(pc, temp, src[0][0]); 948 emit_flop(pc, 5, temp, temp); 949 for (c = 0; c < 4; c++) { 950 if (!(mask & (1 << c))) 951 continue; 952 emit_mov(pc, dst[c], temp); 953 } 954 break; 955 case TGSI_OPCODE_DP3: 956 temp = alloc_temp(pc, NULL); 957 emit_mul(pc, temp, src[0][0], src[1][0]); 958 emit_mad(pc, temp, src[0][1], src[1][1], temp); 959 emit_mad(pc, temp, src[0][2], src[1][2], temp); 960 for (c = 0; c < 4; c++) { 961 if (!(mask & (1 << c))) 962 continue; 963 emit_mov(pc, dst[c], temp); 964 } 965 free_temp(pc, temp); 966 break; 967 case TGSI_OPCODE_DP4: 968 temp = alloc_temp(pc, NULL); 969 emit_mul(pc, temp, src[0][0], src[1][0]); 970 emit_mad(pc, temp, src[0][1], src[1][1], temp); 971 emit_mad(pc, temp, src[0][2], src[1][2], temp); 972 emit_mad(pc, temp, src[0][3], src[1][3], temp); 973 for (c = 0; c < 4; c++) { 974 if (!(mask & (1 << c))) 975 continue; 976 emit_mov(pc, dst[c], temp); 977 } 978 free_temp(pc, temp); 979 break; 980 case TGSI_OPCODE_DPH: 981 temp = alloc_temp(pc, NULL); 982 emit_mul(pc, temp, src[0][0], src[1][0]); 983 emit_mad(pc, temp, src[0][1], src[1][1], temp); 984 emit_mad(pc, temp, src[0][2], src[1][2], temp); 985 emit_add(pc, temp, src[1][3], temp); 986 for (c = 0; c < 4; c++) { 987 if (!(mask & (1 << c))) 988 continue; 989 emit_mov(pc, dst[c], temp); 990 } 991 free_temp(pc, temp); 992 break; 993 case TGSI_OPCODE_DST: 994 { 995 struct nv50_reg *one = alloc_immd(pc, 1.0); 996 if (mask & (1 << 0)) 997 emit_mov(pc, dst[0], one); 998 if (mask & (1 << 1)) 999 emit_mul(pc, dst[1], src[0][1], src[1][1]); 1000 if (mask & (1 << 2)) 1001 emit_mov(pc, dst[2], src[0][2]); 1002 if (mask & (1 << 3)) 1003 emit_mov(pc, dst[3], src[1][3]); 1004 FREE(one); 1005 } 1006 break; 1007 case TGSI_OPCODE_EX2: 1008 temp = alloc_temp(pc, NULL); 1009 emit_preex2(pc, temp, src[0][0]); 1010 emit_flop(pc, 6, temp, temp); 1011 for (c = 0; c < 4; c++) { 1012 if (!(mask & (1 << c))) 1013 continue; 1014 emit_mov(pc, dst[c], temp); 1015 } 1016 free_temp(pc, temp); 1017 break; 1018 case TGSI_OPCODE_FLR: 1019 for (c = 0; c < 4; c++) { 1020 if (!(mask & (1 << c))) 1021 continue; 1022 emit_flr(pc, dst[c], src[0][c]); 1023 } 1024 break; 1025 case TGSI_OPCODE_FRC: 1026 temp = alloc_temp(pc, NULL); 1027 for (c = 0; c < 4; c++) { 1028 if (!(mask & (1 << c))) 1029 continue; 1030 emit_flr(pc, temp, src[0][c]); 1031 emit_sub(pc, dst[c], src[0][c], temp); 1032 } 1033 free_temp(pc, temp); 1034 break; 1035 case TGSI_OPCODE_LIT: 1036 emit_lit(pc, &dst[0], mask, &src[0][0]); 1037 break; 1038 case TGSI_OPCODE_LG2: 1039 temp = alloc_temp(pc, NULL); 1040 emit_flop(pc, 3, temp, src[0][0]); 1041 for (c = 0; c < 4; c++) { 1042 if (!(mask & (1 << c))) 1043 continue; 1044 emit_mov(pc, dst[c], temp); 1045 } 1046 break; 1047 case TGSI_OPCODE_LRP: 1048 for (c = 0; c < 4; c++) { 1049 if (!(mask & (1 << c))) 1050 continue; 1051 /*XXX: we can do better than this */ 1052 temp = alloc_temp(pc, NULL); 1053 emit_neg(pc, temp, src[0][c]); 1054 emit_mad(pc, temp, temp, src[2][c], src[2][c]); 1055 emit_mad(pc, dst[c], src[0][c], src[1][c], temp); 1056 free_temp(pc, temp); 1057 } 1058 break; 1059 case TGSI_OPCODE_MAD: 1060 for (c = 0; c < 4; c++) { 1061 if (!(mask & (1 << c))) 1062 continue; 1063 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 1064 } 1065 break; 1066 case TGSI_OPCODE_MAX: 1067 for (c = 0; c < 4; c++) { 1068 if (!(mask & (1 << c))) 1069 continue; 1070 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 1071 } 1072 break; 1073 case TGSI_OPCODE_MIN: 1074 for (c = 0; c < 4; c++) { 1075 if (!(mask & (1 << c))) 1076 continue; 1077 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 1078 } 1079 break; 1080 case TGSI_OPCODE_MOV: 1081 for (c = 0; c < 4; c++) { 1082 if (!(mask & (1 << c))) 1083 continue; 1084 emit_mov(pc, dst[c], src[0][c]); 1085 } 1086 break; 1087 case TGSI_OPCODE_MUL: 1088 for (c = 0; c < 4; c++) { 1089 if (!(mask & (1 << c))) 1090 continue; 1091 emit_mul(pc, dst[c], src[0][c], src[1][c]); 1092 } 1093 break; 1094 case TGSI_OPCODE_POW: 1095 temp = alloc_temp(pc, NULL); 1096 emit_pow(pc, temp, src[0][0], src[1][0]); 1097 for (c = 0; c < 4; c++) { 1098 if (!(mask & (1 << c))) 1099 continue; 1100 emit_mov(pc, dst[c], temp); 1101 } 1102 free_temp(pc, temp); 1103 break; 1104 case TGSI_OPCODE_RCP: 1105 for (c = 0; c < 4; c++) { 1106 if (!(mask & (1 << c))) 1107 continue; 1108 emit_flop(pc, 0, dst[c], src[0][0]); 1109 } 1110 break; 1111 case TGSI_OPCODE_RSQ: 1112 for (c = 0; c < 4; c++) { 1113 if (!(mask & (1 << c))) 1114 continue; 1115 emit_flop(pc, 2, dst[c], src[0][0]); 1116 } 1117 break; 1118 case TGSI_OPCODE_SCS: 1119 temp = alloc_temp(pc, NULL); 1120 emit_precossin(pc, temp, src[0][0]); 1121 if (mask & (1 << 0)) 1122 emit_flop(pc, 5, dst[0], temp); 1123 if (mask & (1 << 1)) 1124 emit_flop(pc, 4, dst[1], temp); 1125 break; 1126 case TGSI_OPCODE_SGE: 1127 for (c = 0; c < 4; c++) { 1128 if (!(mask & (1 << c))) 1129 continue; 1130 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 1131 } 1132 break; 1133 case TGSI_OPCODE_SIN: 1134 temp = alloc_temp(pc, NULL); 1135 emit_precossin(pc, temp, src[0][0]); 1136 emit_flop(pc, 4, temp, temp); 1137 for (c = 0; c < 4; c++) { 1138 if (!(mask & (1 << c))) 1139 continue; 1140 emit_mov(pc, dst[c], temp); 1141 } 1142 break; 1143 case TGSI_OPCODE_SLT: 1144 for (c = 0; c < 4; c++) { 1145 if (!(mask & (1 << c))) 1146 continue; 1147 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 1148 } 1149 break; 1150 case TGSI_OPCODE_SUB: 1151 for (c = 0; c < 4; c++) { 1152 if (!(mask & (1 << c))) 1153 continue; 1154 emit_sub(pc, dst[c], src[0][c], src[1][c]); 1155 } 1156 break; 1157 case TGSI_OPCODE_TEX: 1158 { 1159 struct nv50_reg *t0, *t1, *t2, *t3; 1160 struct nv50_program_exec *e; 1161 1162 t0 = alloc_temp(pc, NULL); 1163 t0 = alloc_temp(pc, NULL); 1164 t1 = alloc_temp(pc, NULL); 1165 t2 = alloc_temp(pc, NULL); 1166 t3 = alloc_temp(pc, NULL); 1167 emit_mov(pc, t0, src[0][0]); 1168 emit_mov(pc, t1, src[0][1]); 1169 1170 e = exec(pc); 1171 e->inst[0] = 0xf6400000; 1172 set_long(pc, e); 1173 e->inst[1] |= 0x0000c004; 1174 set_dst(pc, t0, e); 1175 emit(pc, e); 1176 1177 if (mask & (1 << 0)) emit_mov(pc, dst[0], t0); 1178 if (mask & (1 << 1)) emit_mov(pc, dst[1], t1); 1179 if (mask & (1 << 2)) emit_mov(pc, dst[2], t2); 1180 if (mask & (1 << 3)) emit_mov(pc, dst[3], t3); 1181 1182 free_temp(pc, t0); 1183 free_temp(pc, t1); 1184 free_temp(pc, t2); 1185 free_temp(pc, t3); 1186 } 1187 break; 1188 case TGSI_OPCODE_XPD: 1189 temp = alloc_temp(pc, NULL); 1190 if (mask & (1 << 0)) { 1191 emit_mul(pc, temp, src[0][2], src[1][1]); 1192 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 1193 } 1194 if (mask & (1 << 1)) { 1195 emit_mul(pc, temp, src[0][0], src[1][2]); 1196 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 1197 } 1198 if (mask & (1 << 2)) { 1199 emit_mul(pc, temp, src[0][1], src[1][0]); 1200 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 1201 } 1202 free_temp(pc, temp); 1203 break; 1204 case TGSI_OPCODE_END: 1205 break; 1206 default: 1207 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 1208 return FALSE; 1209 } 1210 1211 if (sat) { 1212 for (c = 0; c < 4; c++) { 1213 struct nv50_program_exec *e; 1214 1215 if (!(mask & (1 << c))) 1216 continue; 1217 e = exec(pc); 1218 1219 e->inst[0] = 0xa0000000; /* cvt */ 1220 set_long(pc, e); 1221 e->inst[1] |= (6 << 29); /* cvt */ 1222 e->inst[1] |= 0x04000000; /* 32 bit */ 1223 e->inst[1] |= (1 << 14); /* src .f32 */ 1224 e->inst[1] |= ((1 << 5) << 14); /* .sat */ 1225 set_dst(pc, rdst[c], e); 1226 set_src_0(pc, dst[c], e); 1227 emit(pc, e); 1228 } 1229 } 1230 1231 kill_temp_temp(pc); 1232 return TRUE; 1233} 1234 1235static boolean 1236nv50_program_tx_prep(struct nv50_pc *pc) 1237{ 1238 struct tgsi_parse_context p; 1239 boolean ret = FALSE; 1240 unsigned i, c; 1241 1242 tgsi_parse_init(&p, pc->p->pipe.tokens); 1243 while (!tgsi_parse_end_of_tokens(&p)) { 1244 const union tgsi_full_token *tok = &p.FullToken; 1245 1246 tgsi_parse_token(&p); 1247 switch (tok->Token.Type) { 1248 case TGSI_TOKEN_TYPE_IMMEDIATE: 1249 { 1250 const struct tgsi_full_immediate *imm = 1251 &p.FullToken.FullImmediate; 1252 1253 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1254 imm->u.ImmediateFloat32[1].Float, 1255 imm->u.ImmediateFloat32[2].Float, 1256 imm->u.ImmediateFloat32[3].Float); 1257 } 1258 break; 1259 case TGSI_TOKEN_TYPE_DECLARATION: 1260 { 1261 const struct tgsi_full_declaration *d; 1262 unsigned last; 1263 1264 d = &p.FullToken.FullDeclaration; 1265 last = d->DeclarationRange.Last; 1266 1267 switch (d->Declaration.File) { 1268 case TGSI_FILE_TEMPORARY: 1269 if (pc->temp_nr < (last + 1)) 1270 pc->temp_nr = last + 1; 1271 break; 1272 case TGSI_FILE_OUTPUT: 1273 if (pc->result_nr < (last + 1)) 1274 pc->result_nr = last + 1; 1275 break; 1276 case TGSI_FILE_INPUT: 1277 if (pc->attr_nr < (last + 1)) 1278 pc->attr_nr = last + 1; 1279 break; 1280 case TGSI_FILE_CONSTANT: 1281 if (pc->param_nr < (last + 1)) 1282 pc->param_nr = last + 1; 1283 break; 1284 case TGSI_FILE_SAMPLER: 1285 break; 1286 default: 1287 NOUVEAU_ERR("bad decl file %d\n", 1288 d->Declaration.File); 1289 goto out_err; 1290 } 1291 } 1292 break; 1293 case TGSI_TOKEN_TYPE_INSTRUCTION: 1294 break; 1295 default: 1296 break; 1297 } 1298 } 1299 1300 if (pc->temp_nr) { 1301 pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1302 if (!pc->temp) 1303 goto out_err; 1304 1305 for (i = 0; i < pc->temp_nr; i++) { 1306 for (c = 0; c < 4; c++) { 1307 pc->temp[i*4+c].type = P_TEMP; 1308 pc->temp[i*4+c].hw = -1; 1309 pc->temp[i*4+c].index = i; 1310 } 1311 } 1312 } 1313 1314 if (pc->attr_nr) { 1315 struct nv50_reg *iv = NULL; 1316 int aid = 0; 1317 1318 pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1319 if (!pc->attr) 1320 goto out_err; 1321 1322 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1323 iv = alloc_temp(pc, NULL); 1324 emit_interp(pc, iv, iv, NULL); 1325 emit_flop(pc, 0, iv, iv); 1326 aid++; 1327 } 1328 1329 for (i = 0; i < pc->attr_nr; i++) { 1330 struct nv50_reg *a = &pc->attr[i*4]; 1331 1332 for (c = 0; c < 4; c++) { 1333 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1334 struct nv50_reg *at = 1335 alloc_temp(pc, NULL); 1336 pc->attr[i*4+c].type = at->type; 1337 pc->attr[i*4+c].hw = at->hw; 1338 pc->attr[i*4+c].index = at->index; 1339 } else { 1340 pc->p->cfg.vp.attr[aid/32] |= 1341 (1 << (aid % 32)); 1342 pc->attr[i*4+c].type = P_ATTR; 1343 pc->attr[i*4+c].hw = aid++; 1344 pc->attr[i*4+c].index = i; 1345 } 1346 } 1347 1348 if (pc->p->type != PIPE_SHADER_FRAGMENT) 1349 continue; 1350 1351 emit_interp(pc, &a[0], &a[0], iv); 1352 emit_interp(pc, &a[1], &a[1], iv); 1353 emit_interp(pc, &a[2], &a[2], iv); 1354 emit_interp(pc, &a[3], &a[3], iv); 1355 } 1356 1357 if (iv) 1358 free_temp(pc, iv); 1359 } 1360 1361 if (pc->result_nr) { 1362 int rid = 0; 1363 1364 pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); 1365 if (!pc->result) 1366 goto out_err; 1367 1368 for (i = 0; i < pc->result_nr; i++) { 1369 for (c = 0; c < 4; c++) { 1370 if (pc->p->type == PIPE_SHADER_FRAGMENT) { 1371 pc->result[i*4+c].type = P_TEMP; 1372 pc->result[i*4+c].hw = -1; 1373 } else { 1374 pc->result[i*4+c].type = P_RESULT; 1375 pc->result[i*4+c].hw = rid++; 1376 } 1377 pc->result[i*4+c].index = i; 1378 } 1379 } 1380 } 1381 1382 if (pc->param_nr) { 1383 int rid = 0; 1384 1385 pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); 1386 if (!pc->param) 1387 goto out_err; 1388 1389 for (i = 0; i < pc->param_nr; i++) { 1390 for (c = 0; c < 4; c++) { 1391 pc->param[i*4+c].type = P_CONST; 1392 pc->param[i*4+c].hw = rid++; 1393 pc->param[i*4+c].index = i; 1394 } 1395 } 1396 } 1397 1398 if (pc->immd_nr) { 1399 int rid = pc->param_nr * 4; 1400 1401 pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1402 if (!pc->immd) 1403 goto out_err; 1404 1405 for (i = 0; i < pc->immd_nr; i++) { 1406 for (c = 0; c < 4; c++) { 1407 pc->immd[i*4+c].type = P_IMMD; 1408 pc->immd[i*4+c].hw = rid++; 1409 pc->immd[i*4+c].index = i; 1410 } 1411 } 1412 } 1413 1414 ret = TRUE; 1415out_err: 1416 tgsi_parse_free(&p); 1417 return ret; 1418} 1419 1420static boolean 1421nv50_program_tx(struct nv50_program *p) 1422{ 1423 struct tgsi_parse_context parse; 1424 struct nv50_pc *pc; 1425 boolean ret; 1426 1427 pc = CALLOC_STRUCT(nv50_pc); 1428 if (!pc) 1429 return FALSE; 1430 pc->p = p; 1431 pc->p->cfg.high_temp = 4; 1432 1433 ret = nv50_program_tx_prep(pc); 1434 if (ret == FALSE) 1435 goto out_cleanup; 1436 1437 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1438 while (!tgsi_parse_end_of_tokens(&parse)) { 1439 const union tgsi_full_token *tok = &parse.FullToken; 1440 1441 tgsi_parse_token(&parse); 1442 1443 switch (tok->Token.Type) { 1444 case TGSI_TOKEN_TYPE_INSTRUCTION: 1445 ret = nv50_program_tx_insn(pc, tok); 1446 if (ret == FALSE) 1447 goto out_err; 1448 break; 1449 default: 1450 break; 1451 } 1452 } 1453 1454 if (p->type == PIPE_SHADER_FRAGMENT) { 1455 struct nv50_reg out; 1456 1457 out.type = P_TEMP; 1458 for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) 1459 emit_mov(pc, &out, &pc->result[out.hw]); 1460 } 1461 1462 assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); 1463 pc->p->exec_tail->inst[1] |= 0x00000001; 1464 1465 p->param_nr = pc->param_nr * 4; 1466 p->immd_nr = pc->immd_nr * 4; 1467 p->immd = pc->immd_buf; 1468 1469out_err: 1470 tgsi_parse_free(&parse); 1471 1472out_cleanup: 1473 return ret; 1474} 1475 1476static void 1477nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1478{ 1479 if (nv50_program_tx(p) == FALSE) 1480 assert(0); 1481 p->translated = TRUE; 1482} 1483 1484static void 1485nv50_program_upload_data(struct nv50_context *nv50, float *map, 1486 unsigned start, unsigned count) 1487{ 1488 while (count) { 1489 unsigned nr = count > 2047 ? 2047 : count; 1490 1491 BEGIN_RING(tesla, 0x00000f00, 1); 1492 OUT_RING ((NV50_CB_PMISC << 0) | (start << 8)); 1493 BEGIN_RING(tesla, 0x40000f04, nr); 1494 OUT_RINGp (map, nr); 1495 1496 map += nr; 1497 start += nr; 1498 count -= nr; 1499 } 1500} 1501 1502static void 1503nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1504{ 1505 struct nouveau_winsys *nvws = nv50->screen->nvws; 1506 struct pipe_winsys *ws = nv50->pipe.winsys; 1507 unsigned nr = p->param_nr + p->immd_nr; 1508 1509 if (!p->data && nr) { 1510 struct nouveau_resource *heap = nv50->screen->vp_data_heap; 1511 1512 if (nvws->res_alloc(heap, nr, p, &p->data)) { 1513 while (heap->next && heap->size < nr) { 1514 struct nv50_program *evict = heap->next->priv; 1515 nvws->res_free(&evict->data); 1516 } 1517 1518 if (nvws->res_alloc(heap, nr, p, &p->data)) 1519 assert(0); 1520 } 1521 } 1522 1523 if (p->param_nr) { 1524 float *map = ws->buffer_map(ws, nv50->constbuf[p->type], 1525 PIPE_BUFFER_USAGE_CPU_READ); 1526 nv50_program_upload_data(nv50, map, p->data->start, 1527 p->param_nr); 1528 ws->buffer_unmap(ws, nv50->constbuf[p->type]); 1529 } 1530 1531 if (p->immd_nr) { 1532 nv50_program_upload_data(nv50, p->immd, 1533 p->data->start + p->param_nr, 1534 p->immd_nr); 1535 } 1536} 1537 1538static void 1539nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1540{ 1541 struct pipe_winsys *ws = nv50->pipe.winsys; 1542 struct nv50_program_exec *e; 1543 struct nouveau_stateobj *so; 1544 const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; 1545 unsigned start, count, *up, *ptr; 1546 boolean upload = FALSE; 1547 1548 if (!p->buffer) { 1549 p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); 1550 upload = TRUE; 1551 } 1552 1553 if (p->data && p->data->start != p->data_start) { 1554 for (e = p->exec_head; e; e = e->next) { 1555 unsigned ei, ci; 1556 1557 if (e->param.index < 0) 1558 continue; 1559 ei = e->param.shift >> 5; 1560 ci = e->param.index + p->data->start; 1561 1562 e->inst[ei] &= ~e->param.mask; 1563 e->inst[ei] |= (ci << e->param.shift); 1564 } 1565 1566 p->data_start = p->data->start; 1567 upload = TRUE; 1568 } 1569 1570 if (!upload) 1571 return; 1572 1573 up = ptr = MALLOC(p->exec_size * 4); 1574 for (e = p->exec_head; e; e = e->next) { 1575 *(ptr++) = e->inst[0]; 1576 if (is_long(e)) 1577 *(ptr++) = e->inst[1]; 1578 } 1579 1580 so = so_new(4,2); 1581 so_method(so, nv50->screen->tesla, 0x1280, 3); 1582 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); 1583 so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); 1584 so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); 1585 1586 start = 0; count = p->exec_size; 1587 while (count) { 1588 struct nouveau_winsys *nvws = nv50->screen->nvws; 1589 unsigned nr; 1590 1591 so_emit(nvws, so); 1592 1593 nr = MIN2(count, 2047); 1594 nr = MIN2(nvws->channel->pushbuf->remaining, nr); 1595 if (nvws->channel->pushbuf->remaining < (nr + 3)) { 1596 FIRE_RING(NULL); 1597 continue; 1598 } 1599 1600 BEGIN_RING(tesla, 0x0f00, 1); 1601 OUT_RING ((start << 8) | NV50_CB_PUPLOAD); 1602 BEGIN_RING(tesla, 0x40000f04, nr); 1603 OUT_RINGp (up + start, nr); 1604 1605 start += nr; 1606 count -= nr; 1607 } 1608 1609 FREE(up); 1610 so_ref(NULL, &so); 1611} 1612 1613void 1614nv50_vertprog_validate(struct nv50_context *nv50) 1615{ 1616 struct nouveau_grobj *tesla = nv50->screen->tesla; 1617 struct nv50_program *p = nv50->vertprog; 1618 struct nouveau_stateobj *so; 1619 1620 if (!p->translated) { 1621 nv50_program_validate(nv50, p); 1622 if (!p->translated) 1623 assert(0); 1624 } 1625 1626 nv50_program_validate_data(nv50, p); 1627 nv50_program_validate_code(nv50, p); 1628 1629 so = so_new(13, 2); 1630 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1631 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1632 NOUVEAU_BO_HIGH, 0, 0); 1633 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1634 NOUVEAU_BO_LOW, 0, 0); 1635 so_method(so, tesla, 0x1650, 2); 1636 so_data (so, p->cfg.vp.attr[0]); 1637 so_data (so, p->cfg.vp.attr[1]); 1638 so_method(so, tesla, 0x16b8, 1); 1639 so_data (so, p->cfg.high_result); 1640 so_method(so, tesla, 0x16ac, 2); 1641 so_data (so, p->cfg.high_result); //8); 1642 so_data (so, p->cfg.high_temp); 1643 so_method(so, tesla, 0x140c, 1); 1644 so_data (so, 0); /* program start offset */ 1645 so_ref(so, &nv50->state.vertprog); 1646} 1647 1648void 1649nv50_fragprog_validate(struct nv50_context *nv50) 1650{ 1651 struct nouveau_grobj *tesla = nv50->screen->tesla; 1652 struct nv50_program *p = nv50->fragprog; 1653 struct nouveau_stateobj *so; 1654 1655 if (!p->translated) { 1656 nv50_program_validate(nv50, p); 1657 if (!p->translated) 1658 assert(0); 1659 } 1660 1661 nv50_program_validate_data(nv50, p); 1662 nv50_program_validate_code(nv50, p); 1663 1664 so = so_new(64, 2); 1665 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1666 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1667 NOUVEAU_BO_HIGH, 0, 0); 1668 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1669 NOUVEAU_BO_LOW, 0, 0); 1670 so_method(so, tesla, 0x1904, 4); 1671 so_data (so, 0x01040404); /* p: 0x01000404 */ 1672 so_data (so, 0x00000004); 1673 so_data (so, 0x00000000); 1674 so_data (so, 0x00000000); 1675 so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */ 1676 so_data (so, 0x03020100); 1677 so_data (so, 0x07060504); 1678 so_data (so, 0x0b0a0908); 1679 so_method(so, tesla, 0x1988, 2); 1680 so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */ 1681 so_data (so, p->cfg.high_temp); 1682 so_method(so, tesla, 0x1414, 1); 1683 so_data (so, 0); /* program start offset */ 1684 so_ref(so, &nv50->state.fragprog); 1685} 1686 1687void 1688nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1689{ 1690 struct pipe_winsys *ws = nv50->pipe.winsys; 1691 1692 while (p->exec_head) { 1693 struct nv50_program_exec *e = p->exec_head; 1694 1695 p->exec_head = e->next; 1696 FREE(e); 1697 } 1698 p->exec_tail = NULL; 1699 p->exec_size = 0; 1700 1701 if (p->buffer) 1702 pipe_buffer_reference(ws, &p->buffer, NULL); 1703 1704 nv50->screen->nvws->res_free(&p->data); 1705 1706 p->translated = 0; 1707} 1708 1709