nv50_program.c revision afcaeaa0e4dc3ced40621c76304a2c0c5a3ab403
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15/* ARL - gallium craps itself on progs/vp/arl.txt 16 * 17 * MSB - Like MAD, but MUL+SUB 18 * - Fuck it off, introduce a way to negate args for ops that 19 * support it. 20 * 21 * Look into inlining IMMD for ops other than MOV (make it general?) 22 * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, 23 * but can emit to P_TEMP first - then MOV later. NVIDIA does this 24 * 25 * Hmmm.. what happens if we have src1+src2 both consts.. ouch ! 26 * 27 * Verify half-insns work where expected - and force disable them where they 28 * don't work - MUL has it forcibly disabled atm as it fixes POW.. 29 * 30 * FUCK! watch dst==src vectors, can overwrite components that are needed. 31 * ie. SUB R0, R0.yzxw, R0 32 */ 33struct nv50_reg { 34 enum { 35 P_TEMP, 36 P_ATTR, 37 P_RESULT, 38 P_CONST, 39 P_IMMD 40 } type; 41 int index; 42 43 int hw; 44 int neg; 45}; 46 47struct nv50_pc { 48 struct nv50_program *p; 49 50 /* hw resources */ 51 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 52 53 /* tgsi resources */ 54 struct nv50_reg *temp; 55 int temp_nr; 56 struct nv50_reg *attr; 57 int attr_nr; 58 struct nv50_reg *result; 59 int result_nr; 60 struct nv50_reg *param; 61 int param_nr; 62 struct nv50_reg *immd; 63 float *immd_buf; 64 int immd_nr; 65 66 struct nv50_reg *temp_temp[8]; 67 unsigned temp_temp_nr; 68}; 69 70static void 71alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 72{ 73 int i; 74 75 if (reg->type != P_TEMP) 76 return; 77 78 if (reg->hw >= 0) { 79 /*XXX: do this here too to catch FP temp-as-attr usage.. 80 * not clean, but works */ 81 if (pc->p->cfg.high_temp < (reg->hw + 1)) 82 pc->p->cfg.high_temp = reg->hw + 1; 83 return; 84 } 85 86 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 87 if (!(pc->r_temp[i])) { 88 pc->r_temp[i] = reg; 89 reg->hw = i; 90 if (pc->p->cfg.high_temp < (i + 1)) 91 pc->p->cfg.high_temp = i + 1; 92 return; 93 } 94 } 95 96 assert(0); 97} 98 99static struct nv50_reg * 100alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 101{ 102 struct nv50_reg *r; 103 int i; 104 105 if (dst && dst->type == P_TEMP && dst->hw == -1) 106 return dst; 107 108 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 109 if (!pc->r_temp[i]) { 110 r = CALLOC_STRUCT(nv50_reg); 111 r->type = P_TEMP; 112 r->index = -1; 113 r->hw = i; 114 pc->r_temp[i] = r; 115 return r; 116 } 117 } 118 119 assert(0); 120 return NULL; 121} 122 123static void 124free_temp(struct nv50_pc *pc, struct nv50_reg *r) 125{ 126 if (r->index == -1) { 127 FREE(pc->r_temp[r->hw]); 128 pc->r_temp[r->hw] = NULL; 129 } 130} 131 132static struct nv50_reg * 133temp_temp(struct nv50_pc *pc) 134{ 135 if (pc->temp_temp_nr >= 8) 136 assert(0); 137 138 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 139 return pc->temp_temp[pc->temp_temp_nr++]; 140} 141 142static void 143kill_temp_temp(struct nv50_pc *pc) 144{ 145 int i; 146 147 for (i = 0; i < pc->temp_temp_nr; i++) 148 free_temp(pc, pc->temp_temp[i]); 149 pc->temp_temp_nr = 0; 150} 151 152static int 153ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) 154{ 155 pc->immd_buf = realloc(pc->immd_buf, (pc->immd_nr + 1) * 4 * 156 sizeof(float)); 157 pc->immd_buf[(pc->immd_nr * 4) + 0] = x; 158 pc->immd_buf[(pc->immd_nr * 4) + 1] = y; 159 pc->immd_buf[(pc->immd_nr * 4) + 2] = z; 160 pc->immd_buf[(pc->immd_nr * 4) + 3] = w; 161 162 return pc->immd_nr++; 163} 164 165static struct nv50_reg * 166alloc_immd(struct nv50_pc *pc, float f) 167{ 168 struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); 169 unsigned hw; 170 171 hw = ctor_immd(pc, f, 0, 0, 0) * 4; 172 r->type = P_IMMD; 173 r->hw = hw; 174 r->index = -1; 175 return r; 176} 177 178static void 179emit(struct nv50_pc *pc, unsigned *inst) 180{ 181 struct nv50_program *p = pc->p; 182 183 if (inst[0] & 1) { 184 p->insns_nr += 2; 185 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 186 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 187 } else { 188 p->insns_nr += 1; 189 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 190 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 191 } 192} 193 194static INLINE void set_long(struct nv50_pc *, unsigned *); 195 196static boolean 197is_long(unsigned *inst) 198{ 199 if (inst[0] & 1) 200 return TRUE; 201 return FALSE; 202} 203 204static boolean 205is_immd(unsigned *inst) 206{ 207 if (is_long(inst) && (inst[1] & 3) == 3) 208 return TRUE; 209 return FALSE; 210} 211 212static INLINE void 213set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 214{ 215 set_long(pc, inst); 216 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 217 inst[1] |= (pred << 7) | (idx << 12); 218} 219 220static INLINE void 221set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 222{ 223 set_long(pc, inst); 224 inst[1] &= ~((0x3 << 4) | (1 << 6)); 225 inst[1] |= (idx << 4) | (on << 6); 226} 227 228static INLINE void 229set_long(struct nv50_pc *pc, unsigned *inst) 230{ 231 if (is_long(inst)) 232 return; 233 234 inst[0] |= 1; 235 set_pred(pc, 0xf, 0, inst); 236 set_pred_wr(pc, 0, 0, inst); 237} 238 239static INLINE void 240set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 241{ 242 if (dst->type == P_RESULT) { 243 set_long(pc, inst); 244 inst[1] |= 0x00000008; 245 } 246 247 alloc_reg(pc, dst); 248 inst[0] |= (dst->hw << 2); 249} 250 251static INLINE void 252set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 253{ 254 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 255 256 set_long(pc, inst); 257 /*XXX: can't be predicated - bits overlap.. catch cases where both 258 * are required and avoid them. */ 259 set_pred(pc, 0, 0, inst); 260 set_pred_wr(pc, 0, 0, inst); 261 262 inst[1] |= 0x00000002 | 0x00000001; 263 inst[0] |= (val & 0x3f) << 16; 264 inst[1] |= (val >> 6) << 2; 265} 266 267static void 268emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 269 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 270{ 271 unsigned inst[2] = { 0, 0 }; 272 273 inst[0] |= 0x80000000; 274 set_dst(pc, dst, inst); 275 alloc_reg(pc, iv); 276 inst[0] |= (iv->hw << 9); 277 alloc_reg(pc, src); 278 inst[0] |= (src->hw << 16); 279 if (noperspective) 280 inst[0] |= (1 << 25); 281 282 emit(pc, inst); 283} 284 285static void 286set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 287{ 288 set_long(pc, inst); 289 if (src->type == P_IMMD) { 290 inst[1] |= (NV50_CB_PMISC << 22); 291 } else { 292 if (pc->p->type == NV50_PROG_VERTEX) 293 inst[1] |= (NV50_CB_PVP << 22); 294 else 295 inst[1] |= (NV50_CB_PFP << 22); 296 } 297} 298 299static void 300emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 301{ 302 unsigned inst[2] = { 0, 0 }; 303 304 inst[0] |= 0x10000000; 305 306 set_dst(pc, dst, inst); 307 308 if (dst->type != P_RESULT && src->type == P_IMMD) { 309 set_immd(pc, src, inst); 310 /*XXX: 32-bit, but steals part of "half" reg space - need to 311 * catch and handle this case if/when we do half-regs 312 */ 313 inst[0] |= 0x00008000; 314 } else 315 if (src->type == P_IMMD || src->type == P_CONST) { 316 set_long(pc, inst); 317 set_cseg(pc, src, inst); 318 inst[0] |= (src->hw << 9); 319 inst[1] |= 0x20000000; /* src0 const? */ 320 } else { 321 if (src->type == P_ATTR) { 322 set_long(pc, inst); 323 inst[1] |= 0x00200000; 324 } 325 326 alloc_reg(pc, src); 327 inst[0] |= (src->hw << 9); 328 } 329 330 /* We really should support "half" instructions here at some point, 331 * but I don't feel confident enough about them yet. 332 */ 333 set_long(pc, inst); 334 if (is_long(inst) && !is_immd(inst)) { 335 inst[1] |= 0x04000000; /* 32-bit */ 336 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 337 } 338 339 emit(pc, inst); 340} 341 342static boolean 343check_swap_src_0_1(struct nv50_pc *pc, 344 struct nv50_reg **s0, struct nv50_reg **s1) 345{ 346 struct nv50_reg *src0 = *s0, *src1 = *s1; 347 348 if (src0->type == P_CONST) { 349 if (src1->type != P_CONST) { 350 *s0 = src1; 351 *s1 = src0; 352 return TRUE; 353 } 354 } else 355 if (src1->type == P_ATTR) { 356 if (src0->type != P_ATTR) { 357 *s0 = src1; 358 *s1 = src0; 359 return TRUE; 360 } 361 } 362 363 return FALSE; 364} 365 366static void 367set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 368{ 369 if (src->type == P_ATTR) { 370 set_long(pc, inst); 371 inst[1] |= 0x00200000; 372 } else 373 if (src->type == P_CONST || src->type == P_IMMD) { 374 struct nv50_reg *temp = temp_temp(pc); 375 376 emit_mov(pc, temp, src); 377 src = temp; 378 } 379 380 alloc_reg(pc, src); 381 inst[0] |= (src->hw << 9); 382} 383 384static void 385set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 386{ 387 if (src->type == P_ATTR) { 388 struct nv50_reg *temp = temp_temp(pc); 389 390 emit_mov(pc, temp, src); 391 src = temp; 392 } else 393 if (src->type == P_CONST || src->type == P_IMMD) { 394 set_cseg(pc, src, inst); 395 inst[0] |= 0x00800000; 396 } 397 398 alloc_reg(pc, src); 399 inst[0] |= (src->hw << 16); 400} 401 402static void 403set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 404{ 405 set_long(pc, inst); 406 407 if (src->type == P_ATTR) { 408 struct nv50_reg *temp = temp_temp(pc); 409 410 emit_mov(pc, temp, src); 411 src = temp; 412 } else 413 if (src->type == P_CONST || src->type == P_IMMD) { 414 set_cseg(pc, src, inst); 415 inst[0] |= 0x01000000; 416 } 417 418 alloc_reg(pc, src); 419 inst[1] |= (src->hw << 14); 420} 421 422static void 423emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 424 struct nv50_reg *src1) 425{ 426 unsigned inst[2] = { 0, 0 }; 427 428 inst[0] |= 0xc0000000; 429 set_long(pc, inst); 430 431 check_swap_src_0_1(pc, &src0, &src1); 432 set_dst(pc, dst, inst); 433 set_src_0(pc, src0, inst); 434 set_src_1(pc, src1, inst); 435 436 emit(pc, inst); 437} 438 439static void 440emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 441 struct nv50_reg *src0, struct nv50_reg *src1) 442{ 443 unsigned inst[2] = { 0, 0 }; 444 445 inst[0] |= 0xb0000000; 446 447 check_swap_src_0_1(pc, &src0, &src1); 448 set_dst(pc, dst, inst); 449 set_src_0(pc, src0, inst); 450 if (is_long(inst)) 451 set_src_2(pc, src1, inst); 452 else 453 set_src_1(pc, src1, inst); 454 455 emit(pc, inst); 456} 457 458static void 459emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 460 struct nv50_reg *src0, struct nv50_reg *src1) 461{ 462 unsigned inst[2] = { 0, 0 }; 463 464 set_long(pc, inst); 465 inst[0] |= 0xb0000000; 466 inst[1] |= (sub << 29); 467 468 check_swap_src_0_1(pc, &src0, &src1); 469 set_dst(pc, dst, inst); 470 set_src_0(pc, src0, inst); 471 set_src_1(pc, src1, inst); 472 473 emit(pc, inst); 474} 475 476static void 477emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 478 struct nv50_reg *src1) 479{ 480 unsigned inst[2] = { 0, 0 }; 481 482 inst[0] |= 0xb0000000; 483 484 set_long(pc, inst); 485 if (check_swap_src_0_1(pc, &src0, &src1)) 486 inst[1] |= 0x04000000; 487 else 488 inst[1] |= 0x08000000; 489 490 set_dst(pc, dst, inst); 491 set_src_0(pc, src0, inst); 492 set_src_2(pc, src1, inst); 493 494 emit(pc, inst); 495} 496 497static void 498emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 499 struct nv50_reg *src1, struct nv50_reg *src2) 500{ 501 unsigned inst[2] = { 0, 0 }; 502 503 inst[0] |= 0xe0000000; 504 505 check_swap_src_0_1(pc, &src0, &src1); 506 set_dst(pc, dst, inst); 507 set_src_0(pc, src0, inst); 508 set_src_1(pc, src1, inst); 509 set_src_2(pc, src2, inst); 510 511 emit(pc, inst); 512} 513 514static void 515emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 516 struct nv50_reg *src1, struct nv50_reg *src2) 517{ 518 unsigned inst[2] = { 0, 0 }; 519 520 inst[0] |= 0xe0000000; 521 set_long(pc, inst); 522 inst[1] |= 0x08000000; /* src0 * src1 - src2 */ 523 524 check_swap_src_0_1(pc, &src0, &src1); 525 set_dst(pc, dst, inst); 526 set_src_0(pc, src0, inst); 527 set_src_1(pc, src1, inst); 528 set_src_2(pc, src2, inst); 529 530 emit(pc, inst); 531} 532 533static void 534emit_flop(struct nv50_pc *pc, unsigned sub, 535 struct nv50_reg *dst, struct nv50_reg *src) 536{ 537 unsigned inst[2] = { 0, 0 }; 538 539 inst[0] |= 0x90000000; 540 if (sub) { 541 set_long(pc, inst); 542 inst[1] |= (sub << 29); 543 } 544 545 set_dst(pc, dst, inst); 546 set_src_0(pc, src, inst); 547 548 emit(pc, inst); 549} 550 551static void 552emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 553{ 554 unsigned inst[2] = { 0, 0 }; 555 556 inst[0] |= 0xb0000000; 557 558 set_dst(pc, dst, inst); 559 set_src_0(pc, src, inst); 560 set_long(pc, inst); 561 inst[1] |= (6 << 29) | 0x00004000; 562 563 emit(pc, inst); 564} 565 566/*XXX: inaccurate results.. why? */ 567#define ALLOW_SET_SWAP 0 568 569static void 570emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, 571 struct nv50_reg *src0, struct nv50_reg *src1) 572{ 573 unsigned inst[2] = { 0, 0 }; 574#if ALLOW_SET_SWAP 575 unsigned inv_cop[8] = { 0, 6, 2, 4, 3, 5, 1, 7 }; 576#endif 577 struct nv50_reg *rdst; 578 579#if ALLOW_SET_SWAP 580 assert(c_op <= 7); 581 if (check_swap_src_0_1(pc, &src0, &src1)) 582 c_op = inv_cop[c_op]; 583#endif 584 585 rdst = dst; 586 if (dst->type != P_TEMP) 587 dst = alloc_temp(pc, NULL); 588 589 /* set.u32 */ 590 set_long(pc, inst); 591 inst[0] |= 0xb0000000; 592 inst[1] |= (3 << 29); 593 inst[1] |= (c_op << 14); 594 /*XXX: breaks things, .u32 by default? 595 * decuda will disasm as .u16 and use .lo/.hi regs, but this 596 * doesn't seem to match what the hw actually does. 597 inst[1] |= 0x04000000; << breaks things.. .u32 by default? 598 */ 599 set_dst(pc, dst, inst); 600 set_src_0(pc, src0, inst); 601 set_src_1(pc, src1, inst); 602 emit(pc, inst); 603 604 /* cvt.f32.u32 */ 605 inst[0] = 0xa0000001; 606 inst[1] = 0x64014780; 607 set_dst(pc, rdst, inst); 608 set_src_0(pc, dst, inst); 609 emit(pc, inst); 610 611 if (dst != rdst) 612 free_temp(pc, dst); 613} 614 615static void 616emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 617{ 618 unsigned inst[2] = { 0, 0 }; 619 620 inst[0] = 0xa0000000; /* cvt */ 621 set_long(pc, inst); 622 inst[1] |= (6 << 29); /* cvt */ 623 inst[1] |= 0x08000000; /* integer mode */ 624 inst[1] |= 0x04000000; /* 32 bit */ 625 inst[1] |= ((0x1 << 3)) << 14; /* .rn */ 626 inst[1] |= (1 << 14); /* src .f32 */ 627 set_dst(pc, dst, inst); 628 set_src_0(pc, src, inst); 629 630 emit(pc, inst); 631} 632 633static void 634emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, 635 struct nv50_reg *v, struct nv50_reg *e) 636{ 637 struct nv50_reg *temp = alloc_temp(pc, NULL); 638 639 emit_flop(pc, 3, temp, v); 640 emit_mul(pc, temp, temp, e); 641 emit_preex2(pc, temp, temp); 642 emit_flop(pc, 6, dst, temp); 643 644 free_temp(pc, temp); 645} 646 647static void 648emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 649{ 650 unsigned inst[2] = { 0, 0 }; 651 652 inst[0] = 0xa0000000; /* cvt */ 653 set_long(pc, inst); 654 inst[1] |= (6 << 29); /* cvt */ 655 inst[1] |= 0x04000000; /* 32 bit */ 656 inst[1] |= (1 << 14); /* src .f32 */ 657 inst[1] |= ((1 << 6) << 14); /* .abs */ 658 set_dst(pc, dst, inst); 659 set_src_0(pc, src, inst); 660 661 emit(pc, inst); 662} 663 664static void 665emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, struct nv50_reg **src) 666{ 667 struct nv50_reg *one = alloc_immd(pc, 1.0); 668 struct nv50_reg *zero = alloc_immd(pc, 0.0); 669 struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); 670 struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); 671 struct nv50_reg *tmp[4]; 672 673 emit_mov(pc, dst[0], one); 674 emit_mov(pc, dst[3], one); 675 676 tmp[0] = temp_temp(pc); 677 emit_minmax(pc, 4, dst[1], src[0], zero); 678 set_pred_wr(pc, 1, 0, &pc->p->insns[pc->p->insns_nr - 2]); 679 680 tmp[1] = temp_temp(pc); 681 emit_minmax(pc, 4, tmp[1], src[1], zero); 682 683 tmp[3] = temp_temp(pc); 684 emit_minmax(pc, 4, tmp[3], src[3], neg128); 685 emit_minmax(pc, 5, tmp[3], tmp[3], pos128); 686 687 emit_pow(pc, dst[2], tmp[1], tmp[3]); 688 emit_mov(pc, dst[2], zero); 689 set_pred(pc, 3, 0, &pc->p->insns[pc->p->insns_nr - 2]); 690} 691 692static struct nv50_reg * 693tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 694{ 695 switch (dst->DstRegister.File) { 696 case TGSI_FILE_TEMPORARY: 697 return &pc->temp[dst->DstRegister.Index * 4 + c]; 698 case TGSI_FILE_OUTPUT: 699 return &pc->result[dst->DstRegister.Index * 4 + c]; 700 case TGSI_FILE_NULL: 701 return NULL; 702 default: 703 break; 704 } 705 706 return NULL; 707} 708 709static struct nv50_reg * 710tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) 711{ 712 struct nv50_reg *r = NULL; 713 struct nv50_reg *temp; 714 unsigned c; 715 716 c = tgsi_util_get_full_src_register_extswizzle(src, chan); 717 switch (c) { 718 case TGSI_EXTSWIZZLE_X: 719 case TGSI_EXTSWIZZLE_Y: 720 case TGSI_EXTSWIZZLE_Z: 721 case TGSI_EXTSWIZZLE_W: 722 switch (src->SrcRegister.File) { 723 case TGSI_FILE_INPUT: 724 r = &pc->attr[src->SrcRegister.Index * 4 + c]; 725 break; 726 case TGSI_FILE_TEMPORARY: 727 r = &pc->temp[src->SrcRegister.Index * 4 + c]; 728 break; 729 case TGSI_FILE_CONSTANT: 730 r = &pc->param[src->SrcRegister.Index * 4 + c]; 731 break; 732 case TGSI_FILE_IMMEDIATE: 733 r = &pc->immd[src->SrcRegister.Index * 4 + c]; 734 break; 735 default: 736 assert(0); 737 break; 738 } 739 break; 740 case TGSI_EXTSWIZZLE_ZERO: 741 r = alloc_immd(pc, 0.0); 742 break; 743 case TGSI_EXTSWIZZLE_ONE: 744 r = alloc_immd(pc, 1.0); 745 break; 746 default: 747 assert(0); 748 break; 749 } 750 751 switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { 752 case TGSI_UTIL_SIGN_KEEP: 753 break; 754 case TGSI_UTIL_SIGN_CLEAR: 755 temp = temp_temp(pc); 756 emit_abs(pc, temp, r); 757 r = temp; 758 break; 759 default: 760 assert(0); 761 break; 762 } 763 764 return r; 765} 766 767static boolean 768nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 769{ 770 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 771 struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; 772 unsigned mask, sat; 773 int i, c; 774 775 NOUVEAU_ERR("insn %p\n", tok); 776 777 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 778 sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; 779 780 for (c = 0; c < 4; c++) { 781 if (mask & (1 << c)) 782 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 783 else 784 dst[c] = NULL; 785 } 786 787 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 788 for (c = 0; c < 4; c++) 789 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 790 } 791 792 if (sat) { 793 for (c = 0; c < 4; c++) { 794 rdst[c] = dst[c]; 795 dst[c] = temp_temp(pc); 796 } 797 } 798 799 switch (inst->Instruction.Opcode) { 800 case TGSI_OPCODE_ABS: 801 for (c = 0; c < 4; c++) { 802 if (!(mask & (1 << c))) 803 continue; 804 emit_abs(pc, dst[c], src[0][c]); 805 } 806 break; 807 case TGSI_OPCODE_ADD: 808 for (c = 0; c < 4; c++) { 809 if (!(mask & (1 << c))) 810 continue; 811 emit_add(pc, dst[c], src[0][c], src[1][c]); 812 } 813 break; 814 case TGSI_OPCODE_COS: 815 for (c = 0; c < 4; c++) { 816 if (!(mask & (1 << c))) 817 continue; 818 emit_flop(pc, 5, dst[c], src[0][c]); 819 } 820 break; 821 case TGSI_OPCODE_DP3: 822 temp = alloc_temp(pc, NULL); 823 emit_mul(pc, temp, src[0][0], src[1][0]); 824 emit_mad(pc, temp, src[0][1], src[1][1], temp); 825 emit_mad(pc, temp, src[0][2], src[1][2], temp); 826 for (c = 0; c < 4; c++) { 827 if (!(mask & (1 << c))) 828 continue; 829 emit_mov(pc, dst[c], temp); 830 } 831 free_temp(pc, temp); 832 break; 833 case TGSI_OPCODE_DP4: 834 temp = alloc_temp(pc, NULL); 835 emit_mul(pc, temp, src[0][0], src[1][0]); 836 emit_mad(pc, temp, src[0][1], src[1][1], temp); 837 emit_mad(pc, temp, src[0][2], src[1][2], temp); 838 emit_mad(pc, temp, src[0][3], src[1][3], temp); 839 for (c = 0; c < 4; c++) { 840 if (!(mask & (1 << c))) 841 continue; 842 emit_mov(pc, dst[c], temp); 843 } 844 free_temp(pc, temp); 845 break; 846 case TGSI_OPCODE_DPH: 847 temp = alloc_temp(pc, NULL); 848 emit_mul(pc, temp, src[0][0], src[1][0]); 849 emit_mad(pc, temp, src[0][1], src[1][1], temp); 850 emit_mad(pc, temp, src[0][2], src[1][2], temp); 851 emit_add(pc, temp, src[1][3], temp); 852 for (c = 0; c < 4; c++) { 853 if (!(mask & (1 << c))) 854 continue; 855 emit_mov(pc, dst[c], temp); 856 } 857 free_temp(pc, temp); 858 break; 859 case TGSI_OPCODE_DST: 860 { 861 struct nv50_reg *one = alloc_immd(pc, 1.0); 862 if (mask & (1 << 0)) 863 emit_mov(pc, dst[0], one); 864 if (mask & (1 << 1)) 865 emit_mul(pc, dst[1], src[0][1], src[1][1]); 866 if (mask & (1 << 2)) 867 emit_mov(pc, dst[2], src[0][2]); 868 if (mask & (1 << 3)) 869 emit_mov(pc, dst[3], src[1][3]); 870 FREE(one); 871 } 872 break; 873 case TGSI_OPCODE_EX2: 874 temp = alloc_temp(pc, NULL); 875 for (c = 0; c < 4; c++) { 876 if (!(mask & (1 << c))) 877 continue; 878 emit_preex2(pc, temp, src[0][c]); 879 emit_flop(pc, 6, dst[c], temp); 880 } 881 free_temp(pc, temp); 882 break; 883 case TGSI_OPCODE_FLR: 884 for (c = 0; c < 4; c++) { 885 if (!(mask & (1 << c))) 886 continue; 887 emit_flr(pc, dst[c], src[0][c]); 888 } 889 break; 890 case TGSI_OPCODE_FRC: 891 temp = alloc_temp(pc, NULL); 892 for (c = 0; c < 4; c++) { 893 if (!(mask & (1 << c))) 894 continue; 895 emit_flr(pc, temp, src[0][c]); 896 emit_sub(pc, dst[c], src[0][c], temp); 897 } 898 free_temp(pc, temp); 899 break; 900 case TGSI_OPCODE_LIT: 901 /*XXX: writemask */ 902 emit_lit(pc, &dst[0], &src[0][0]); 903 break; 904 case TGSI_OPCODE_LG2: 905 for (c = 0; c < 4; c++) { 906 if (!(mask & (1 << c))) 907 continue; 908 emit_flop(pc, 3, dst[c], src[0][c]); 909 } 910 break; 911 case TGSI_OPCODE_MAD: 912 for (c = 0; c < 4; c++) { 913 if (!(mask & (1 << c))) 914 continue; 915 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 916 } 917 break; 918 case TGSI_OPCODE_MAX: 919 for (c = 0; c < 4; c++) { 920 if (!(mask & (1 << c))) 921 continue; 922 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 923 } 924 break; 925 case TGSI_OPCODE_MIN: 926 for (c = 0; c < 4; c++) { 927 if (!(mask & (1 << c))) 928 continue; 929 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 930 } 931 break; 932 case TGSI_OPCODE_MOV: 933 for (c = 0; c < 4; c++) { 934 if (!(mask & (1 << c))) 935 continue; 936 emit_mov(pc, dst[c], src[0][c]); 937 } 938 break; 939 case TGSI_OPCODE_MUL: 940 for (c = 0; c < 4; c++) { 941 if (!(mask & (1 << c))) 942 continue; 943 emit_mul(pc, dst[c], src[0][c], src[1][c]); 944 } 945 break; 946 case TGSI_OPCODE_POW: 947 temp = alloc_temp(pc, NULL); 948 emit_pow(pc, temp, src[0][0], src[1][0]); 949 for (c = 0; c < 4; c++) { 950 if (!(mask & (1 << c))) 951 continue; 952 emit_mov(pc, dst[c], temp); 953 } 954 free_temp(pc, temp); 955 break; 956 case TGSI_OPCODE_RCP: 957 for (c = 0; c < 4; c++) { 958 if (!(mask & (1 << c))) 959 continue; 960 emit_flop(pc, 0, dst[c], src[0][c]); 961 } 962 break; 963 case TGSI_OPCODE_RSQ: 964 for (c = 0; c < 4; c++) { 965 if (!(mask & (1 << c))) 966 continue; 967 emit_flop(pc, 2, dst[c], src[0][c]); 968 } 969 break; 970 case TGSI_OPCODE_SGE: 971 for (c = 0; c < 4; c++) { 972 if (!(mask & (1 << c))) 973 continue; 974 emit_set(pc, 6, dst[c], src[0][c], src[1][c]); 975 } 976 break; 977 case TGSI_OPCODE_SIN: 978 for (c = 0; c < 4; c++) { 979 if (!(mask & (1 << c))) 980 continue; 981 emit_flop(pc, 4, dst[c], src[0][c]); 982 } 983 break; 984 case TGSI_OPCODE_SLT: 985 for (c = 0; c < 4; c++) { 986 if (!(mask & (1 << c))) 987 continue; 988 emit_set(pc, 1, dst[c], src[0][c], src[1][c]); 989 } 990 break; 991 case TGSI_OPCODE_SUB: 992 for (c = 0; c < 4; c++) { 993 if (!(mask & (1 << c))) 994 continue; 995 emit_sub(pc, dst[c], src[0][c], src[1][c]); 996 } 997 break; 998 case TGSI_OPCODE_XPD: 999 temp = alloc_temp(pc, NULL); 1000 if (mask & (1 << 0)) { 1001 emit_mul(pc, temp, src[0][2], src[1][1]); 1002 emit_msb(pc, dst[0], src[0][1], src[1][2], temp); 1003 } 1004 if (mask & (1 << 1)) { 1005 emit_mul(pc, temp, src[0][0], src[1][2]); 1006 emit_msb(pc, dst[1], src[0][2], src[1][0], temp); 1007 } 1008 if (mask & (1 << 2)) { 1009 emit_mul(pc, temp, src[0][1], src[1][0]); 1010 emit_msb(pc, dst[2], src[0][0], src[1][1], temp); 1011 } 1012 free_temp(pc, temp); 1013 break; 1014 case TGSI_OPCODE_END: 1015 break; 1016 default: 1017 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 1018 return FALSE; 1019 } 1020 1021 if (sat) { 1022 for (c = 0; c < 4; c++) { 1023 unsigned inst[2] = { 0, 0 }; 1024 1025 if (!(mask & (1 << c))) 1026 continue; 1027 1028 inst[0] = 0xa0000000; /* cvt */ 1029 set_long(pc, inst); 1030 inst[1] |= (6 << 29); /* cvt */ 1031 inst[1] |= 0x04000000; /* 32 bit */ 1032 inst[1] |= (1 << 14); /* src .f32 */ 1033 inst[1] |= ((1 << 5) << 14); /* .sat */ 1034 set_dst(pc, rdst[c], inst); 1035 set_src_0(pc, dst[c], inst); 1036 emit(pc, inst); 1037 } 1038 } 1039 1040 kill_temp_temp(pc); 1041 return TRUE; 1042} 1043 1044static boolean 1045nv50_program_tx_prep(struct nv50_pc *pc) 1046{ 1047 struct tgsi_parse_context p; 1048 boolean ret = FALSE; 1049 unsigned i, c; 1050 1051 tgsi_parse_init(&p, pc->p->pipe.tokens); 1052 while (!tgsi_parse_end_of_tokens(&p)) { 1053 const union tgsi_full_token *tok = &p.FullToken; 1054 1055 tgsi_parse_token(&p); 1056 switch (tok->Token.Type) { 1057 case TGSI_TOKEN_TYPE_IMMEDIATE: 1058 { 1059 const struct tgsi_full_immediate *imm = 1060 &p.FullToken.FullImmediate; 1061 1062 ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, 1063 imm->u.ImmediateFloat32[1].Float, 1064 imm->u.ImmediateFloat32[2].Float, 1065 imm->u.ImmediateFloat32[3].Float); 1066 } 1067 break; 1068 case TGSI_TOKEN_TYPE_DECLARATION: 1069 { 1070 const struct tgsi_full_declaration *d; 1071 unsigned last; 1072 1073 d = &p.FullToken.FullDeclaration; 1074 last = d->u.DeclarationRange.Last; 1075 1076 switch (d->Declaration.File) { 1077 case TGSI_FILE_TEMPORARY: 1078 if (pc->temp_nr < (last + 1)) 1079 pc->temp_nr = last + 1; 1080 break; 1081 case TGSI_FILE_OUTPUT: 1082 if (pc->result_nr < (last + 1)) 1083 pc->result_nr = last + 1; 1084 break; 1085 case TGSI_FILE_INPUT: 1086 if (pc->attr_nr < (last + 1)) 1087 pc->attr_nr = last + 1; 1088 break; 1089 case TGSI_FILE_CONSTANT: 1090 if (pc->param_nr < (last + 1)) 1091 pc->param_nr = last + 1; 1092 break; 1093 default: 1094 NOUVEAU_ERR("bad decl file %d\n", 1095 d->Declaration.File); 1096 goto out_err; 1097 } 1098 } 1099 break; 1100 case TGSI_TOKEN_TYPE_INSTRUCTION: 1101 break; 1102 default: 1103 break; 1104 } 1105 } 1106 1107 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 1108 if (pc->temp_nr) { 1109 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 1110 if (!pc->temp) 1111 goto out_err; 1112 1113 for (i = 0; i < pc->temp_nr; i++) { 1114 for (c = 0; c < 4; c++) { 1115 pc->temp[i*4+c].type = P_TEMP; 1116 pc->temp[i*4+c].hw = -1; 1117 pc->temp[i*4+c].index = i; 1118 } 1119 } 1120 } 1121 1122 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 1123 if (pc->attr_nr) { 1124 struct nv50_reg *iv = NULL, *tmp = NULL; 1125 int aid = 0; 1126 1127 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 1128 if (!pc->attr) 1129 goto out_err; 1130 1131 if (pc->p->type == NV50_PROG_FRAGMENT) { 1132 iv = alloc_temp(pc, NULL); 1133 aid++; 1134 } 1135 1136 for (i = 0; i < pc->attr_nr; i++) { 1137 struct nv50_reg *a = &pc->attr[i*4]; 1138 1139 for (c = 0; c < 4; c++) { 1140 if (pc->p->type == NV50_PROG_FRAGMENT) { 1141 struct nv50_reg *at = 1142 alloc_temp(pc, NULL); 1143 pc->attr[i*4+c].type = at->type; 1144 pc->attr[i*4+c].hw = at->hw; 1145 pc->attr[i*4+c].index = at->index; 1146 } else { 1147 pc->p->cfg.vp.attr[aid/32] |= 1148 (1 << (aid % 32)); 1149 pc->attr[i*4+c].type = P_ATTR; 1150 pc->attr[i*4+c].hw = aid++; 1151 pc->attr[i*4+c].index = i; 1152 } 1153 } 1154 1155 if (pc->p->type != NV50_PROG_FRAGMENT) 1156 continue; 1157 1158 emit_interp(pc, iv, iv, iv, FALSE); 1159 tmp = alloc_temp(pc, NULL); 1160 { 1161 unsigned inst[2] = { 0, 0 }; 1162 inst[0] = 0x90000000; 1163 inst[0] |= (tmp->hw << 2); 1164 emit(pc, inst); 1165 } 1166 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 1167 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 1168 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 1169 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 1170 free_temp(pc, tmp); 1171 } 1172 1173 if (iv) 1174 free_temp(pc, iv); 1175 } 1176 1177 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 1178 if (pc->result_nr) { 1179 int rid = 0; 1180 1181 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 1182 if (!pc->result) 1183 goto out_err; 1184 1185 for (i = 0; i < pc->result_nr; i++) { 1186 for (c = 0; c < 4; c++) { 1187 if (pc->p->type == NV50_PROG_FRAGMENT) 1188 pc->result[i*4+c].type = P_TEMP; 1189 else 1190 pc->result[i*4+c].type = P_RESULT; 1191 pc->result[i*4+c].hw = rid++; 1192 pc->result[i*4+c].index = i; 1193 } 1194 } 1195 } 1196 1197 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 1198 if (pc->param_nr) { 1199 int rid = 0; 1200 1201 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 1202 if (!pc->param) 1203 goto out_err; 1204 1205 for (i = 0; i < pc->param_nr; i++) { 1206 for (c = 0; c < 4; c++) { 1207 pc->param[i*4+c].type = P_CONST; 1208 pc->param[i*4+c].hw = rid++; 1209 pc->param[i*4+c].index = i; 1210 } 1211 } 1212 } 1213 1214 if (pc->immd_nr) { 1215 int rid = 0; 1216 1217 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 1218 if (!pc->immd) 1219 goto out_err; 1220 1221 for (i = 0; i < pc->immd_nr; i++) { 1222 for (c = 0; c < 4; c++) { 1223 pc->immd[i*4+c].type = P_IMMD; 1224 pc->immd[i*4+c].hw = rid++; 1225 pc->immd[i*4+c].index = i; 1226 } 1227 } 1228 } 1229 1230 ret = TRUE; 1231out_err: 1232 tgsi_parse_free(&p); 1233 return ret; 1234} 1235 1236static boolean 1237nv50_program_tx(struct nv50_program *p) 1238{ 1239 struct tgsi_parse_context parse; 1240 struct nv50_pc *pc; 1241 boolean ret; 1242 1243 pc = CALLOC_STRUCT(nv50_pc); 1244 if (!pc) 1245 return FALSE; 1246 pc->p = p; 1247 pc->p->cfg.high_temp = 4; 1248 1249 ret = nv50_program_tx_prep(pc); 1250 if (ret == FALSE) 1251 goto out_cleanup; 1252 1253 tgsi_parse_init(&parse, pc->p->pipe.tokens); 1254 while (!tgsi_parse_end_of_tokens(&parse)) { 1255 const union tgsi_full_token *tok = &parse.FullToken; 1256 1257 tgsi_parse_token(&parse); 1258 1259 switch (tok->Token.Type) { 1260 case TGSI_TOKEN_TYPE_INSTRUCTION: 1261 ret = nv50_program_tx_insn(pc, tok); 1262 if (ret == FALSE) 1263 goto out_err; 1264 break; 1265 default: 1266 break; 1267 } 1268 } 1269 1270 p->immd_nr = pc->immd_nr * 4; 1271 p->immd = pc->immd_buf; 1272 1273out_err: 1274 tgsi_parse_free(&parse); 1275 1276out_cleanup: 1277 return ret; 1278} 1279 1280static void 1281nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 1282{ 1283 int i; 1284 1285 if (nv50_program_tx(p) == FALSE) 1286 assert(0); 1287 /* *not* sufficient, it's fine if last inst is long and 1288 * NOT immd - otherwise it's fucked fucked fucked */ 1289 p->insns[p->insns_nr - 1] |= 0x00000001; 1290 1291 if (p->type == NV50_PROG_VERTEX) { 1292 for (i = 0; i < p->insns_nr; i++) 1293 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 1294 } else { 1295 for (i = 0; i < p->insns_nr; i++) 1296 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 1297 } 1298 1299 p->translated = TRUE; 1300} 1301 1302static void 1303nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 1304{ 1305 int i; 1306 1307 for (i = 0; i < p->immd_nr; i++) { 1308 BEGIN_RING(tesla, 0x0f00, 2); 1309 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 1310 OUT_RING (fui(p->immd[i])); 1311 } 1312} 1313 1314static void 1315nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 1316{ 1317 struct pipe_winsys *ws = nv50->pipe.winsys; 1318 void *map; 1319 1320 if (!p->buffer) 1321 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 1322 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 1323 memcpy(map, p->insns, p->insns_nr * 4); 1324 ws->buffer_unmap(ws, p->buffer); 1325} 1326 1327void 1328nv50_vertprog_validate(struct nv50_context *nv50) 1329{ 1330 struct nouveau_grobj *tesla = nv50->screen->tesla; 1331 struct nv50_program *p = nv50->vertprog; 1332 struct nouveau_stateobj *so; 1333 1334 if (!p->translated) { 1335 nv50_program_validate(nv50, p); 1336 if (!p->translated) 1337 assert(0); 1338 } 1339 1340 nv50_program_validate_data(nv50, p); 1341 nv50_program_validate_code(nv50, p); 1342 1343 so = so_new(11, 2); 1344 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1345 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1346 NOUVEAU_BO_HIGH, 0, 0); 1347 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1348 NOUVEAU_BO_LOW, 0, 0); 1349 so_method(so, tesla, 0x1650, 2); 1350 so_data (so, p->cfg.vp.attr[0]); 1351 so_data (so, p->cfg.vp.attr[1]); 1352 so_method(so, tesla, 0x16ac, 2); 1353 so_data (so, 8); 1354 so_data (so, p->cfg.high_temp); 1355 so_method(so, tesla, 0x140c, 1); 1356 so_data (so, 0); /* program start offset */ 1357 so_emit(nv50->screen->nvws, so); 1358 so_ref(NULL, &so); 1359} 1360 1361void 1362nv50_fragprog_validate(struct nv50_context *nv50) 1363{ 1364 struct nouveau_grobj *tesla = nv50->screen->tesla; 1365 struct nv50_program *p = nv50->fragprog; 1366 struct nouveau_stateobj *so; 1367 1368 if (!p->translated) { 1369 nv50_program_validate(nv50, p); 1370 if (!p->translated) 1371 assert(0); 1372 } 1373 1374 nv50_program_validate_data(nv50, p); 1375 nv50_program_validate_code(nv50, p); 1376 1377 so = so_new(7, 2); 1378 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1379 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1380 NOUVEAU_BO_HIGH, 0, 0); 1381 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1382 NOUVEAU_BO_LOW, 0, 0); 1383 so_method(so, tesla, 0x198c, 1); 1384 so_data (so, p->cfg.high_temp); 1385 so_method(so, tesla, 0x1414, 1); 1386 so_data (so, 0); /* program start offset */ 1387 so_emit(nv50->screen->nvws, so); 1388 so_ref(NULL, &so); 1389} 1390 1391void 1392nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1393{ 1394 struct pipe_winsys *ws = nv50->pipe.winsys; 1395 1396 if (p->insns_nr) { 1397 if (p->insns) 1398 FREE(p->insns); 1399 p->insns_nr = 0; 1400 } 1401 1402 if (p->buffer) 1403 pipe_buffer_reference(ws, &p->buffer, NULL); 1404 1405 p->translated = 0; 1406} 1407 1408