/* nv50_program.c revision fbf4027dd9b279ec159906dcad134f71e34aaec8 */
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15struct nv50_reg { 16 enum { 17 P_TEMP, 18 P_ATTR, 19 P_RESULT, 20 P_CONST, 21 P_IMMD 22 } type; 23 int index; 24 25 int hw; 26 int neg; 27}; 28 29struct nv50_pc { 30 struct nv50_program *p; 31 32 /* hw resources */ 33 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 34 35 /* tgsi resources */ 36 struct nv50_reg *temp; 37 int temp_nr; 38 struct nv50_reg *attr; 39 int attr_nr; 40 struct nv50_reg *result; 41 int result_nr; 42 struct nv50_reg *param; 43 int param_nr; 44 struct nv50_reg *immd; 45 float *immd_buf; 46 int immd_nr; 47 48 struct nv50_reg *temp_temp[8]; 49 unsigned temp_temp_nr; 50}; 51 52static void 53alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 54{ 55 int i; 56 57 if (reg->type != P_TEMP) 58 return; 59 60 if (reg->hw >= 0) { 61 /*XXX: do this here too to catch FP temp-as-attr usage.. 
62 * not clean, but works */ 63 if (pc->p->cfg.high_temp < (reg->hw + 1)) 64 pc->p->cfg.high_temp = reg->hw + 1; 65 return; 66 } 67 68 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 69 if (!(pc->r_temp[i])) { 70 pc->r_temp[i] = reg; 71 reg->hw = i; 72 if (pc->p->cfg.high_temp < (i + 1)) 73 pc->p->cfg.high_temp = i + 1; 74 return; 75 } 76 } 77 78 assert(0); 79} 80 81static struct nv50_reg * 82alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 83{ 84 struct nv50_reg *r; 85 int i; 86 87 if (dst && dst->type == P_TEMP && dst->hw == -1) 88 return dst; 89 90 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 91 if (!pc->r_temp[i]) { 92 r = CALLOC_STRUCT(nv50_reg); 93 r->type = P_TEMP; 94 r->index = -1; 95 r->hw = i; 96 pc->r_temp[i] = r; 97 return r; 98 } 99 } 100 101 assert(0); 102 return NULL; 103} 104 105static void 106free_temp(struct nv50_pc *pc, struct nv50_reg *r) 107{ 108 if (r->index == -1) { 109 FREE(pc->r_temp[r->hw]); 110 pc->r_temp[r->hw] = NULL; 111 } 112} 113 114static struct nv50_reg * 115temp_temp(struct nv50_pc *pc) 116{ 117 if (pc->temp_temp_nr >= 8) 118 assert(0); 119 120 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 121 return pc->temp_temp[pc->temp_temp_nr++]; 122} 123 124static void 125kill_temp_temp(struct nv50_pc *pc) 126{ 127 int i; 128 129 for (i = 0; i < pc->temp_temp_nr; i++) 130 free_temp(pc, pc->temp_temp[i]); 131 pc->temp_temp_nr = 0; 132} 133 134static struct nv50_reg * 135tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 136{ 137 switch (dst->DstRegister.File) { 138 case TGSI_FILE_TEMPORARY: 139 return &pc->temp[dst->DstRegister.Index * 4 + c]; 140 case TGSI_FILE_OUTPUT: 141 return &pc->result[dst->DstRegister.Index * 4 + c]; 142 case TGSI_FILE_NULL: 143 return NULL; 144 default: 145 break; 146 } 147 148 return NULL; 149} 150 151static struct nv50_reg * 152tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) 153{ 154 /* Handle swizzling */ 155 switch (c) { 156 case 0: c = 
src->SrcRegister.SwizzleX; break; 157 case 1: c = src->SrcRegister.SwizzleY; break; 158 case 2: c = src->SrcRegister.SwizzleZ; break; 159 case 3: c = src->SrcRegister.SwizzleW; break; 160 default: 161 assert(0); 162 } 163 164 switch (src->SrcRegister.File) { 165 case TGSI_FILE_INPUT: 166 return &pc->attr[src->SrcRegister.Index * 4 + c]; 167 case TGSI_FILE_TEMPORARY: 168 return &pc->temp[src->SrcRegister.Index * 4 + c]; 169 case TGSI_FILE_CONSTANT: 170 return &pc->param[src->SrcRegister.Index * 4 + c]; 171 case TGSI_FILE_IMMEDIATE: 172 return &pc->immd[src->SrcRegister.Index * 4 + c]; 173 default: 174 break; 175 } 176 177 return NULL; 178} 179 180static void 181emit(struct nv50_pc *pc, unsigned *inst) 182{ 183 struct nv50_program *p = pc->p; 184 185 if (inst[0] & 1) { 186 p->insns_nr += 2; 187 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 188 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 189 } else { 190 p->insns_nr += 1; 191 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 192 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 193 } 194} 195 196static INLINE void set_long(struct nv50_pc *, unsigned *); 197 198static boolean 199is_long(unsigned *inst) 200{ 201 if (inst[0] & 1) 202 return TRUE; 203 return FALSE; 204} 205 206static boolean 207is_immd(unsigned *inst) 208{ 209 if (is_long(inst) && (inst[1] & 3) == 3) 210 return TRUE; 211 return FALSE; 212} 213 214static INLINE void 215set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 216{ 217 set_long(pc, inst); 218 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 219 inst[1] |= (pred << 7) | (idx << 12); 220} 221 222static INLINE void 223set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 224{ 225 set_long(pc, inst); 226 inst[1] &= ~((0x3 << 4) | (1 << 6)); 227 inst[1] |= (idx << 4) | (on << 6); 228} 229 230static INLINE void 231set_long(struct nv50_pc *pc, unsigned *inst) 232{ 233 if (is_long(inst)) 234 return; 235 236 
inst[0] |= 1; 237 set_pred(pc, 0xf, 0, inst); 238 set_pred_wr(pc, 0, 0, inst); 239} 240 241static INLINE void 242set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 243{ 244 if (dst->type == P_RESULT) { 245 set_long(pc, inst); 246 inst[1] |= 0x00000008; 247 } 248 249 alloc_reg(pc, dst); 250 inst[0] |= (dst->hw << 2); 251} 252 253static INLINE void 254set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 255{ 256 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 257 258 set_long(pc, inst); 259 /*XXX: can't be predicated - bits overlap.. catch cases where both 260 * are required and avoid them. */ 261 set_pred(pc, 0, 0, inst); 262 set_pred_wr(pc, 0, 0, inst); 263 264 inst[1] |= 0x00000002 | 0x00000001; 265 inst[0] |= (val & 0x3f) << 16; 266 inst[1] |= (val >> 6) << 2; 267} 268 269static void 270emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 271 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 272{ 273 unsigned inst[2] = { 0, 0 }; 274 275 inst[0] |= 0x80000000; 276 set_dst(pc, dst, inst); 277 alloc_reg(pc, iv); 278 inst[0] |= (iv->hw << 9); 279 alloc_reg(pc, src); 280 inst[0] |= (src->hw << 16); 281 if (noperspective) 282 inst[0] |= (1 << 25); 283 284 emit(pc, inst); 285} 286 287static void 288set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 289{ 290 set_long(pc, inst); 291 if (src->type == P_IMMD) { 292 inst[1] |= (NV50_CB_PMISC << 22); 293 } else { 294 if (pc->p->type == NV50_PROG_VERTEX) 295 inst[1] |= (NV50_CB_PVP << 22); 296 else 297 inst[1] |= (NV50_CB_PFP << 22); 298 } 299} 300 301static void 302emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 303{ 304 unsigned inst[2] = { 0, 0 }; 305 306 inst[0] |= 0x10000000; 307 308 set_dst(pc, dst, inst); 309 310 if (dst->type != P_RESULT && src->type == P_IMMD) { 311 set_immd(pc, src, inst); 312 /*XXX: 32-bit, but steals part of "half" reg space - need to 313 * catch and handle this case if/when we do half-regs 314 */ 315 
inst[0] |= 0x00008000; 316 } else 317 if (src->type == P_IMMD || src->type == P_CONST) { 318 set_long(pc, inst); 319 set_cseg(pc, src, inst); 320 inst[0] |= (src->hw << 9); 321 inst[1] |= 0x20000000; /* src0 const? */ 322 } else { 323 if (src->type == P_ATTR) { 324 set_long(pc, inst); 325 inst[1] |= 0x00200000; 326 } 327 328 alloc_reg(pc, src); 329 inst[0] |= (src->hw << 9); 330 } 331 332 /* We really should support "half" instructions here at some point, 333 * but I don't feel confident enough about them yet. 334 */ 335 set_long(pc, inst); 336 if (is_long(inst) && !is_immd(inst)) { 337 inst[1] |= 0x04000000; /* 32-bit */ 338 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 339 } 340 341 emit(pc, inst); 342} 343 344static boolean 345check_swap_src_0_1(struct nv50_pc *pc, 346 struct nv50_reg **s0, struct nv50_reg **s1) 347{ 348 struct nv50_reg *src0 = *s0, *src1 = *s1; 349 350 if (src0->type == P_CONST) { 351 if (src1->type != P_CONST) { 352 *s0 = src1; 353 *s1 = src0; 354 return TRUE; 355 } 356 } else 357 if (src1->type == P_ATTR) { 358 if (src0->type != P_ATTR) { 359 *s0 = src1; 360 *s1 = src0; 361 return TRUE; 362 } 363 } 364 365 return FALSE; 366} 367 368static void 369set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 370{ 371 if (src->type == P_ATTR) { 372 set_long(pc, inst); 373 inst[1] |= 0x00200000; 374 } else 375 if (src->type == P_CONST || src->type == P_IMMD) { 376 struct nv50_reg *temp = temp_temp(pc); 377 378 emit_mov(pc, temp, src); 379 src = temp; 380 } 381 382 alloc_reg(pc, src); 383 inst[0] |= (src->hw << 9); 384} 385 386static void 387set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 388{ 389 if (src->type == P_ATTR) { 390 struct nv50_reg *temp = temp_temp(pc); 391 392 emit_mov(pc, temp, src); 393 src = temp; 394 } else 395 if (src->type == P_CONST || src->type == P_IMMD) { 396 set_cseg(pc, src, inst); 397 inst[0] |= 0x00800000; 398 } 399 400 alloc_reg(pc, src); 401 inst[0] |= (src->hw << 16); 402} 403 
404static void 405set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 406{ 407 set_long(pc, inst); 408 409 if (src->type == P_ATTR) { 410 struct nv50_reg *temp = temp_temp(pc); 411 412 emit_mov(pc, temp, src); 413 src = temp; 414 } else 415 if (src->type == P_CONST || src->type == P_IMMD) { 416 set_cseg(pc, src, inst); 417 inst[0] |= 0x01000000; 418 } 419 420 alloc_reg(pc, src); 421 inst[1] |= (src->hw << 14); 422} 423 424static void 425emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 426 struct nv50_reg *src1) 427{ 428 unsigned inst[2] = { 0, 0 }; 429 430 inst[0] |= 0xc0000000; 431 432 check_swap_src_0_1(pc, &src0, &src1); 433 set_dst(pc, dst, inst); 434 set_src_0(pc, src0, inst); 435 set_src_1(pc, src1, inst); 436 437 emit(pc, inst); 438} 439 440static void 441emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 442 struct nv50_reg *src0, struct nv50_reg *src1) 443{ 444 unsigned inst[2] = { 0, 0 }; 445 446 inst[0] |= 0xb0000000; 447 448 check_swap_src_0_1(pc, &src0, &src1); 449 set_dst(pc, dst, inst); 450 set_src_0(pc, src0, inst); 451 if (is_long(inst)) 452 set_src_2(pc, src1, inst); 453 else 454 set_src_1(pc, src1, inst); 455 456 emit(pc, inst); 457} 458 459static void 460emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 461 struct nv50_reg *src0, struct nv50_reg *src1) 462{ 463 unsigned inst[2] = { 0, 0 }; 464 465 set_long(pc, inst); 466 inst[0] |= 0xb0000000; 467 inst[1] |= (sub << 29); 468 469 check_swap_src_0_1(pc, &src0, &src1); 470 set_dst(pc, dst, inst); 471 set_src_0(pc, src0, inst); 472 set_src_1(pc, src1, inst); 473 474 emit(pc, inst); 475} 476 477static void 478emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 479 struct nv50_reg *src1) 480{ 481 unsigned inst[2] = { 0, 0 }; 482 483 inst[0] |= 0xb0000000; 484 485 set_long(pc, inst); 486 if (check_swap_src_0_1(pc, &src0, &src1)) 487 inst[1] |= 0x04000000; 488 else 489 inst[1] |= 0x08000000; 490 491 set_dst(pc, dst, inst); 
492 set_src_0(pc, src0, inst); 493 set_src_2(pc, src1, inst); 494 495 emit(pc, inst); 496} 497 498static void 499emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 500 struct nv50_reg *src1, struct nv50_reg *src2) 501{ 502 unsigned inst[2] = { 0, 0 }; 503 504 inst[0] |= 0xe0000000; 505 506 check_swap_src_0_1(pc, &src0, &src1); 507 set_dst(pc, dst, inst); 508 set_src_0(pc, src0, inst); 509 set_src_1(pc, src1, inst); 510 set_src_2(pc, src2, inst); 511 512 emit(pc, inst); 513} 514 515static void 516emit_flop(struct nv50_pc *pc, unsigned sub, 517 struct nv50_reg *dst, struct nv50_reg *src) 518{ 519 unsigned inst[2] = { 0, 0 }; 520 521 inst[0] |= 0x90000000; 522 if (sub) { 523 set_long(pc, inst); 524 inst[1] |= (sub << 29); 525 } 526 527 set_dst(pc, dst, inst); 528 set_src_0(pc, src, inst); 529 530 emit(pc, inst); 531} 532 533static boolean 534nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 535{ 536 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 537 struct nv50_reg *dst[4], *src[3][4], *temp; 538 unsigned mask; 539 int i, c; 540 541 NOUVEAU_ERR("insn %p\n", tok); 542 543 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 544 545 for (c = 0; c < 4; c++) { 546 if (mask & (1 << c)) 547 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 548 else 549 dst[c] = NULL; 550 } 551 552 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 553 for (c = 0; c < 4; c++) 554 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 555 } 556 557 switch (inst->Instruction.Opcode) { 558 case TGSI_OPCODE_ADD: 559 for (c = 0; c < 4; c++) { 560 if (!(mask & (1 << c))) 561 continue; 562 emit_add(pc, dst[c], src[0][c], src[1][c]); 563 } 564 break; 565 case TGSI_OPCODE_COS: 566 for (c = 0; c < 4; c++) { 567 if (!(mask & (1 << c))) 568 continue; 569 emit_flop(pc, 5, dst[c], src[0][c]); 570 } 571 break; 572 case TGSI_OPCODE_DP3: 573 temp = alloc_temp(pc, NULL); 574 emit_mul(pc, temp, src[0][0], src[1][0]); 575 emit_mad(pc, 
temp, src[0][1], src[1][1], temp); 576 emit_mad(pc, temp, src[0][2], src[1][2], temp); 577 for (c = 0; c < 4; c++) { 578 if (!(mask & (1 << c))) 579 continue; 580 emit_mov(pc, dst[c], temp); 581 } 582 free_temp(pc, temp); 583 break; 584 case TGSI_OPCODE_DP4: 585 temp = alloc_temp(pc, NULL); 586 emit_mul(pc, temp, src[0][0], src[1][0]); 587 emit_mad(pc, temp, src[0][1], src[1][1], temp); 588 emit_mad(pc, temp, src[0][2], src[1][2], temp); 589 emit_mad(pc, temp, src[0][3], src[1][3], temp); 590 for (c = 0; c < 4; c++) { 591 if (!(mask & (1 << c))) 592 continue; 593 emit_mov(pc, dst[c], temp); 594 } 595 free_temp(pc, temp); 596 break; 597 case TGSI_OPCODE_EX2: 598 temp = alloc_temp(pc, NULL); 599 for (c = 0; c < 4; c++) { 600 if (!(mask & (1 << c))) 601 continue; 602 { 603 unsigned inst[2] = { 0, 0 }; 604 inst[0] |= 0xb0000000; 605 set_dst(pc, temp, inst); 606 set_src_0(pc, src[0][c], inst); 607 set_long(pc, inst); 608 inst[1] |= (6 << 29) | 0x00004000; 609 emit(pc, inst); 610 } 611 emit_flop(pc, 6, dst[c], temp); 612 } 613 free_temp(pc, temp); 614 break; 615 case TGSI_OPCODE_LG2: 616 for (c = 0; c < 4; c++) { 617 if (!(mask & (1 << c))) 618 continue; 619 emit_flop(pc, 3, dst[c], src[0][c]); 620 } 621 break; 622 case TGSI_OPCODE_MAD: 623 for (c = 0; c < 4; c++) { 624 if (!(mask & (1 << c))) 625 continue; 626 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 627 } 628 break; 629 case TGSI_OPCODE_MAX: 630 for (c = 0; c < 4; c++) { 631 if (!(mask & (1 << c))) 632 continue; 633 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 634 } 635 break; 636 case TGSI_OPCODE_MIN: 637 for (c = 0; c < 4; c++) { 638 if (!(mask & (1 << c))) 639 continue; 640 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 641 } 642 break; 643 case TGSI_OPCODE_MOV: 644 for (c = 0; c < 4; c++) { 645 if (!(mask & (1 << c))) 646 continue; 647 emit_mov(pc, dst[c], src[0][c]); 648 } 649 break; 650 case TGSI_OPCODE_MUL: 651 for (c = 0; c < 4; c++) { 652 if (!(mask & (1 << c))) 653 continue; 654 
emit_mul(pc, dst[c], src[0][c], src[1][c]); 655 } 656 break; 657 case TGSI_OPCODE_RCP: 658 for (c = 0; c < 4; c++) { 659 if (!(mask & (1 << c))) 660 continue; 661 emit_flop(pc, 0, dst[c], src[0][c]); 662 } 663 break; 664 case TGSI_OPCODE_RSQ: 665 for (c = 0; c < 4; c++) { 666 if (!(mask & (1 << c))) 667 continue; 668 emit_flop(pc, 2, dst[c], src[0][c]); 669 } 670 break; 671 case TGSI_OPCODE_SIN: 672 for (c = 0; c < 4; c++) { 673 if (!(mask & (1 << c))) 674 continue; 675 emit_flop(pc, 4, dst[c], src[0][c]); 676 } 677 break; 678 case TGSI_OPCODE_SUB: 679 for (c = 0; c < 4; c++) { 680 if (!(mask & (1 << c))) 681 continue; 682 emit_sub(pc, dst[c], src[0][c], src[1][c]); 683 } 684 break; 685 case TGSI_OPCODE_END: 686 break; 687 default: 688 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 689 return FALSE; 690 } 691 692 kill_temp_temp(pc); 693 return TRUE; 694} 695 696static boolean 697nv50_program_tx_prep(struct nv50_pc *pc) 698{ 699 struct tgsi_parse_context p; 700 boolean ret = FALSE; 701 unsigned i, c; 702 703 tgsi_parse_init(&p, pc->p->pipe.tokens); 704 while (!tgsi_parse_end_of_tokens(&p)) { 705 const union tgsi_full_token *tok = &p.FullToken; 706 707 tgsi_parse_token(&p); 708 switch (tok->Token.Type) { 709 case TGSI_TOKEN_TYPE_IMMEDIATE: 710 { 711 const struct tgsi_full_immediate *imm = 712 &p.FullToken.FullImmediate; 713 714 pc->immd_nr++; 715 pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr * 716 sizeof(float)); 717 pc->immd_buf[4 * (pc->immd_nr - 1) + 0] = 718 imm->u.ImmediateFloat32[0].Float; 719 pc->immd_buf[4 * (pc->immd_nr - 1) + 1] = 720 imm->u.ImmediateFloat32[1].Float; 721 pc->immd_buf[4 * (pc->immd_nr - 1) + 2] = 722 imm->u.ImmediateFloat32[2].Float; 723 pc->immd_buf[4 * (pc->immd_nr - 1) + 3] = 724 imm->u.ImmediateFloat32[3].Float; 725 } 726 break; 727 case TGSI_TOKEN_TYPE_DECLARATION: 728 { 729 const struct tgsi_full_declaration *d; 730 unsigned last; 731 732 d = &p.FullToken.FullDeclaration; 733 last = d->u.DeclarationRange.Last; 
734 735 switch (d->Declaration.File) { 736 case TGSI_FILE_TEMPORARY: 737 if (pc->temp_nr < (last + 1)) 738 pc->temp_nr = last + 1; 739 break; 740 case TGSI_FILE_OUTPUT: 741 if (pc->result_nr < (last + 1)) 742 pc->result_nr = last + 1; 743 break; 744 case TGSI_FILE_INPUT: 745 if (pc->attr_nr < (last + 1)) 746 pc->attr_nr = last + 1; 747 break; 748 case TGSI_FILE_CONSTANT: 749 if (pc->param_nr < (last + 1)) 750 pc->param_nr = last + 1; 751 break; 752 default: 753 NOUVEAU_ERR("bad decl file %d\n", 754 d->Declaration.File); 755 goto out_err; 756 } 757 } 758 break; 759 case TGSI_TOKEN_TYPE_INSTRUCTION: 760 break; 761 default: 762 break; 763 } 764 } 765 766 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 767 if (pc->temp_nr) { 768 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 769 if (!pc->temp) 770 goto out_err; 771 772 for (i = 0; i < pc->temp_nr; i++) { 773 for (c = 0; c < 4; c++) { 774 pc->temp[i*4+c].type = P_TEMP; 775 pc->temp[i*4+c].hw = -1; 776 pc->temp[i*4+c].index = i; 777 } 778 } 779 } 780 781 NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 782 if (pc->attr_nr) { 783 struct nv50_reg *iv = NULL, *tmp = NULL; 784 int aid = 0; 785 786 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 787 if (!pc->attr) 788 goto out_err; 789 790 if (pc->p->type == NV50_PROG_FRAGMENT) { 791 iv = alloc_temp(pc, NULL); 792 aid++; 793 } 794 795 for (i = 0; i < pc->attr_nr; i++) { 796 struct nv50_reg *a = &pc->attr[i*4]; 797 798 for (c = 0; c < 4; c++) { 799 if (pc->p->type == NV50_PROG_FRAGMENT) { 800 struct nv50_reg *at = 801 alloc_temp(pc, NULL); 802 pc->attr[i*4+c].type = at->type; 803 pc->attr[i*4+c].hw = at->hw; 804 pc->attr[i*4+c].index = at->index; 805 } else { 806 pc->p->cfg.vp.attr[aid/32] |= 807 (1 << (aid % 32)); 808 pc->attr[i*4+c].type = P_ATTR; 809 pc->attr[i*4+c].hw = aid++; 810 pc->attr[i*4+c].index = i; 811 } 812 } 813 814 if (pc->p->type != NV50_PROG_FRAGMENT) 815 continue; 816 817 emit_interp(pc, iv, iv, iv, FALSE); 818 tmp = alloc_temp(pc, NULL); 819 
{ 820 unsigned inst[2] = { 0, 0 }; 821 inst[0] = 0x90000000; 822 inst[0] |= (tmp->hw << 2); 823 emit(pc, inst); 824 } 825 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 826 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 827 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 828 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 829 free_temp(pc, tmp); 830 } 831 832 if (iv) 833 free_temp(pc, iv); 834 } 835 836 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 837 if (pc->result_nr) { 838 int rid = 0; 839 840 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 841 if (!pc->result) 842 goto out_err; 843 844 for (i = 0; i < pc->result_nr; i++) { 845 for (c = 0; c < 4; c++) { 846 if (pc->p->type == NV50_PROG_FRAGMENT) 847 pc->result[i*4+c].type = P_TEMP; 848 else 849 pc->result[i*4+c].type = P_RESULT; 850 pc->result[i*4+c].hw = rid++; 851 pc->result[i*4+c].index = i; 852 } 853 } 854 } 855 856 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 857 if (pc->param_nr) { 858 int rid = 0; 859 860 pc->param = calloc(pc->param_nr * 4, sizeof(struct nv50_reg)); 861 if (!pc->param) 862 goto out_err; 863 864 for (i = 0; i < pc->param_nr; i++) { 865 for (c = 0; c < 4; c++) { 866 pc->param[i*4+c].type = P_CONST; 867 pc->param[i*4+c].hw = rid++; 868 pc->param[i*4+c].index = i; 869 } 870 } 871 } 872 873 if (pc->immd_nr) { 874 int rid = 0; 875 876 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 877 if (!pc->immd) 878 goto out_err; 879 880 for (i = 0; i < pc->immd_nr; i++) { 881 for (c = 0; c < 4; c++) { 882 pc->immd[i*4+c].type = P_IMMD; 883 pc->immd[i*4+c].hw = rid++; 884 pc->immd[i*4+c].index = i; 885 } 886 } 887 } 888 889 ret = TRUE; 890out_err: 891 tgsi_parse_free(&p); 892 return ret; 893} 894 895static boolean 896nv50_program_tx(struct nv50_program *p) 897{ 898 struct tgsi_parse_context parse; 899 struct nv50_pc *pc; 900 boolean ret; 901 902 pc = CALLOC_STRUCT(nv50_pc); 903 if (!pc) 904 return FALSE; 905 pc->p = p; 906 pc->p->cfg.high_temp = 4; 907 908 ret = nv50_program_tx_prep(pc); 909 
if (ret == FALSE) 910 goto out_cleanup; 911 912 tgsi_parse_init(&parse, pc->p->pipe.tokens); 913 while (!tgsi_parse_end_of_tokens(&parse)) { 914 const union tgsi_full_token *tok = &parse.FullToken; 915 916 tgsi_parse_token(&parse); 917 918 switch (tok->Token.Type) { 919 case TGSI_TOKEN_TYPE_INSTRUCTION: 920 ret = nv50_program_tx_insn(pc, tok); 921 if (ret == FALSE) 922 goto out_err; 923 break; 924 default: 925 break; 926 } 927 } 928 929 p->immd_nr = pc->immd_nr * 4; 930 p->immd = pc->immd_buf; 931 932out_err: 933 tgsi_parse_free(&parse); 934 935out_cleanup: 936 return ret; 937} 938 939static void 940nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 941{ 942 int i; 943 944 if (nv50_program_tx(p) == FALSE) 945 assert(0); 946 /* *not* sufficient, it's fine if last inst is long and 947 * NOT immd - otherwise it's fucked fucked fucked */ 948 p->insns[p->insns_nr - 1] |= 0x00000001; 949 950 if (p->type == NV50_PROG_VERTEX) { 951 for (i = 0; i < p->insns_nr; i++) 952 NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 953 } else { 954 for (i = 0; i < p->insns_nr; i++) 955 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 956 } 957 958 p->translated = TRUE; 959} 960 961static void 962nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 963{ 964 int i; 965 966 for (i = 0; i < p->immd_nr; i++) { 967 BEGIN_RING(tesla, 0x0f00, 2); 968 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 969 OUT_RING (fui(p->immd[i])); 970 } 971} 972 973static void 974nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 975{ 976 struct pipe_winsys *ws = nv50->pipe.winsys; 977 void *map; 978 979 if (!p->buffer) 980 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 981 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 982 memcpy(map, p->insns, p->insns_nr * 4); 983 ws->buffer_unmap(ws, p->buffer); 984} 985 986void 987nv50_vertprog_validate(struct nv50_context *nv50) 988{ 989 struct nouveau_grobj *tesla = nv50->screen->tesla; 
990 struct nv50_program *p = nv50->vertprog; 991 struct nouveau_stateobj *so; 992 993 if (!p->translated) { 994 nv50_program_validate(nv50, p); 995 if (!p->translated) 996 assert(0); 997 } 998 999 nv50_program_validate_data(nv50, p); 1000 nv50_program_validate_code(nv50, p); 1001 1002 so = so_new(11, 2); 1003 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 1004 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1005 NOUVEAU_BO_HIGH, 0, 0); 1006 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1007 NOUVEAU_BO_LOW, 0, 0); 1008 so_method(so, tesla, 0x1650, 2); 1009 so_data (so, p->cfg.vp.attr[0]); 1010 so_data (so, p->cfg.vp.attr[1]); 1011 so_method(so, tesla, 0x16ac, 2); 1012 so_data (so, 8); 1013 so_data (so, p->cfg.high_temp); 1014 so_method(so, tesla, 0x140c, 1); 1015 so_data (so, 0); /* program start offset */ 1016 so_emit(nv50->screen->nvws, so); 1017 so_ref(NULL, &so); 1018} 1019 1020void 1021nv50_fragprog_validate(struct nv50_context *nv50) 1022{ 1023 struct pipe_winsys *ws = nv50->pipe.winsys; 1024 struct nouveau_grobj *tesla = nv50->screen->tesla; 1025 struct nv50_program *p = nv50->fragprog; 1026 struct nouveau_stateobj *so; 1027 void *map; 1028 1029 if (!p->translated) { 1030 nv50_program_validate(nv50, p); 1031 if (!p->translated) 1032 assert(0); 1033 } 1034 1035 nv50_program_validate_data(nv50, p); 1036 nv50_program_validate_code(nv50, p); 1037 1038 so = so_new(7, 2); 1039 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 1040 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1041 NOUVEAU_BO_HIGH, 0, 0); 1042 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 1043 NOUVEAU_BO_LOW, 0, 0); 1044 so_method(so, tesla, 0x198c, 1); 1045 so_data (so, p->cfg.high_temp); 1046 so_method(so, tesla, 0x1414, 1); 1047 so_data (so, 0); /* program start offset */ 1048 so_emit(nv50->screen->nvws, so); 1049 so_ref(NULL, &so); 1050} 1051 1052void 1053nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 1054{ 
1055 struct pipe_winsys *ws = nv50->pipe.winsys; 1056 1057 if (p->insns_nr) { 1058 if (p->insns) 1059 FREE(p->insns); 1060 p->insns_nr = 0; 1061 } 1062 1063 if (p->buffer) 1064 pipe_buffer_reference(ws, &p->buffer, NULL); 1065 1066 p->translated = 0; 1067} 1068 1069