/* nv50_program.c revision 52a69196c1680ff16d1ad1fc88e5869bc6055d00 */
1#include "pipe/p_context.h" 2#include "pipe/p_defines.h" 3#include "pipe/p_state.h" 4#include "pipe/p_inlines.h" 5 6#include "pipe/p_shader_tokens.h" 7#include "tgsi/util/tgsi_parse.h" 8#include "tgsi/util/tgsi_util.h" 9 10#include "nv50_context.h" 11#include "nv50_state.h" 12 13#define NV50_SU_MAX_TEMP 64 14 15struct nv50_reg { 16 enum { 17 P_TEMP, 18 P_ATTR, 19 P_RESULT, 20 P_CONST, 21 P_IMMD 22 } type; 23 int index; 24 25 int hw; 26 int neg; 27}; 28 29struct nv50_pc { 30 struct nv50_program *p; 31 32 /* hw resources */ 33 struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; 34 35 /* tgsi resources */ 36 struct nv50_reg *temp; 37 int temp_nr; 38 struct nv50_reg *attr; 39 int attr_nr; 40 struct nv50_reg *result; 41 int result_nr; 42 struct nv50_reg *param; 43 int param_nr; 44 struct nv50_reg *immd; 45 float *immd_buf; 46 int immd_nr; 47 48 struct nv50_reg *temp_temp[8]; 49 unsigned temp_temp_nr; 50}; 51 52static void 53alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) 54{ 55 int i; 56 57 if (reg->type != P_TEMP) 58 return; 59 60 if (reg->hw >= 0) { 61 /*XXX: do this here too to catch FP temp-as-attr usage.. 
62 * not clean, but works */ 63 if (pc->p->cfg.high_temp < (reg->hw + 1)) 64 pc->p->cfg.high_temp = reg->hw + 1; 65 return; 66 } 67 68 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 69 if (!(pc->r_temp[i])) { 70 pc->r_temp[i] = reg; 71 reg->hw = i; 72 if (pc->p->cfg.high_temp < (i + 1)) 73 pc->p->cfg.high_temp = i + 1; 74 return; 75 } 76 } 77 78 assert(0); 79} 80 81static struct nv50_reg * 82alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) 83{ 84 struct nv50_reg *r; 85 int i; 86 87 if (dst && dst->type == P_TEMP && dst->hw == -1) 88 return dst; 89 90 for (i = 0; i < NV50_SU_MAX_TEMP; i++) { 91 if (!pc->r_temp[i]) { 92 r = CALLOC_STRUCT(nv50_reg); 93 r->type = P_TEMP; 94 r->index = -1; 95 r->hw = i; 96 pc->r_temp[i] = r; 97 return r; 98 } 99 } 100 101 assert(0); 102 return NULL; 103} 104 105static void 106free_temp(struct nv50_pc *pc, struct nv50_reg *r) 107{ 108 if (r->index == -1) { 109 FREE(pc->r_temp[r->hw]); 110 pc->r_temp[r->hw] = NULL; 111 } 112} 113 114static struct nv50_reg * 115temp_temp(struct nv50_pc *pc) 116{ 117 if (pc->temp_temp_nr >= 8) 118 assert(0); 119 120 pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); 121 return pc->temp_temp[pc->temp_temp_nr++]; 122} 123 124static void 125kill_temp_temp(struct nv50_pc *pc) 126{ 127 int i; 128 129 for (i = 0; i < pc->temp_temp_nr; i++) 130 free_temp(pc, pc->temp_temp[i]); 131 pc->temp_temp_nr = 0; 132} 133 134static struct nv50_reg * 135tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) 136{ 137 switch (dst->DstRegister.File) { 138 case TGSI_FILE_TEMPORARY: 139 return &pc->temp[dst->DstRegister.Index * 4 + c]; 140 case TGSI_FILE_OUTPUT: 141 return &pc->result[dst->DstRegister.Index * 4 + c]; 142 case TGSI_FILE_NULL: 143 return NULL; 144 default: 145 break; 146 } 147 148 return NULL; 149} 150 151static struct nv50_reg * 152tgsi_src(struct nv50_pc *pc, int c, const struct tgsi_full_src_register *src) 153{ 154 /* Handle swizzling */ 155 switch (c) { 156 case 0: c = 
src->SrcRegister.SwizzleX; break; 157 case 1: c = src->SrcRegister.SwizzleY; break; 158 case 2: c = src->SrcRegister.SwizzleZ; break; 159 case 3: c = src->SrcRegister.SwizzleW; break; 160 default: 161 assert(0); 162 } 163 164 switch (src->SrcRegister.File) { 165 case TGSI_FILE_INPUT: 166 return &pc->attr[src->SrcRegister.Index * 4 + c]; 167 case TGSI_FILE_TEMPORARY: 168 return &pc->temp[src->SrcRegister.Index * 4 + c]; 169 case TGSI_FILE_CONSTANT: 170 return &pc->param[src->SrcRegister.Index * 4 + c]; 171 case TGSI_FILE_IMMEDIATE: 172 return &pc->immd[src->SrcRegister.Index * 4 + c]; 173 default: 174 break; 175 } 176 177 return NULL; 178} 179 180static void 181emit(struct nv50_pc *pc, unsigned *inst) 182{ 183 struct nv50_program *p = pc->p; 184 185 if (inst[0] & 1) { 186 p->insns_nr += 2; 187 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 188 memcpy(p->insns + (p->insns_nr - 2), inst, sizeof(unsigned)*2); 189 } else { 190 p->insns_nr += 1; 191 p->insns = realloc(p->insns, sizeof(unsigned) * p->insns_nr); 192 memcpy(p->insns + (p->insns_nr - 1), inst, sizeof(unsigned)); 193 } 194} 195 196static INLINE void set_long(struct nv50_pc *, unsigned *); 197 198static boolean 199is_long(unsigned *inst) 200{ 201 if (inst[0] & 1) 202 return TRUE; 203 return FALSE; 204} 205 206static boolean 207is_immd(unsigned *inst) 208{ 209 if (is_long(inst) && (inst[1] & 3) == 3) 210 return TRUE; 211 return FALSE; 212} 213 214static INLINE void 215set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, unsigned *inst) 216{ 217 set_long(pc, inst); 218 inst[1] &= ~((0x1f << 7) | (0x3 << 12)); 219 inst[1] |= (pred << 7) | (idx << 12); 220} 221 222static INLINE void 223set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, unsigned *inst) 224{ 225 set_long(pc, inst); 226 inst[1] &= ~((0x3 << 4) | (1 << 6)); 227 inst[1] |= (idx << 4) | (on << 6); 228} 229 230static INLINE void 231set_long(struct nv50_pc *pc, unsigned *inst) 232{ 233 if (is_long(inst)) 234 return; 235 236 
inst[0] |= 1; 237 set_pred(pc, 0xf, 0, inst); 238 set_pred_wr(pc, 0, 0, inst); 239} 240 241static INLINE void 242set_dst(struct nv50_pc *pc, struct nv50_reg *dst, unsigned *inst) 243{ 244 if (dst->type == P_RESULT) { 245 set_long(pc, inst); 246 inst[1] |= 0x00000008; 247 } 248 249 alloc_reg(pc, dst); 250 inst[0] |= (dst->hw << 2); 251} 252 253static INLINE void 254set_immd(struct nv50_pc *pc, struct nv50_reg *imm, unsigned *inst) 255{ 256 unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ 257 258 set_long(pc, inst); 259 /*XXX: can't be predicated - bits overlap.. catch cases where both 260 * are required and avoid them. */ 261 set_pred(pc, 0, 0, inst); 262 set_pred_wr(pc, 0, 0, inst); 263 264 inst[1] |= 0x00000002 | 0x00000001; 265 inst[0] |= (val & 0x3f) << 16; 266 inst[1] |= (val >> 6) << 2; 267} 268 269static void 270emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, 271 struct nv50_reg *src, struct nv50_reg *iv, boolean noperspective) 272{ 273 unsigned inst[2] = { 0, 0 }; 274 275 inst[0] |= 0x80000000; 276 set_dst(pc, dst, inst); 277 alloc_reg(pc, iv); 278 inst[0] |= (iv->hw << 9); 279 alloc_reg(pc, src); 280 inst[0] |= (src->hw << 16); 281 if (noperspective) 282 inst[0] |= (1 << 25); 283 284 emit(pc, inst); 285} 286 287static void 288set_cseg(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 289{ 290 set_long(pc, inst); 291 if (src->type == P_IMMD) { 292 inst[1] |= (NV50_CB_PMISC << 22); 293 } else { 294 if (pc->p->type == NV50_PROG_VERTEX) 295 inst[1] |= (NV50_CB_PVP << 22); 296 else 297 inst[1] |= (NV50_CB_PFP << 22); 298 } 299} 300 301static void 302emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) 303{ 304 unsigned inst[2] = { 0, 0 }; 305 306 inst[0] |= 0x10000000; 307 308 set_dst(pc, dst, inst); 309 310 if (dst->type != P_RESULT && src->type == P_IMMD) { 311 set_immd(pc, src, inst); 312 /*XXX: 32-bit, but steals part of "half" reg space - need to 313 * catch and handle this case if/when we do half-regs 314 */ 315 
inst[0] |= 0x00008000; 316 } else 317 if (src->type == P_IMMD || src->type == P_CONST) { 318 set_long(pc, inst); 319 set_cseg(pc, src, inst); 320 inst[0] |= (src->hw << 9); 321 inst[1] |= 0x20000000; /* src0 const? */ 322 } else { 323 if (src->type == P_ATTR) { 324 set_long(pc, inst); 325 inst[1] |= 0x00200000; 326 } 327 328 alloc_reg(pc, src); 329 inst[0] |= (src->hw << 9); 330 } 331 332 /* We really should support "half" instructions here at some point, 333 * but I don't feel confident enough about them yet. 334 */ 335 set_long(pc, inst); 336 if (is_long(inst) && !is_immd(inst)) { 337 inst[1] |= 0x04000000; /* 32-bit */ 338 inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ 339 } 340 341 emit(pc, inst); 342} 343 344static boolean 345check_swap_src_0_1(struct nv50_pc *pc, 346 struct nv50_reg **s0, struct nv50_reg **s1) 347{ 348 struct nv50_reg *src0 = *s0, *src1 = *s1; 349 350 if (src0->type == P_CONST) { 351 if (src1->type != P_CONST) { 352 *s0 = src1; 353 *s1 = src0; 354 return TRUE; 355 } 356 } else 357 if (src1->type == P_ATTR) { 358 if (src0->type != P_ATTR) { 359 *s0 = src1; 360 *s1 = src0; 361 return TRUE; 362 } 363 } 364 365 return FALSE; 366} 367 368static void 369set_src_0(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 370{ 371 if (src->type == P_ATTR) { 372 set_long(pc, inst); 373 inst[1] |= 0x00200000; 374 } else 375 if (src->type == P_CONST || src->type == P_IMMD) { 376 struct nv50_reg *temp = temp_temp(pc); 377 378 emit_mov(pc, temp, src); 379 src = temp; 380 } 381 382 alloc_reg(pc, src); 383 inst[0] |= (src->hw << 9); 384} 385 386static void 387set_src_1(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 388{ 389 if (src->type == P_ATTR) { 390 struct nv50_reg *temp = temp_temp(pc); 391 392 emit_mov(pc, temp, src); 393 src = temp; 394 } else 395 if (src->type == P_CONST || src->type == P_IMMD) { 396 set_cseg(pc, src, inst); 397 inst[0] |= 0x00800000; 398 } 399 400 alloc_reg(pc, src); 401 inst[0] |= (src->hw << 16); 402} 403 
404static void 405set_src_2(struct nv50_pc *pc, struct nv50_reg *src, unsigned *inst) 406{ 407 set_long(pc, inst); 408 409 if (src->type == P_ATTR) { 410 struct nv50_reg *temp = temp_temp(pc); 411 412 emit_mov(pc, temp, src); 413 src = temp; 414 } else 415 if (src->type == P_CONST || src->type == P_IMMD) { 416 set_cseg(pc, src, inst); 417 inst[0] |= 0x01000000; 418 } 419 420 alloc_reg(pc, src); 421 inst[1] |= (src->hw << 14); 422} 423 424static void 425emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 426 struct nv50_reg *src1) 427{ 428 unsigned inst[2] = { 0, 0 }; 429 430 inst[0] |= 0xc0000000; 431 432 check_swap_src_0_1(pc, &src0, &src1); 433 set_dst(pc, dst, inst); 434 set_src_0(pc, src0, inst); 435 set_src_1(pc, src1, inst); 436 437 emit(pc, inst); 438} 439 440static void 441emit_add(struct nv50_pc *pc, struct nv50_reg *dst, 442 struct nv50_reg *src0, struct nv50_reg *src1) 443{ 444 unsigned inst[2] = { 0, 0 }; 445 446 inst[0] |= 0xb0000000; 447 448 check_swap_src_0_1(pc, &src0, &src1); 449 set_dst(pc, dst, inst); 450 set_src_0(pc, src0, inst); 451 if (is_long(inst)) 452 set_src_2(pc, src1, inst); 453 else 454 set_src_1(pc, src1, inst); 455 456 emit(pc, inst); 457} 458 459static void 460emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, 461 struct nv50_reg *src0, struct nv50_reg *src1) 462{ 463 unsigned inst[2] = { 0, 0 }; 464 465 set_long(pc, inst); 466 inst[0] |= 0xb0000000; 467 inst[1] |= (sub << 29); 468 469 check_swap_src_0_1(pc, &src0, &src1); 470 set_dst(pc, dst, inst); 471 set_src_0(pc, src0, inst); 472 set_src_1(pc, src1, inst); 473 474 emit(pc, inst); 475} 476 477static void 478emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 479 struct nv50_reg *src1) 480{ 481 unsigned inst[2] = { 0, 0 }; 482 483 inst[0] |= 0xb0000000; 484 485 set_long(pc, inst); 486 if (check_swap_src_0_1(pc, &src0, &src1)) 487 inst[1] |= 0x04000000; 488 else 489 inst[1] |= 0x08000000; 490 491 set_dst(pc, dst, inst); 
492 set_src_0(pc, src0, inst); 493 set_src_2(pc, src1, inst); 494 495 emit(pc, inst); 496} 497 498static void 499emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, 500 struct nv50_reg *src1, struct nv50_reg *src2) 501{ 502 unsigned inst[2] = { 0, 0 }; 503 504 inst[0] |= 0xe0000000; 505 506 check_swap_src_0_1(pc, &src0, &src1); 507 set_dst(pc, dst, inst); 508 set_src_0(pc, src0, inst); 509 set_src_1(pc, src1, inst); 510 set_src_2(pc, src2, inst); 511 512 emit(pc, inst); 513} 514 515static void 516emit_flop(struct nv50_pc *pc, unsigned sub, 517 struct nv50_reg *dst, struct nv50_reg *src) 518{ 519 unsigned inst[2] = { 0, 0 }; 520 521 inst[0] |= 0x90000000; 522 if (sub) { 523 set_long(pc, inst); 524 inst[1] |= (sub << 29); 525 } 526 527 set_dst(pc, dst, inst); 528 set_src_0(pc, src, inst); 529 530 emit(pc, inst); 531} 532 533static boolean 534nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) 535{ 536 const struct tgsi_full_instruction *inst = &tok->FullInstruction; 537 struct nv50_reg *dst[4], *src[3][4], *temp; 538 unsigned mask; 539 int i, c; 540 541 NOUVEAU_ERR("insn %p\n", tok); 542 543 mask = inst->FullDstRegisters[0].DstRegister.WriteMask; 544 545 for (c = 0; c < 4; c++) { 546 if (mask & (1 << c)) 547 dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); 548 else 549 dst[c] = NULL; 550 } 551 552 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 553 for (c = 0; c < 4; c++) 554 src[i][c] = tgsi_src(pc, c, &inst->FullSrcRegisters[i]); 555 } 556 557 switch (inst->Instruction.Opcode) { 558 case TGSI_OPCODE_ADD: 559 for (c = 0; c < 4; c++) 560 emit_add(pc, dst[c], src[0][c], src[1][c]); 561 break; 562 case TGSI_OPCODE_COS: 563 for (c = 0; c < 4; c++) 564 emit_flop(pc, 5, dst[c], src[0][c]); 565 break; 566 case TGSI_OPCODE_DP3: 567 temp = alloc_temp(pc, NULL); 568 emit_mul(pc, temp, src[0][0], src[1][0]); 569 emit_mad(pc, temp, src[0][1], src[1][1], temp); 570 emit_mad(pc, temp, src[0][2], src[1][2], temp); 571 for (c = 
0; c < 4; c++) 572 emit_mov(pc, dst[c], temp); 573 free_temp(pc, temp); 574 break; 575 case TGSI_OPCODE_DP4: 576 temp = alloc_temp(pc, NULL); 577 emit_mul(pc, temp, src[0][0], src[1][0]); 578 emit_mad(pc, temp, src[0][1], src[1][1], temp); 579 emit_mad(pc, temp, src[0][2], src[1][2], temp); 580 emit_mad(pc, temp, src[0][3], src[1][3], temp); 581 for (c = 0; c < 4; c++) 582 emit_mov(pc, dst[c], temp); 583 free_temp(pc, temp); 584 break; 585 case TGSI_OPCODE_EX2: 586 for (c = 0; c < 4; c++) 587 emit_flop(pc, 6, dst[c], src[0][c]); 588 break; 589 case TGSI_OPCODE_LG2: 590 for (c = 0; c < 4; c++) 591 emit_flop(pc, 3, dst[c], src[0][c]); 592 break; 593 case TGSI_OPCODE_MAD: 594 for (c = 0; c < 4; c++) 595 emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); 596 break; 597 case TGSI_OPCODE_MAX: 598 for (c = 0; c < 4; c++) 599 emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); 600 break; 601 case TGSI_OPCODE_MIN: 602 for (c = 0; c < 4; c++) 603 emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); 604 break; 605 case TGSI_OPCODE_MOV: 606 for (c = 0; c < 4; c++) 607 emit_mov(pc, dst[c], src[0][c]); 608 break; 609 case TGSI_OPCODE_MUL: 610 for (c = 0; c < 4; c++) 611 emit_mul(pc, dst[c], src[0][c], src[1][c]); 612 break; 613 case TGSI_OPCODE_RCP: 614 for (c = 0; c < 4; c++) 615 emit_flop(pc, 0, dst[c], src[0][c]); 616 break; 617 case TGSI_OPCODE_RSQ: 618 for (c = 0; c < 4; c++) 619 emit_flop(pc, 2, dst[c], src[0][c]); 620 break; 621 case TGSI_OPCODE_SIN: 622 for (c = 0; c < 4; c++) 623 emit_flop(pc, 4, dst[c], src[0][c]); 624 break; 625 case TGSI_OPCODE_SUB: 626 for (c = 0; c < 4; c++) 627 emit_sub(pc, dst[c], src[0][c], src[1][c]); 628 break; 629 case TGSI_OPCODE_END: 630 break; 631 default: 632 NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); 633 return FALSE; 634 } 635 636 kill_temp_temp(pc); 637 return TRUE; 638} 639 640static boolean 641nv50_program_tx_prep(struct nv50_pc *pc) 642{ 643 struct tgsi_parse_context p; 644 boolean ret = FALSE; 645 unsigned i, c; 
646 647 tgsi_parse_init(&p, pc->p->pipe.tokens); 648 while (!tgsi_parse_end_of_tokens(&p)) { 649 const union tgsi_full_token *tok = &p.FullToken; 650 651 tgsi_parse_token(&p); 652 switch (tok->Token.Type) { 653 case TGSI_TOKEN_TYPE_IMMEDIATE: 654 { 655 const struct tgsi_full_immediate *imm = 656 &p.FullToken.FullImmediate; 657 658 pc->immd_nr++; 659 pc->immd_buf = realloc(pc->immd_buf, 4 * pc->immd_nr * 660 sizeof(float)); 661 pc->immd_buf[4 * (pc->immd_nr - 1) + 0] = 662 imm->u.ImmediateFloat32[0].Float; 663 pc->immd_buf[4 * (pc->immd_nr - 1) + 1] = 664 imm->u.ImmediateFloat32[1].Float; 665 pc->immd_buf[4 * (pc->immd_nr - 1) + 2] = 666 imm->u.ImmediateFloat32[2].Float; 667 pc->immd_buf[4 * (pc->immd_nr - 1) + 3] = 668 imm->u.ImmediateFloat32[3].Float; 669 } 670 break; 671 case TGSI_TOKEN_TYPE_DECLARATION: 672 { 673 const struct tgsi_full_declaration *d; 674 unsigned last; 675 676 d = &p.FullToken.FullDeclaration; 677 last = d->u.DeclarationRange.Last; 678 679 switch (d->Declaration.File) { 680 case TGSI_FILE_TEMPORARY: 681 if (pc->temp_nr < (last + 1)) 682 pc->temp_nr = last + 1; 683 break; 684 case TGSI_FILE_OUTPUT: 685 if (pc->result_nr < (last + 1)) 686 pc->result_nr = last + 1; 687 break; 688 case TGSI_FILE_INPUT: 689 if (pc->attr_nr < (last + 1)) 690 pc->attr_nr = last + 1; 691 break; 692 case TGSI_FILE_CONSTANT: 693 if (pc->param_nr < (last + 1)) 694 pc->param_nr = last + 1; 695 break; 696 default: 697 NOUVEAU_ERR("bad decl file %d\n", 698 d->Declaration.File); 699 goto out_err; 700 } 701 } 702 break; 703 case TGSI_TOKEN_TYPE_INSTRUCTION: 704 break; 705 default: 706 break; 707 } 708 } 709 710 NOUVEAU_ERR("%d temps\n", pc->temp_nr); 711 if (pc->temp_nr) { 712 pc->temp = calloc(pc->temp_nr * 4, sizeof(struct nv50_reg)); 713 if (!pc->temp) 714 goto out_err; 715 716 for (i = 0; i < pc->temp_nr; i++) { 717 for (c = 0; c < 4; c++) { 718 pc->temp[i*4+c].type = P_TEMP; 719 pc->temp[i*4+c].hw = -1; 720 pc->temp[i*4+c].index = i; 721 } 722 } 723 } 724 725 
NOUVEAU_ERR("%d attrib regs\n", pc->attr_nr); 726 if (pc->attr_nr) { 727 struct nv50_reg *iv = NULL, *tmp = NULL; 728 int aid = 0; 729 730 pc->attr = calloc(pc->attr_nr * 4, sizeof(struct nv50_reg)); 731 if (!pc->attr) 732 goto out_err; 733 734 if (pc->p->type == NV50_PROG_FRAGMENT) { 735 iv = alloc_temp(pc, NULL); 736 aid++; 737 } 738 739 for (i = 0; i < pc->attr_nr; i++) { 740 struct nv50_reg *a = &pc->attr[i*4]; 741 742 for (c = 0; c < 4; c++) { 743 if (pc->p->type == NV50_PROG_FRAGMENT) { 744 struct nv50_reg *at = 745 alloc_temp(pc, NULL); 746 pc->attr[i*4+c].type = at->type; 747 pc->attr[i*4+c].hw = at->hw; 748 pc->attr[i*4+c].index = at->index; 749 } else { 750 pc->p->cfg.vp.attr[aid/32] |= 751 (1 << (aid % 32)); 752 pc->attr[i*4+c].type = P_ATTR; 753 pc->attr[i*4+c].hw = aid++; 754 pc->attr[i*4+c].index = i; 755 } 756 } 757 758 if (pc->p->type != NV50_PROG_FRAGMENT) 759 continue; 760 761 emit_interp(pc, iv, iv, iv, FALSE); 762 tmp = alloc_temp(pc, NULL); 763 { 764 unsigned inst[2] = { 0, 0 }; 765 inst[0] = 0x90000000; 766 inst[0] |= (tmp->hw << 2); 767 emit(pc, inst); 768 } 769 emit_interp(pc, &a[0], &a[0], tmp, TRUE); 770 emit_interp(pc, &a[1], &a[1], tmp, TRUE); 771 emit_interp(pc, &a[2], &a[2], tmp, TRUE); 772 emit_interp(pc, &a[3], &a[3], tmp, TRUE); 773 free_temp(pc, tmp); 774 } 775 776 if (iv) 777 free_temp(pc, iv); 778 } 779 780 NOUVEAU_ERR("%d result regs\n", pc->result_nr); 781 if (pc->result_nr) { 782 int rid = 0; 783 784 pc->result = calloc(pc->result_nr * 4, sizeof(struct nv50_reg)); 785 if (!pc->result) 786 goto out_err; 787 788 for (i = 0; i < pc->result_nr; i++) { 789 for (c = 0; c < 4; c++) { 790 if (pc->p->type == NV50_PROG_FRAGMENT) 791 pc->result[i*4+c].type = P_TEMP; 792 else 793 pc->result[i*4+c].type = P_RESULT; 794 pc->result[i*4+c].hw = rid++; 795 pc->result[i*4+c].index = i; 796 } 797 } 798 } 799 800 NOUVEAU_ERR("%d param regs\n", pc->param_nr); 801 if (pc->param_nr) { 802 int rid = 0; 803 804 pc->param = calloc(pc->param_nr * 4, 
sizeof(struct nv50_reg)); 805 if (!pc->param) 806 goto out_err; 807 808 for (i = 0; i < pc->param_nr; i++) { 809 for (c = 0; c < 4; c++) { 810 pc->param[i*4+c].type = P_CONST; 811 pc->param[i*4+c].hw = rid++; 812 pc->param[i*4+c].index = i; 813 } 814 } 815 } 816 817 if (pc->immd_nr) { 818 int rid = 0; 819 820 pc->immd = calloc(pc->immd_nr * 4, sizeof(struct nv50_reg)); 821 if (!pc->immd) 822 goto out_err; 823 824 for (i = 0; i < pc->immd_nr; i++) { 825 for (c = 0; c < 4; c++) { 826 pc->immd[i*4+c].type = P_IMMD; 827 pc->immd[i*4+c].hw = rid++; 828 pc->immd[i*4+c].index = i; 829 } 830 } 831 } 832 833 ret = TRUE; 834out_err: 835 tgsi_parse_free(&p); 836 return ret; 837} 838 839static boolean 840nv50_program_tx(struct nv50_program *p) 841{ 842 struct tgsi_parse_context parse; 843 struct nv50_pc *pc; 844 boolean ret; 845 846 pc = CALLOC_STRUCT(nv50_pc); 847 if (!pc) 848 return FALSE; 849 pc->p = p; 850 pc->p->cfg.high_temp = 4; 851 852 ret = nv50_program_tx_prep(pc); 853 if (ret == FALSE) 854 goto out_cleanup; 855 856 tgsi_parse_init(&parse, pc->p->pipe.tokens); 857 while (!tgsi_parse_end_of_tokens(&parse)) { 858 const union tgsi_full_token *tok = &parse.FullToken; 859 860 tgsi_parse_token(&parse); 861 862 switch (tok->Token.Type) { 863 case TGSI_TOKEN_TYPE_INSTRUCTION: 864 ret = nv50_program_tx_insn(pc, tok); 865 if (ret == FALSE) 866 goto out_err; 867 break; 868 default: 869 break; 870 } 871 } 872 873 p->immd_nr = pc->immd_nr * 4; 874 p->immd = pc->immd_buf; 875 876out_err: 877 tgsi_parse_free(&parse); 878 879out_cleanup: 880 return ret; 881} 882 883static void 884nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) 885{ 886 int i; 887 888 if (nv50_program_tx(p) == FALSE) 889 assert(0); 890 /* *not* sufficient, it's fine if last inst is long and 891 * NOT immd - otherwise it's fucked fucked fucked */ 892 p->insns[p->insns_nr - 1] |= 0x00000001; 893 894 if (p->type == NV50_PROG_VERTEX) { 895 for (i = 0; i < p->insns_nr; i++) 896 
NOUVEAU_ERR("VP0x%08x\n", p->insns[i]); 897 } else { 898 for (i = 0; i < p->insns_nr; i++) 899 NOUVEAU_ERR("FP0x%08x\n", p->insns[i]); 900 } 901 902 p->translated = TRUE; 903} 904 905static void 906nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) 907{ 908 int i; 909 910 for (i = 0; i < p->immd_nr; i++) { 911 BEGIN_RING(tesla, 0x0f00, 2); 912 OUT_RING ((NV50_CB_PMISC << 16) | (i << 8)); 913 OUT_RING (fui(p->immd[i])); 914 } 915} 916 917static void 918nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) 919{ 920 struct pipe_winsys *ws = nv50->pipe.winsys; 921 void *map; 922 923 if (!p->buffer) 924 p->buffer = ws->buffer_create(ws, 0x100, 0, p->insns_nr * 4); 925 map = ws->buffer_map(ws, p->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); 926 memcpy(map, p->insns, p->insns_nr * 4); 927 ws->buffer_unmap(ws, p->buffer); 928} 929 930void 931nv50_vertprog_validate(struct nv50_context *nv50) 932{ 933 struct nouveau_grobj *tesla = nv50->screen->tesla; 934 struct nv50_program *p = nv50->vertprog; 935 struct nouveau_stateobj *so; 936 937 if (!p->translated) { 938 nv50_program_validate(nv50, p); 939 if (!p->translated) 940 assert(0); 941 } 942 943 nv50_program_validate_data(nv50, p); 944 nv50_program_validate_code(nv50, p); 945 946 so = so_new(11, 2); 947 so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); 948 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 949 NOUVEAU_BO_HIGH, 0, 0); 950 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 951 NOUVEAU_BO_LOW, 0, 0); 952 so_method(so, tesla, 0x1650, 2); 953 so_data (so, p->cfg.vp.attr[0]); 954 so_data (so, p->cfg.vp.attr[1]); 955 so_method(so, tesla, 0x16ac, 2); 956 so_data (so, 8); 957 so_data (so, p->cfg.high_temp); 958 so_method(so, tesla, 0x140c, 1); 959 so_data (so, 0); /* program start offset */ 960 so_emit(nv50->screen->nvws, so); 961 so_ref(NULL, &so); 962} 963 964void 965nv50_fragprog_validate(struct nv50_context *nv50) 966{ 967 struct pipe_winsys *ws = 
nv50->pipe.winsys; 968 struct nouveau_grobj *tesla = nv50->screen->tesla; 969 struct nv50_program *p = nv50->fragprog; 970 struct nouveau_stateobj *so; 971 void *map; 972 973 if (!p->translated) { 974 nv50_program_validate(nv50, p); 975 if (!p->translated) 976 assert(0); 977 } 978 979 nv50_program_validate_data(nv50, p); 980 nv50_program_validate_code(nv50, p); 981 982 so = so_new(7, 2); 983 so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); 984 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 985 NOUVEAU_BO_HIGH, 0, 0); 986 so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | 987 NOUVEAU_BO_LOW, 0, 0); 988 so_method(so, tesla, 0x198c, 1); 989 so_data (so, p->cfg.high_temp); 990 so_method(so, tesla, 0x1414, 1); 991 so_data (so, 0); /* program start offset */ 992 so_emit(nv50->screen->nvws, so); 993 so_ref(NULL, &so); 994} 995 996void 997nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 998{ 999 struct pipe_winsys *ws = nv50->pipe.winsys; 1000 1001 if (p->insns_nr) { 1002 if (p->insns) 1003 FREE(p->insns); 1004 p->insns_nr = 0; 1005 } 1006 1007 if (p->buffer) 1008 pipe_buffer_reference(ws, &p->buffer, NULL); 1009 1010 p->translated = 0; 1011} 1012 1013