nv50_program.c revision 84d170bbcef8e26017ac8e2f3bacbaeb20f889d3
1/* 2 * Copyright 2010 Chrsitoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23/* #define NV50_PROGRAM_DEBUG */ 24 25#include "nv50_program.h" 26#include "nv50_pc.h" 27#include "nv50_context.h" 28 29#include "pipe/p_shader_tokens.h" 30#include "tgsi/tgsi_parse.h" 31#include "tgsi/tgsi_util.h" 32#include "tgsi/tgsi_dump.h" 33 34static INLINE unsigned 35bitcount4(const uint32_t val) 36{ 37 static const unsigned cnt[16] 38 = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; 39 return cnt[val & 0xf]; 40} 41 42static unsigned 43nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) 44{ 45 unsigned mask = inst->Dst[0].Register.WriteMask; 46 47 switch (inst->Instruction.Opcode) { 48 case TGSI_OPCODE_COS: 49 case TGSI_OPCODE_SIN: 50 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); 51 case TGSI_OPCODE_DP3: 52 return 0x7; 53 case TGSI_OPCODE_DP4: 54 case TGSI_OPCODE_DPH: 55 case TGSI_OPCODE_KIL: /* WriteMask ignored */ 56 return 0xf; 57 case TGSI_OPCODE_DST: 58 return mask & (c ? 0xa : 0x6); 59 case TGSI_OPCODE_EX2: 60 case TGSI_OPCODE_EXP: 61 case TGSI_OPCODE_LG2: 62 case TGSI_OPCODE_LOG: 63 case TGSI_OPCODE_POW: 64 case TGSI_OPCODE_RCP: 65 case TGSI_OPCODE_RSQ: 66 case TGSI_OPCODE_SCS: 67 return 0x1; 68 case TGSI_OPCODE_IF: 69 return 0x1; 70 case TGSI_OPCODE_LIT: 71 return 0xb; 72 case TGSI_OPCODE_TEX: 73 case TGSI_OPCODE_TXB: 74 case TGSI_OPCODE_TXL: 75 case TGSI_OPCODE_TXP: 76 { 77 const struct tgsi_instruction_texture *tex; 78 79 assert(inst->Instruction.Texture); 80 tex = &inst->Texture; 81 82 mask = 0x7; 83 if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && 84 inst->Instruction.Opcode != TGSI_OPCODE_TXD) 85 mask |= 0x8; /* bias, lod or proj */ 86 87 switch (tex->Texture) { 88 case TGSI_TEXTURE_1D: 89 mask &= 0x9; 90 break; 91 case TGSI_TEXTURE_SHADOW1D: 92 mask &= 0x5; 93 break; 94 case TGSI_TEXTURE_2D: 95 mask &= 0xb; 96 break; 97 default: 98 break; 99 } 100 } 101 return mask; 102 case TGSI_OPCODE_XPD: 103 { 104 unsigned x = 0; 105 if (mask & 1) x |= 0x6; 106 if (mask & 2) x |= 0x5; 107 if (mask & 4) x |= 0x3; 108 return x; 109 } 110 default: 111 break; 112 } 113 114 return mask; 115} 116 117static void 118nv50_indirect_inputs(struct nv50_translation_info *ti, int id) 119{ 120 int i, c; 121 122 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) 123 for (c = 0; c < 4; ++c) 124 ti->input_access[i][c] = id; 125 126 ti->indirect_inputs = TRUE; 127} 128 129static void 130nv50_indirect_outputs(struct nv50_translation_info *ti, int id) 131{ 132 int i, c; 133 134 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) 135 for (c = 0; c < 4; ++c) 136 ti->output_access[i][c] = id; 137 138 ti->indirect_outputs = TRUE; 139} 140 141static void 142prog_inst(struct nv50_translation_info *ti, 143 const struct tgsi_full_instruction *inst, int id) 144{ 145 const struct tgsi_dst_register *dst; 146 const struct tgsi_src_register *src; 147 int s, c, k; 148 unsigned mask; 149 150 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { 151 ti->subr[ti->subr_nr].pos = id - 1; 152 ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */ 153 ++ti->subr_nr; 154 } 155 156 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { 157 dst = &inst->Dst[0].Register; 158 159 for (c = 0; c < 4; ++c) { 160 if (dst->Indirect) 161 nv50_indirect_outputs(ti, id); 162 if (!(dst->WriteMask & (1 << c))) 163 continue; 164 ti->output_access[dst->Index][c] = id; 165 } 166 167 if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && 168 inst->Src[0].Register.File == TGSI_FILE_INPUT && 169 dst->Index == ti->edgeflag_out) 170 ti->p->vp.edgeflag = inst->Src[0].Register.Index; 171 } else 172 if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { 173 if (inst->Dst[0].Register.Indirect) 174 ti->store_to_memory = TRUE; 175 } 176 177 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { 178 src = &inst->Src[s].Register; 179 if (src->File == TGSI_FILE_TEMPORARY) 180 if (inst->Src[s].Register.Indirect) 181 ti->store_to_memory = TRUE; 182 if (src->File != TGSI_FILE_INPUT) 183 continue; 184 mask = nv50_tgsi_src_mask(inst, s); 185 186 if (inst->Src[s].Register.Indirect) 187 nv50_indirect_inputs(ti, id); 188 189 for (c = 0; c < 4; ++c) { 190 if (!(mask & (1 << c))) 191 continue; 192 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); 193 if (k <= TGSI_SWIZZLE_W) 194 ti->input_access[src->Index][k] = id; 195 } 196 } 197} 198 199/* Probably should introduce something like struct tgsi_function_declaration 200 * instead of trying to guess inputs/outputs. 201 */ 202static void 203prog_subroutine_inst(struct nv50_subroutine *subr, 204 const struct tgsi_full_instruction *inst) 205{ 206 const struct tgsi_dst_register *dst; 207 const struct tgsi_src_register *src; 208 int s, c, k; 209 unsigned mask; 210 211 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { 212 src = &inst->Src[s].Register; 213 if (src->File != TGSI_FILE_TEMPORARY) 214 continue; 215 mask = nv50_tgsi_src_mask(inst, s); 216 217 assert(!inst->Src[s].Register.Indirect); 218 219 for (c = 0; c < 4; ++c) { 220 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); 221 222 if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W) 223 if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) 224 subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32); 225 } 226 } 227 228 if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { 229 dst = &inst->Dst[0].Register; 230 231 for (c = 0; c < 4; ++c) 232 if (dst->WriteMask & (1 << c)) 233 subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32); 234 } 235} 236 237static void 238prog_immediate(struct nv50_translation_info *ti, 239 const struct tgsi_full_immediate *imm) 240{ 241 int c; 242 unsigned n = ti->immd32_nr++; 243 244 assert(ti->immd32_nr <= ti->scan.immediate_count); 245 246 for (c = 0; c < 4; ++c) 247 ti->immd32[n * 4 + c] = imm->u[c].Uint; 248 249 ti->immd32_ty[n] = imm->Immediate.DataType; 250} 251 252static INLINE unsigned 253translate_interpolate(const struct tgsi_full_declaration *decl) 254{ 255 unsigned mode; 256 257 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) 258 mode = NV50_INTERP_FLAT; 259 else 260 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) 261 mode = 0; 262 else 263 mode = NV50_INTERP_LINEAR; 264 265 if (decl->Declaration.Centroid) 266 mode |= NV50_INTERP_CENTROID; 267 268 return mode; 269} 270 271static void 272prog_decl(struct nv50_translation_info *ti, 273 const struct tgsi_full_declaration *decl) 274{ 275 unsigned i, first, last, sn = 0, si = 0; 276 277 first = decl->Range.First; 278 last = decl->Range.Last; 279 280 if (decl->Declaration.Semantic) { 281 sn = decl->Semantic.Name; 282 si = decl->Semantic.Index; 283 } 284 285 switch (decl->Declaration.File) { 286 case TGSI_FILE_INPUT: 287 for (i = first; i <= last; ++i) 288 ti->interp_mode[i] = translate_interpolate(decl); 289 290 if (!decl->Declaration.Semantic) 291 break; 292 293 for (i = first; i <= last; ++i) { 294 ti->p->in[i].sn = sn; 295 ti->p->in[i].si = si; 296 } 297 298 switch (sn) { 299 case TGSI_SEMANTIC_FACE: 300 break; 301 case TGSI_SEMANTIC_COLOR: 302 if (ti->p->type == PIPE_SHADER_FRAGMENT) 303 ti->p->vp.bfc[si] = first; 304 break; 305 } 306 break; 307 case TGSI_FILE_OUTPUT: 308 if (!decl->Declaration.Semantic) 309 break; 310 311 for (i = first; i <= last; ++i) { 312 ti->p->out[i].sn = sn; 313 ti->p->out[i].si = si; 314 } 315 316 switch (sn) { 317 case TGSI_SEMANTIC_BCOLOR: 318 ti->p->vp.bfc[si] = first; 319 break; 320 case TGSI_SEMANTIC_PSIZE: 321 ti->p->vp.psiz = first; 322 break; 323 case TGSI_SEMANTIC_EDGEFLAG: 324 ti->edgeflag_out = first; 325 break; 326 default: 327 break; 328 } 329 break; 330 case TGSI_FILE_SYSTEM_VALUE: 331 switch (decl->Semantic.Name) { 332 case TGSI_SEMANTIC_FACE: 333 break; 334 case TGSI_SEMANTIC_INSTANCEID: 335 break; 336 case TGSI_SEMANTIC_PRIMID: 337 break; 338 /* 339 case TGSI_SEMANTIC_PRIMIDIN: 340 break; 341 case TGSI_SEMANTIC_VERTEXID: 342 break; 343 */ 344 default: 345 break; 346 } 347 break; 348 case TGSI_FILE_CONSTANT: 349 ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16); 350 break; 351 case TGSI_FILE_ADDRESS: 352 case TGSI_FILE_SAMPLER: 353 case TGSI_FILE_TEMPORARY: 354 break; 355 default: 356 assert(0); 357 break; 358 } 359} 360 361static int 362nv50_vertprog_prepare(struct nv50_translation_info *ti) 363{ 364 struct nv50_program *p = ti->p; 365 int i, c; 366 unsigned num_inputs = 0; 367 368 ti->input_file = NV_FILE_MEM_S; 369 ti->output_file = NV_FILE_OUT; 370 371 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { 372 p->in[i].id = i; 373 p->in[i].hw = num_inputs; 374 375 for (c = 0; c < 4; ++c) { 376 if (!ti->input_access[i][c]) 377 continue; 378 ti->input_map[i][c] = num_inputs++; 379 p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32); 380 } 381 } 382 383 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { 384 p->out[i].id = i; 385 p->out[i].hw = p->max_out; 386 387 for (c = 0; c < 4; ++c) { 388 if (!ti->output_access[i][c]) 389 continue; 390 ti->output_map[i][c] = p->max_out++; 391 p->out[i].mask |= 1 << c; 392 } 393 } 394 395 if (p->vp.psiz < 0x40) 396 p->vp.psiz = p->out[p->vp.psiz].hw; 397 398 return 0; 399} 400 401static int 402nv50_fragprog_prepare(struct nv50_translation_info *ti) 403{ 404 struct nv50_program *p = ti->p; 405 int i, j, c; 406 unsigned nvary, nintp, depr; 407 unsigned n = 0, m = 0, skip = 0; 408 ubyte sn[16], si[16]; 409 410 /* FP flags */ 411 412 if (ti->scan.writes_z) { 413 p->fp.flags[1] = 0x11; 414 p->fp.flags[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z; 415 } 416 417 if (ti->scan.uses_kill) 418 p->fp.flags[0] |= NV50TCL_FP_CONTROL_USES_KIL; 419 420 /* FP inputs */ 421 422 ti->input_file = NV_FILE_MEM_V; 423 ti->output_file = NV_FILE_GPR; 424 425 /* count non-flat inputs, save semantic info */ 426 for (i = 0; i < p->in_nr; ++i) { 427 m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1; 428 sn[i] = p->in[i].sn; 429 si[i] = p->in[i].si; 430 } 431 432 /* reorder p->in[] so that non-flat inputs are first and 433 * kick out special inputs that don't use VP/GP_RESULT_MAP 434 */ 435 nintp = 0; 436 for (i = 0; i < p->in_nr; ++i) { 437 if (sn[i] == TGSI_SEMANTIC_POSITION) { 438 for (c = 0; c < 4; ++c) { 439 ti->input_map[i][c] = nintp; 440 if (ti->input_access[i][c]) { 441 p->fp.interp |= 1 << (24 + c); 442 ++nintp; 443 } 444 } 445 skip++; 446 continue; 447 } else 448 if (sn[i] == TGSI_SEMANTIC_FACE) { 449 ti->input_map[i][0] = 255; 450 skip++; 451 continue; 452 } 453 454 j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++; 455 456 if (sn[i] == TGSI_SEMANTIC_COLOR) 457 p->vp.bfc[si[i]] = j; 458 459 p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0; 460 p->in[j].id = i; 461 p->in[j].sn = sn[i]; 462 p->in[j].si = si[i]; 463 } 464 assert(n <= m); 465 p->in_nr -= skip; 466 467 if (!(p->fp.interp & (8 << 24))) { 468 p->fp.interp |= (8 << 24); 469 ++nintp; 470 } 471 472 p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */ 473 474 for (i = 0; i < p->in_nr; ++i) { 475 int j = p->in[i].id; 476 p->in[i].hw = nintp; 477 478 for (c = 0; c < 4; ++c) { 479 if (!ti->input_access[j][c]) 480 continue; 481 p->in[i].mask |= 1 << c; 482 ti->input_map[j][c] = nintp++; 483 } 484 /* count color inputs */ 485 if (i == p->vp.bfc[0] || i == p->vp.bfc[1]) 486 p->fp.colors += bitcount4(p->in[i].mask) << 16; 487 } 488 nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */ 489 nvary = nintp; 490 if (n < m) 491 nvary -= p->in[n].hw; 492 493 p->fp.interp |= nvary << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT; 494 p->fp.interp |= nintp << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT; 495 496 /* FP outputs */ 497 498 if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0))) 499 p->fp.flags[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS; 500 501 depr = p->out_nr; 502 for (i = 0; i < p->out_nr; ++i) { 503 p->out[i].id = i; 504 if (p->out[i].sn == TGSI_SEMANTIC_POSITION) { 505 depr = i; 506 continue; 507 } 508 p->out[i].hw = p->max_out; 509 p->out[i].mask = 0xf; 510 511 for (c = 0; c < 4; ++c) 512 ti->output_map[i][c] = p->max_out++; 513 } 514 if (depr < p->out_nr) { 515 p->out[depr].mask = 0x4; 516 p->out[depr].hw = ti->output_map[depr][2] = p->max_out++; 517 } else { 518 /* allowed values are 1, 4, 5, 8, 9, ... */ 519 p->max_out = MAX2(4, p->max_out); 520 } 521 522 return 0; 523} 524 525static int 526nv50_geomprog_prepare(struct nv50_translation_info *ti) 527{ 528 ti->input_file = NV_FILE_MEM_S; 529 ti->output_file = NV_FILE_OUT; 530 531 assert(0); 532 return 1; 533} 534 535static int 536nv50_prog_scan(struct nv50_translation_info *ti) 537{ 538 struct nv50_program *p = ti->p; 539 struct tgsi_parse_context parse; 540 int ret, i; 541 542 p->vp.edgeflag = 0x40; 543 p->vp.psiz = 0x40; 544 p->vp.bfc[0] = 0x40; 545 p->vp.bfc[1] = 0x40; 546 p->gp.primid = 0x80; 547 548 tgsi_scan_shader(p->pipe.tokens, &ti->scan); 549 550#ifdef NV50_PROGRAM_DEBUG 551 tgsi_dump(p->pipe.tokens, 0); 552#endif 553 554 ti->subr = 555 CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); 556 557 ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); 558 ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); 559 560 ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0])); 561 562 tgsi_parse_init(&parse, p->pipe.tokens); 563 while (!tgsi_parse_end_of_tokens(&parse)) { 564 tgsi_parse_token(&parse); 565 566 switch (parse.FullToken.Token.Type) { 567 case TGSI_TOKEN_TYPE_IMMEDIATE: 568 prog_immediate(ti, &parse.FullToken.FullImmediate); 569 break; 570 case TGSI_TOKEN_TYPE_DECLARATION: 571 prog_decl(ti, &parse.FullToken.FullDeclaration); 572 break; 573 case TGSI_TOKEN_TYPE_INSTRUCTION: 574 ti->insns[ti->inst_nr] = parse.FullToken.FullInstruction; 575 prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr); 576 break; 577 } 578 } 579 580 /* Scan to determine which registers are inputs/outputs of a subroutine. */ 581 for (i = 0; i < ti->subr_nr; ++i) { 582 int pc = ti->subr[i].id; 583 while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) 584 prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); 585 } 586 587 p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1; 588 p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1; 589 590 switch (p->type) { 591 case PIPE_SHADER_VERTEX: 592 ret = nv50_vertprog_prepare(ti); 593 break; 594 case PIPE_SHADER_FRAGMENT: 595 ret = nv50_fragprog_prepare(ti); 596 break; 597 case PIPE_SHADER_GEOMETRY: 598 ret = nv50_geomprog_prepare(ti); 599 break; 600 default: 601 assert(!"unsupported program type"); 602 ret = -1; 603 break; 604 } 605 606 assert(!ret); 607 return ret; 608} 609 610boolean 611nv50_program_tx(struct nv50_program *p) 612{ 613 struct nv50_translation_info *ti; 614 int ret; 615 616 ti = CALLOC_STRUCT(nv50_translation_info); 617 ti->p = p; 618 619 ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; 620 621 ret = nv50_prog_scan(ti); 622 if (ret) { 623 NOUVEAU_ERR("unsupported shader program\n"); 624 goto out; 625 } 626 627 ret = nv50_generate_code(ti); 628 if (ret) { 629 NOUVEAU_ERR("error during shader translation\n"); 630 goto out; 631 } 632 633out: 634 if (ti->immd32) 635 FREE(ti->immd32); 636 if (ti->immd32_ty) 637 FREE(ti->immd32_ty); 638 if (ti->insns) 639 FREE(ti->insns); 640 if (ti->subr) 641 FREE(ti->subr); 642 FREE(ti); 643 return ret ? FALSE : TRUE; 644} 645 646void 647nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 648{ 649 nouveau_bo_ref(NULL, &p->bo); 650 651 so_ref(NULL, &p->so); 652 653 if (p->code) 654 FREE(p->code); 655 656 p->translated = FALSE; 657} 658