nv50_program.c revision 3a68fcfb6b406cf864afbf200e436fc384fd0865
1/* 2 * Copyright 2010 Chrsitoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23#include "nv50_program.h" 24#include "nv50_pc.h" 25#include "nv50_context.h" 26 27#include "pipe/p_shader_tokens.h" 28#include "tgsi/tgsi_parse.h" 29#include "tgsi/tgsi_util.h" 30#include "tgsi/tgsi_dump.h" 31 32static INLINE unsigned 33bitcount4(const uint32_t val) 34{ 35 static const unsigned cnt[16] 36 = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; 37 return cnt[val & 0xf]; 38} 39 40static unsigned 41nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) 42{ 43 unsigned mask = inst->Dst[0].Register.WriteMask; 44 45 switch (inst->Instruction.Opcode) { 46 case TGSI_OPCODE_COS: 47 case TGSI_OPCODE_SIN: 48 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); 49 case TGSI_OPCODE_DP3: 50 return 0x7; 51 case TGSI_OPCODE_DP4: 52 case TGSI_OPCODE_DPH: 53 case TGSI_OPCODE_KIL: /* WriteMask ignored */ 54 return 0xf; 55 case TGSI_OPCODE_DST: 56 return mask & (c ? 0xa : 0x6); 57 case TGSI_OPCODE_EX2: 58 case TGSI_OPCODE_EXP: 59 case TGSI_OPCODE_LG2: 60 case TGSI_OPCODE_LOG: 61 case TGSI_OPCODE_POW: 62 case TGSI_OPCODE_RCP: 63 case TGSI_OPCODE_RSQ: 64 case TGSI_OPCODE_SCS: 65 return 0x1; 66 case TGSI_OPCODE_IF: 67 return 0x1; 68 case TGSI_OPCODE_LIT: 69 return 0xb; 70 case TGSI_OPCODE_TEX: 71 case TGSI_OPCODE_TXB: 72 case TGSI_OPCODE_TXL: 73 case TGSI_OPCODE_TXP: 74 { 75 const struct tgsi_instruction_texture *tex; 76 77 assert(inst->Instruction.Texture); 78 tex = &inst->Texture; 79 80 mask = 0x7; 81 if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && 82 inst->Instruction.Opcode != TGSI_OPCODE_TXD) 83 mask |= 0x8; /* bias, lod or proj */ 84 85 switch (tex->Texture) { 86 case TGSI_TEXTURE_1D: 87 mask &= 0x9; 88 break; 89 case TGSI_TEXTURE_SHADOW1D: 90 mask &= 0x5; 91 break; 92 case TGSI_TEXTURE_2D: 93 mask &= 0xb; 94 break; 95 default: 96 break; 97 } 98 } 99 return mask; 100 case TGSI_OPCODE_XPD: 101 { 102 unsigned x = 0; 103 if (mask & 1) x |= 0x6; 104 if (mask & 2) x |= 0x5; 105 if (mask & 4) x |= 0x3; 106 return x; 107 } 108 default: 109 break; 110 } 111 112 return mask; 113} 114 115static void 116nv50_indirect_inputs(struct nv50_translation_info *ti, int id) 117{ 118 int i, c; 119 120 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) 121 for (c = 0; c < 4; ++c) 122 ti->input_access[i][c] = id; 123 124 ti->indirect_inputs = TRUE; 125} 126 127static void 128nv50_indirect_outputs(struct nv50_translation_info *ti, int id) 129{ 130 int i, c; 131 132 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) 133 for (c = 0; c < 4; ++c) 134 ti->output_access[i][c] = id; 135 136 ti->indirect_outputs = TRUE; 137} 138 139static void 140prog_inst(struct nv50_translation_info *ti, 141 const struct tgsi_full_instruction *inst, int id) 142{ 143 const struct tgsi_dst_register *dst; 144 const struct tgsi_src_register *src; 145 int s, c, k; 146 unsigned mask; 147 148 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { 149 for (c = 0; c < 4; ++c) { 150 dst = &inst->Dst[0].Register; 151 if (inst->Dst[0].Register.Indirect) 152 nv50_indirect_outputs(ti, id); 153 if (!(dst->WriteMask & (1 << c))) 154 continue; 155 ti->output_access[dst->Index][c] = id; 156 } 157 158 if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && 159 inst->Src[0].Register.File == TGSI_FILE_INPUT && 160 dst->Index == ti->edgeflag_out) 161 ti->p->vp.edgeflag = inst->Src[0].Register.Index; 162 } 163 164 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { 165 src = &inst->Src[s].Register; 166 if (src->File != TGSI_FILE_INPUT) 167 continue; 168 mask = nv50_tgsi_src_mask(inst, s); 169 170 if (inst->Src[s].Register.Indirect) 171 nv50_indirect_inputs(ti, id); 172 173 for (c = 0; c < 4; ++c) { 174 if (!(mask & (1 << c))) 175 continue; 176 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); 177 if (k <= TGSI_SWIZZLE_W) 178 ti->input_access[src->Index][k] = id; 179 } 180 } 181} 182 183static void 184prog_immediate(struct nv50_translation_info *ti, 185 const struct tgsi_full_immediate *imm) 186{ 187 int c; 188 unsigned n = ++ti->immd32_nr; 189 190 tgsi_dump_immediate(imm); 191 192 if (n == (1 << (ffs(n) - 1))) 193 ti->immd32 = REALLOC(ti->immd32, (n / 2) * 16, (n * 2) * 16); 194 195 for (c = 0; c < 4; ++c) 196 ti->immd32[(n - 1) * 4 + c] = imm->u[c].Uint; 197} 198 199static INLINE unsigned 200translate_interpolate(const struct tgsi_full_declaration *decl) 201{ 202 unsigned mode; 203 204 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) 205 mode = NV50_INTERP_FLAT; 206 else 207 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) 208 mode = 0; 209 else 210 mode = NV50_INTERP_LINEAR; 211 212 if (decl->Declaration.Centroid) 213 mode |= NV50_INTERP_CENTROID; 214 215 return mode; 216} 217 218static void 219prog_decl(struct nv50_translation_info *ti, 220 const struct tgsi_full_declaration *decl) 221{ 222 unsigned i, first, last, sn = 0, si = 0; 223 224 first = decl->Range.First; 225 last = decl->Range.Last; 226 227 if (decl->Declaration.Semantic) { 228 sn = decl->Semantic.Name; 229 si = decl->Semantic.Index; 230 } 231 tgsi_dump_declaration(decl); 232 233 switch (decl->Declaration.File) { 234 case TGSI_FILE_INPUT: 235 for (i = first; i <= last; ++i) 236 ti->interp_mode[i] = translate_interpolate(decl); 237 238 if (!decl->Declaration.Semantic) 239 break; 240 241 for (i = first; i <= last; ++i) { 242 ti->p->in[i].sn = sn; 243 ti->p->in[i].si = si; 244 } 245 246 switch (sn) { 247 case TGSI_SEMANTIC_FACE: 248 break; 249 case TGSI_SEMANTIC_COLOR: 250 if (ti->p->type == PIPE_SHADER_FRAGMENT) 251 ti->p->vp.bfc[si] = first; 252 break; 253 } 254 break; 255 case TGSI_FILE_OUTPUT: 256 if (!decl->Declaration.Semantic) 257 break; 258 259 for (i = first; i <= last; ++i) { 260 ti->p->out[i].sn = sn; 261 ti->p->out[i].si = si; 262 } 263 264 switch (sn) { 265 case TGSI_SEMANTIC_BCOLOR: 266 ti->p->vp.bfc[si] = first; 267 break; 268 case TGSI_SEMANTIC_PSIZE: 269 ti->p->vp.psiz = first; 270 break; 271 case TGSI_SEMANTIC_EDGEFLAG: 272 ti->edgeflag_out = first; 273 break; 274 default: 275 break; 276 } 277 break; 278 case TGSI_FILE_SYSTEM_VALUE: 279 switch (decl->Semantic.Name) { 280 case TGSI_SEMANTIC_FACE: 281 break; 282 case TGSI_SEMANTIC_INSTANCEID: 283 break; 284 case TGSI_SEMANTIC_PRIMID: 285 break; 286 /* 287 case TGSI_SEMANTIC_PRIMIDIN: 288 break; 289 case TGSI_SEMANTIC_VERTEXID: 290 break; 291 */ 292 default: 293 break; 294 } 295 break; 296 case TGSI_FILE_CONSTANT: 297 ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16); 298 break; 299 case TGSI_FILE_ADDRESS: 300 case TGSI_FILE_SAMPLER: 301 case TGSI_FILE_TEMPORARY: 302 break; 303 default: 304 assert(0); 305 break; 306 } 307} 308 309static int 310nv50_vertprog_prepare(struct nv50_translation_info *ti) 311{ 312 struct nv50_program *p = ti->p; 313 int i, c; 314 unsigned num_inputs = 0; 315 316 ti->input_file = NV_FILE_MEM_S; 317 ti->output_file = NV_FILE_OUT; 318 319 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { 320 p->in[i].id = i; 321 p->in[i].hw = num_inputs; 322 323 for (c = 0; c < 4; ++c) { 324 if (!ti->input_access[i][c]) 325 continue; 326 ti->input_map[i][c] = num_inputs++; 327 p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32); 328 } 329 } 330 331 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { 332 p->out[i].id = i; 333 p->out[i].hw = p->max_out; 334 335 for (c = 0; c < 4; ++c) { 336 if (!ti->output_access[i][c]) 337 continue; 338 ti->output_map[i][c] = p->max_out++; 339 p->out[i].mask |= 1 << c; 340 } 341 } 342 343 if (p->vp.psiz < 0x40) 344 p->vp.psiz = p->out[p->vp.psiz].hw; 345 346 return 0; 347} 348 349static int 350nv50_fragprog_prepare(struct nv50_translation_info *ti) 351{ 352 struct nv50_program *p = ti->p; 353 int i, j, c; 354 unsigned nvary, nintp, depr; 355 unsigned n = 0, m = 0, skip = 0; 356 ubyte sn[16], si[16]; 357 358 /* FP flags */ 359 360 if (ti->scan.writes_z) { 361 p->fp.flags[1] = 0x11; 362 p->fp.flags[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z; 363 } 364 365 if (ti->scan.uses_kill) 366 p->fp.flags[0] |= NV50TCL_FP_CONTROL_USES_KIL; 367 368 /* FP inputs */ 369 370 ti->input_file = NV_FILE_MEM_V; 371 ti->output_file = NV_FILE_GPR; 372 373 /* count non-flat inputs, save semantic info */ 374 for (i = 0; i < p->in_nr; ++i) { 375 m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1; 376 sn[i] = p->in[i].sn; 377 si[i] = p->in[i].si; 378 } 379 380 /* reorder p->in[] so that non-flat inputs are first and 381 * kick out special inputs that don't use VP/GP_RESULT_MAP 382 */ 383 nintp = 0; 384 for (i = 0; i < p->in_nr; ++i) { 385 if (sn[i] == TGSI_SEMANTIC_POSITION) { 386 for (c = 0; c < 4; ++c) { 387 ti->input_map[i][c] = nintp; 388 if (ti->input_access[i][c]) { 389 p->fp.interp |= 1 << (24 + c); 390 ++nintp; 391 } 392 } 393 skip++; 394 continue; 395 } else 396 if (sn[i] == TGSI_SEMANTIC_FACE) { 397 ti->input_map[i][0] = 255; 398 skip++; 399 continue; 400 } 401 402 j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++; 403 404 if (sn[i] == TGSI_SEMANTIC_COLOR) 405 p->vp.bfc[si[i]] = j; 406 407 p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0; 408 p->in[j].id = i; 409 p->in[j].sn = sn[i]; 410 p->in[j].si = si[i]; 411 } 412 assert(n <= m); 413 p->in_nr -= skip; 414 415 if (!(p->fp.interp & (8 << 24))) { 416 p->fp.interp |= (8 << 24); 417 ++nintp; 418 } 419 420 p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */ 421 422 for (i = 0; i < p->in_nr; ++i) { 423 int j = p->in[i].id; 424 p->in[i].hw = nintp; 425 426 for (c = 0; c < 4; ++c) { 427 if (!ti->input_access[j][c]) 428 continue; 429 p->in[i].mask |= 1 << c; 430 ti->input_map[j][c] = nintp++; 431 } 432 /* count color inputs */ 433 if (i == p->vp.bfc[0] || i == p->vp.bfc[1]) 434 p->fp.colors += bitcount4(p->in[i].mask) << 16; 435 } 436 nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */ 437 nvary = nintp; 438 if (n < m) 439 nvary -= p->in[n].hw; 440 441 p->fp.interp |= nvary << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT; 442 p->fp.interp |= nintp << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT; 443 444 /* FP outputs */ 445 446 if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0))) 447 p->fp.flags[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS; 448 449 depr = p->out_nr; 450 for (i = 0; i < p->out_nr; ++i) { 451 p->out[i].id = i; 452 if (p->out[i].sn == TGSI_SEMANTIC_POSITION) { 453 depr = i; 454 continue; 455 } 456 p->out[i].hw = p->max_out; 457 p->out[i].mask = 0xf; 458 459 for (c = 0; c < 4; ++c) 460 ti->output_map[i][c] = p->max_out++; 461 } 462 if (depr < p->out_nr) { 463 p->out[depr].mask = 0x4; 464 p->out[depr].hw = p->max_out++; 465 } 466 467 return 0; 468} 469 470static int 471nv50_geomprog_prepare(struct nv50_translation_info *ti) 472{ 473 ti->input_file = NV_FILE_MEM_S; 474 ti->output_file = NV_FILE_OUT; 475 476 assert(0); 477 return 1; 478} 479 480static int 481nv50_prog_scan(struct nv50_translation_info *ti) 482{ 483 struct nv50_program *p = ti->p; 484 struct tgsi_parse_context parse; 485 int ret; 486 487 p->vp.psiz = 0x40; 488 p->vp.bfc[0] = 0x40; 489 p->vp.bfc[1] = 0x40; 490 p->gp.primid = 0x80; 491 492 tgsi_scan_shader(p->pipe.tokens, &ti->scan); 493 494 tgsi_parse_init(&parse, p->pipe.tokens); 495 while (!tgsi_parse_end_of_tokens(&parse)) { 496 tgsi_parse_token(&parse); 497 498 switch (parse.FullToken.Token.Type) { 499 case TGSI_TOKEN_TYPE_IMMEDIATE: 500 prog_immediate(ti, &parse.FullToken.FullImmediate); 501 break; 502 case TGSI_TOKEN_TYPE_DECLARATION: 503 prog_decl(ti, &parse.FullToken.FullDeclaration); 504 break; 505 case TGSI_TOKEN_TYPE_INSTRUCTION: 506 prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr); 507 break; 508 } 509 } 510 511 p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1; 512 p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1; 513 514 switch (p->type) { 515 case PIPE_SHADER_VERTEX: 516 ret = nv50_vertprog_prepare(ti); 517 break; 518 case PIPE_SHADER_FRAGMENT: 519 ret = nv50_fragprog_prepare(ti); 520 break; 521 case PIPE_SHADER_GEOMETRY: 522 ret = nv50_geomprog_prepare(ti); 523 break; 524 default: 525 assert(!"unsupported program type"); 526 ret = -1; 527 break; 528 } 529 530 assert(!ret); 531 return ret; 532} 533 534boolean 535nv50_program_tx(struct nv50_program *p) 536{ 537 struct nv50_translation_info *ti; 538 int ret; 539 540 ti = CALLOC_STRUCT(nv50_translation_info); 541 ti->p = p; 542 543 ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; 544 545 ret = nv50_prog_scan(ti); 546 if (ret) { 547 NOUVEAU_ERR("unsupported shader program\n"); 548 goto out; 549 } 550 551 ret = nv50_generate_code(ti); 552 if (ret) { 553 NOUVEAU_ERR("error during shader translation\n"); 554 goto out; 555 } 556 557out: 558 if (ti->immd32) 559 FREE(ti->immd32); 560 FREE(ti); 561 return ret ? FALSE : TRUE; 562} 563 564void 565nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 566{ 567 nouveau_bo_ref(NULL, &p->bo); 568 569 so_ref(NULL, &p->so); 570 571 if (p->code) 572 FREE(p->code); 573 574 p->translated = FALSE; 575} 576