nv50_program.c revision e08f70a41d1012a0270468866614485a3415168e
1/* 2 * Copyright 2010 Chrsitoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23/* #define NV50_PROGRAM_DEBUG */ 24 25#include "nv50_program.h" 26#include "nv50_pc.h" 27#include "nv50_context.h" 28 29#include "pipe/p_shader_tokens.h" 30#include "tgsi/tgsi_parse.h" 31#include "tgsi/tgsi_util.h" 32#include "tgsi/tgsi_dump.h" 33 34static INLINE unsigned 35bitcount4(const uint32_t val) 36{ 37 static const unsigned cnt[16] 38 = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; 39 return cnt[val & 0xf]; 40} 41 42static unsigned 43nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) 44{ 45 unsigned mask = inst->Dst[0].Register.WriteMask; 46 47 switch (inst->Instruction.Opcode) { 48 case TGSI_OPCODE_COS: 49 case TGSI_OPCODE_SIN: 50 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); 51 case TGSI_OPCODE_DP3: 52 return 0x7; 53 case TGSI_OPCODE_DP4: 54 case TGSI_OPCODE_DPH: 55 case TGSI_OPCODE_KIL: /* WriteMask ignored */ 56 return 0xf; 57 case TGSI_OPCODE_DST: 58 return mask & (c ? 0xa : 0x6); 59 case TGSI_OPCODE_EX2: 60 case TGSI_OPCODE_EXP: 61 case TGSI_OPCODE_LG2: 62 case TGSI_OPCODE_LOG: 63 case TGSI_OPCODE_POW: 64 case TGSI_OPCODE_RCP: 65 case TGSI_OPCODE_RSQ: 66 case TGSI_OPCODE_SCS: 67 return 0x1; 68 case TGSI_OPCODE_IF: 69 return 0x1; 70 case TGSI_OPCODE_LIT: 71 return 0xb; 72 case TGSI_OPCODE_TEX: 73 case TGSI_OPCODE_TXB: 74 case TGSI_OPCODE_TXL: 75 case TGSI_OPCODE_TXP: 76 { 77 const struct tgsi_instruction_texture *tex; 78 79 assert(inst->Instruction.Texture); 80 tex = &inst->Texture; 81 82 mask = 0x7; 83 if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && 84 inst->Instruction.Opcode != TGSI_OPCODE_TXD) 85 mask |= 0x8; /* bias, lod or proj */ 86 87 switch (tex->Texture) { 88 case TGSI_TEXTURE_1D: 89 mask &= 0x9; 90 break; 91 case TGSI_TEXTURE_SHADOW1D: 92 mask &= 0x5; 93 break; 94 case TGSI_TEXTURE_2D: 95 mask &= 0xb; 96 break; 97 default: 98 break; 99 } 100 } 101 return mask; 102 case TGSI_OPCODE_XPD: 103 { 104 unsigned x = 0; 105 if (mask & 1) x |= 0x6; 106 if (mask & 2) x |= 0x5; 107 if (mask & 4) x |= 0x3; 108 return x; 109 } 110 default: 111 break; 112 } 113 114 return mask; 115} 116 117static void 118nv50_indirect_inputs(struct nv50_translation_info *ti, int id) 119{ 120 int i, c; 121 122 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) 123 for (c = 0; c < 4; ++c) 124 ti->input_access[i][c] = id; 125 126 ti->indirect_inputs = TRUE; 127} 128 129static void 130nv50_indirect_outputs(struct nv50_translation_info *ti, int id) 131{ 132 int i, c; 133 134 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) 135 for (c = 0; c < 4; ++c) 136 ti->output_access[i][c] = id; 137 138 ti->indirect_outputs = TRUE; 139} 140 141static void 142prog_inst(struct nv50_translation_info *ti, 143 const struct tgsi_full_instruction *inst, int id) 144{ 145 const struct tgsi_dst_register *dst; 146 const struct tgsi_src_register *src; 147 int s, c, k; 148 unsigned mask; 149 150 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { 151 for (c = 0; c < 4; ++c) { 152 dst = &inst->Dst[0].Register; 153 if (inst->Dst[0].Register.Indirect) 154 nv50_indirect_outputs(ti, id); 155 if (!(dst->WriteMask & (1 << c))) 156 continue; 157 ti->output_access[dst->Index][c] = id; 158 } 159 160 if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && 161 inst->Src[0].Register.File == TGSI_FILE_INPUT && 162 dst->Index == ti->edgeflag_out) 163 ti->p->vp.edgeflag = inst->Src[0].Register.Index; 164 } 165 166 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { 167 src = &inst->Src[s].Register; 168 if (src->File != TGSI_FILE_INPUT) 169 continue; 170 mask = nv50_tgsi_src_mask(inst, s); 171 172 if (inst->Src[s].Register.Indirect) 173 nv50_indirect_inputs(ti, id); 174 175 for (c = 0; c < 4; ++c) { 176 if (!(mask & (1 << c))) 177 continue; 178 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); 179 if (k <= TGSI_SWIZZLE_W) 180 ti->input_access[src->Index][k] = id; 181 } 182 } 183} 184 185static void 186prog_immediate(struct nv50_translation_info *ti, 187 const struct tgsi_full_immediate *imm) 188{ 189 int c; 190 unsigned n = ti->immd32_nr++; 191 192 assert(ti->immd32_nr <= ti->scan.immediate_count); 193 194 for (c = 0; c < 4; ++c) 195 ti->immd32[n * 4 + c] = imm->u[c].Uint; 196 197 ti->immd32_ty[n] = imm->Immediate.DataType; 198} 199 200static INLINE unsigned 201translate_interpolate(const struct tgsi_full_declaration *decl) 202{ 203 unsigned mode; 204 205 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) 206 mode = NV50_INTERP_FLAT; 207 else 208 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) 209 mode = 0; 210 else 211 mode = NV50_INTERP_LINEAR; 212 213 if (decl->Declaration.Centroid) 214 mode |= NV50_INTERP_CENTROID; 215 216 return mode; 217} 218 219static void 220prog_decl(struct nv50_translation_info *ti, 221 const struct tgsi_full_declaration *decl) 222{ 223 unsigned i, first, last, sn = 0, si = 0; 224 225 first = decl->Range.First; 226 last = decl->Range.Last; 227 228 if (decl->Declaration.Semantic) { 229 sn = decl->Semantic.Name; 230 si = decl->Semantic.Index; 231 } 232 233 switch (decl->Declaration.File) { 234 case TGSI_FILE_INPUT: 235 for (i = first; i <= last; ++i) 236 ti->interp_mode[i] = translate_interpolate(decl); 237 238 if (!decl->Declaration.Semantic) 239 break; 240 241 for (i = first; i <= last; ++i) { 242 ti->p->in[i].sn = sn; 243 ti->p->in[i].si = si; 244 } 245 246 switch (sn) { 247 case TGSI_SEMANTIC_FACE: 248 break; 249 case TGSI_SEMANTIC_COLOR: 250 if (ti->p->type == PIPE_SHADER_FRAGMENT) 251 ti->p->vp.bfc[si] = first; 252 break; 253 } 254 break; 255 case TGSI_FILE_OUTPUT: 256 if (!decl->Declaration.Semantic) 257 break; 258 259 for (i = first; i <= last; ++i) { 260 ti->p->out[i].sn = sn; 261 ti->p->out[i].si = si; 262 } 263 264 switch (sn) { 265 case TGSI_SEMANTIC_BCOLOR: 266 ti->p->vp.bfc[si] = first; 267 break; 268 case TGSI_SEMANTIC_PSIZE: 269 ti->p->vp.psiz = first; 270 break; 271 case TGSI_SEMANTIC_EDGEFLAG: 272 ti->edgeflag_out = first; 273 break; 274 default: 275 break; 276 } 277 break; 278 case TGSI_FILE_SYSTEM_VALUE: 279 switch (decl->Semantic.Name) { 280 case TGSI_SEMANTIC_FACE: 281 break; 282 case TGSI_SEMANTIC_INSTANCEID: 283 break; 284 case TGSI_SEMANTIC_PRIMID: 285 break; 286 /* 287 case TGSI_SEMANTIC_PRIMIDIN: 288 break; 289 case TGSI_SEMANTIC_VERTEXID: 290 break; 291 */ 292 default: 293 break; 294 } 295 break; 296 case TGSI_FILE_CONSTANT: 297 ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16); 298 break; 299 case TGSI_FILE_ADDRESS: 300 case TGSI_FILE_SAMPLER: 301 case TGSI_FILE_TEMPORARY: 302 break; 303 default: 304 assert(0); 305 break; 306 } 307} 308 309static int 310nv50_vertprog_prepare(struct nv50_translation_info *ti) 311{ 312 struct nv50_program *p = ti->p; 313 int i, c; 314 unsigned num_inputs = 0; 315 316 ti->input_file = NV_FILE_MEM_S; 317 ti->output_file = NV_FILE_OUT; 318 319 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { 320 p->in[i].id = i; 321 p->in[i].hw = num_inputs; 322 323 for (c = 0; c < 4; ++c) { 324 if (!ti->input_access[i][c]) 325 continue; 326 ti->input_map[i][c] = num_inputs++; 327 p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32); 328 } 329 } 330 331 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { 332 p->out[i].id = i; 333 p->out[i].hw = p->max_out; 334 335 for (c = 0; c < 4; ++c) { 336 if (!ti->output_access[i][c]) 337 continue; 338 ti->output_map[i][c] = p->max_out++; 339 p->out[i].mask |= 1 << c; 340 } 341 } 342 343 if (p->vp.psiz < 0x40) 344 p->vp.psiz = p->out[p->vp.psiz].hw; 345 346 return 0; 347} 348 349static int 350nv50_fragprog_prepare(struct nv50_translation_info *ti) 351{ 352 struct nv50_program *p = ti->p; 353 int i, j, c; 354 unsigned nvary, nintp, depr; 355 unsigned n = 0, m = 0, skip = 0; 356 ubyte sn[16], si[16]; 357 358 /* FP flags */ 359 360 if (ti->scan.writes_z) { 361 p->fp.flags[1] = 0x11; 362 p->fp.flags[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z; 363 } 364 365 if (ti->scan.uses_kill) 366 p->fp.flags[0] |= NV50TCL_FP_CONTROL_USES_KIL; 367 368 /* FP inputs */ 369 370 ti->input_file = NV_FILE_MEM_V; 371 ti->output_file = NV_FILE_GPR; 372 373 /* count non-flat inputs, save semantic info */ 374 for (i = 0; i < p->in_nr; ++i) { 375 m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1; 376 sn[i] = p->in[i].sn; 377 si[i] = p->in[i].si; 378 } 379 380 /* reorder p->in[] so that non-flat inputs are first and 381 * kick out special inputs that don't use VP/GP_RESULT_MAP 382 */ 383 nintp = 0; 384 for (i = 0; i < p->in_nr; ++i) { 385 if (sn[i] == TGSI_SEMANTIC_POSITION) { 386 for (c = 0; c < 4; ++c) { 387 ti->input_map[i][c] = nintp; 388 if (ti->input_access[i][c]) { 389 p->fp.interp |= 1 << (24 + c); 390 ++nintp; 391 } 392 } 393 skip++; 394 continue; 395 } else 396 if (sn[i] == TGSI_SEMANTIC_FACE) { 397 ti->input_map[i][0] = 255; 398 skip++; 399 continue; 400 } 401 402 j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++; 403 404 if (sn[i] == TGSI_SEMANTIC_COLOR) 405 p->vp.bfc[si[i]] = j; 406 407 p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0; 408 p->in[j].id = i; 409 p->in[j].sn = sn[i]; 410 p->in[j].si = si[i]; 411 } 412 assert(n <= m); 413 p->in_nr -= skip; 414 415 if (!(p->fp.interp & (8 << 24))) { 416 p->fp.interp |= (8 << 24); 417 ++nintp; 418 } 419 420 p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */ 421 422 for (i = 0; i < p->in_nr; ++i) { 423 int j = p->in[i].id; 424 p->in[i].hw = nintp; 425 426 for (c = 0; c < 4; ++c) { 427 if (!ti->input_access[j][c]) 428 continue; 429 p->in[i].mask |= 1 << c; 430 ti->input_map[j][c] = nintp++; 431 } 432 /* count color inputs */ 433 if (i == p->vp.bfc[0] || i == p->vp.bfc[1]) 434 p->fp.colors += bitcount4(p->in[i].mask) << 16; 435 } 436 nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */ 437 nvary = nintp; 438 if (n < m) 439 nvary -= p->in[n].hw; 440 441 p->fp.interp |= nvary << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT; 442 p->fp.interp |= nintp << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT; 443 444 /* FP outputs */ 445 446 if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0))) 447 p->fp.flags[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS; 448 449 depr = p->out_nr; 450 for (i = 0; i < p->out_nr; ++i) { 451 p->out[i].id = i; 452 if (p->out[i].sn == TGSI_SEMANTIC_POSITION) { 453 depr = i; 454 continue; 455 } 456 p->out[i].hw = p->max_out; 457 p->out[i].mask = 0xf; 458 459 for (c = 0; c < 4; ++c) 460 ti->output_map[i][c] = p->max_out++; 461 } 462 if (depr < p->out_nr) { 463 p->out[depr].mask = 0x4; 464 p->out[depr].hw = ti->output_map[depr][2] = p->max_out++; 465 } 466 467 return 0; 468} 469 470static int 471nv50_geomprog_prepare(struct nv50_translation_info *ti) 472{ 473 ti->input_file = NV_FILE_MEM_S; 474 ti->output_file = NV_FILE_OUT; 475 476 assert(0); 477 return 1; 478} 479 480static int 481nv50_prog_scan(struct nv50_translation_info *ti) 482{ 483 struct nv50_program *p = ti->p; 484 struct tgsi_parse_context parse; 485 int ret; 486 487 p->vp.edgeflag = 0x40; 488 p->vp.psiz = 0x40; 489 p->vp.bfc[0] = 0x40; 490 p->vp.bfc[1] = 0x40; 491 p->gp.primid = 0x80; 492 493 tgsi_scan_shader(p->pipe.tokens, &ti->scan); 494 495#ifdef NV50_PROGRAM_DEBUG 496 tgsi_dump(p->pipe.tokens, 0); 497#endif 498 499 ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); 500 ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); 501 502 tgsi_parse_init(&parse, p->pipe.tokens); 503 while (!tgsi_parse_end_of_tokens(&parse)) { 504 tgsi_parse_token(&parse); 505 506 switch (parse.FullToken.Token.Type) { 507 case TGSI_TOKEN_TYPE_IMMEDIATE: 508 prog_immediate(ti, &parse.FullToken.FullImmediate); 509 break; 510 case TGSI_TOKEN_TYPE_DECLARATION: 511 prog_decl(ti, &parse.FullToken.FullDeclaration); 512 break; 513 case TGSI_TOKEN_TYPE_INSTRUCTION: 514 prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr); 515 break; 516 } 517 } 518 519 p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1; 520 p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1; 521 522 switch (p->type) { 523 case PIPE_SHADER_VERTEX: 524 ret = nv50_vertprog_prepare(ti); 525 break; 526 case PIPE_SHADER_FRAGMENT: 527 ret = nv50_fragprog_prepare(ti); 528 break; 529 case PIPE_SHADER_GEOMETRY: 530 ret = nv50_geomprog_prepare(ti); 531 break; 532 default: 533 assert(!"unsupported program type"); 534 ret = -1; 535 break; 536 } 537 538 assert(!ret); 539 return ret; 540} 541 542boolean 543nv50_program_tx(struct nv50_program *p) 544{ 545 struct nv50_translation_info *ti; 546 int ret; 547 548 ti = CALLOC_STRUCT(nv50_translation_info); 549 ti->p = p; 550 551 ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; 552 553 ret = nv50_prog_scan(ti); 554 if (ret) { 555 NOUVEAU_ERR("unsupported shader program\n"); 556 goto out; 557 } 558 559 ret = nv50_generate_code(ti); 560 if (ret) { 561 NOUVEAU_ERR("error during shader translation\n"); 562 goto out; 563 } 564 565out: 566 if (ti->immd32) 567 FREE(ti->immd32); 568 if (ti->immd32_ty) 569 FREE(ti->immd32_ty); 570 FREE(ti); 571 return ret ? FALSE : TRUE; 572} 573 574void 575nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) 576{ 577 nouveau_bo_ref(NULL, &p->bo); 578 579 so_ref(NULL, &p->so); 580 581 if (p->code) 582 FREE(p->code); 583 584 p->translated = FALSE; 585} 586