nv50_shader_state.c revision 116133af3499947500a6d0c877fbc8f564ee4c76
1/* 2 * Copyright 2008 Ben Skeggs 3 * Copyright 2010 Christoph Bumiller 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "pipe/p_context.h" 25#include "pipe/p_defines.h" 26#include "pipe/p_state.h" 27#include "util/u_inlines.h" 28 29#include "nv50_context.h" 30 31void 32nv50_constbufs_validate(struct nv50_context *nv50) 33{ 34 struct nouveau_channel *chan = nv50->screen->base.channel; 35 unsigned s; 36 37 for (s = 0; s < 3; ++s) { 38 struct nv04_resource *res; 39 int i; 40 unsigned p, b; 41 42 if (s == PIPE_SHADER_FRAGMENT) 43 p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT; 44 else 45 if (s == PIPE_SHADER_GEOMETRY) 46 p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY; 47 else 48 p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX; 49 50 while (nv50->constbuf_dirty[s]) { 51 struct nouveau_bo *bo; 52 unsigned start = 0; 53 unsigned words = 0; 54 55 i = ffs(nv50->constbuf_dirty[s]) - 1; 56 nv50->constbuf_dirty[s] &= ~(1 << i); 57 58 res = nv04_resource(nv50->constbuf[s][i]); 59 if (!res) { 60 if (i != 0) { 61 BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1); 62 OUT_RING (chan, (i << 8) | p | 0); 63 } 64 continue; 65 } 66 67 if (i == 0) { 68 b = NV50_CB_PVP + s; 69 70 /* always upload GL uniforms through CB DATA */ 71 bo = nv50->screen->uniforms; 72 words = res->base.width0 / 4; 73 } else { 74 b = s * 16 + i; 75 76 assert(0); 77 78 if (!nouveau_resource_mapped_by_gpu(&res->base)) { 79 nouveau_buffer_migrate(&nv50->base, res, NOUVEAU_BO_VRAM); 80 81 BEGIN_RING(chan, RING_3D(CODE_CB_FLUSH), 1); 82 OUT_RING (chan, 0); 83 } 84 MARK_RING (chan, 6, 2); 85 BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3); 86 OUT_RESRCh(chan, res, 0, NOUVEAU_BO_RD); 87 OUT_RESRCl(chan, res, 0, NOUVEAU_BO_RD); 88 OUT_RING (chan, (b << 16) | (res->base.width0 & 0xffff)); 89 BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1); 90 OUT_RING (chan, (b << 12) | (i << 8) | p | 1); 91 92 bo = res->bo; 93 94 nv50_bufctx_add_resident(nv50, NV50_BUFCTX_CONSTANT, res, 95 res->domain | NOUVEAU_BO_RD); 96 } 97 98 if (words) { 99 MARK_RING(chan, 8, 1); 100 101 nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR); 102 } 103 104 while (words) { 105 unsigned nr = AVAIL_RING(chan); 106 107 if (nr < 16) { 108 FIRE_RING(chan); 109 nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR); 110 continue; 111 } 112 nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN); 113 114 BEGIN_RING(chan, RING_3D(CB_ADDR), 1); 115 OUT_RING (chan, (start << 8) | b); 116 BEGIN_RING_NI(chan, RING_3D(CB_DATA(0)), nr); 117 OUT_RINGp (chan, &res->data[start * 4], nr); 118 119 start += nr; 120 words -= nr; 121 } 122 } 123 } 124} 125 126static boolean 127nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) 128{ 129 struct nouveau_resource *heap; 130 int ret; 131 unsigned size; 132 133 if (prog->translated) 134 return TRUE; 135 136 prog->translated = nv50_program_translate(prog); 137 if (!prog->translated) 138 return FALSE; 139 140 if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap; 141 else 142 if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap; 143 else 144 heap = nv50->screen->vp_code_heap; 145 146 size = align(prog->code_size, 0x100); 147 148 ret = nouveau_resource_alloc(heap, size, prog, &prog->res); 149 if (ret) { 150 NOUVEAU_ERR("out of code space for shader type %i\n", prog->type); 151 return FALSE; 152 } 153 prog->code_base = prog->res->start; 154 155 nv50_relocate_program(prog, prog->code_base, 0); 156 157 nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, 158 (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, 159 NOUVEAU_BO_VRAM, prog->code_size, prog->code); 160 161 BEGIN_RING(nv50->screen->base.channel, RING_3D(CODE_CB_FLUSH), 1); 162 OUT_RING (nv50->screen->base.channel, 0); 163 164 return TRUE; 165} 166 167void 168nv50_vertprog_validate(struct nv50_context *nv50) 169{ 170 struct nouveau_channel *chan = nv50->screen->base.channel; 171 struct nv50_program *vp = nv50->vertprog; 172 173 if (nv50->clip.nr > vp->vp.clpd_nr) { 174 if (vp->translated) 175 nv50_program_destroy(nv50, vp); 176 vp->vp.clpd_nr = nv50->clip.nr; 177 } 178 179 if (!nv50_program_validate(nv50, vp)) 180 return; 181 182 BEGIN_RING(chan, RING_3D(VP_ATTR_EN(0)), 2); 183 OUT_RING (chan, vp->vp.attrs[0]); 184 OUT_RING (chan, vp->vp.attrs[1]); 185 BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_RESULT), 1); 186 OUT_RING (chan, vp->max_out); 187 BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_TEMP), 1); 188 OUT_RING (chan, vp->max_gpr); 189 BEGIN_RING(chan, RING_3D(VP_START_ID), 1); 190 OUT_RING (chan, vp->code_base); 191} 192 193void 194nv50_fragprog_validate(struct nv50_context *nv50) 195{ 196 struct nouveau_channel *chan = nv50->screen->base.channel; 197 struct nv50_program *fp = nv50->fragprog; 198 199 if (!nv50_program_validate(nv50, fp)) 200 return; 201 202 BEGIN_RING(chan, RING_3D(FP_REG_ALLOC_TEMP), 1); 203 OUT_RING (chan, fp->max_gpr); 204 BEGIN_RING(chan, RING_3D(FP_RESULT_COUNT), 1); 205 OUT_RING (chan, fp->max_out); 206 BEGIN_RING(chan, RING_3D(FP_CONTROL), 1); 207 OUT_RING (chan, fp->fp.flags[0]); 208 BEGIN_RING(chan, RING_3D(FP_CTRL_UNK196C), 1); 209 OUT_RING (chan, fp->fp.flags[1]); 210 BEGIN_RING(chan, RING_3D(FP_START_ID), 1); 211 OUT_RING (chan, fp->code_base); 212} 213 214void 215nv50_gmtyprog_validate(struct nv50_context *nv50) 216{ 217 struct nouveau_channel *chan = nv50->screen->base.channel; 218 struct nv50_program *gp = nv50->vertprog; 219 220 if (!nv50_program_validate(nv50, gp)) 221 return; 222 223 BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_TEMP), 1); 224 OUT_RING (chan, gp->max_gpr); 225 BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_RESULT), 1); 226 OUT_RING (chan, gp->max_out); 227 BEGIN_RING(chan, RING_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1); 228 OUT_RING (chan, gp->gp.prim_type); 229 BEGIN_RING(chan, RING_3D(GP_VERTEX_OUTPUT_COUNT), 1); 230 OUT_RING (chan, gp->gp.vert_count); 231 BEGIN_RING(chan, RING_3D(GP_START_ID), 1); 232 OUT_RING (chan, gp->code_base); 233} 234 235static void 236nv50_sprite_coords_validate(struct nv50_context *nv50) 237{ 238 struct nouveau_channel *chan = nv50->screen->base.channel; 239 uint32_t pntc[8], mode; 240 struct nv50_program *fp = nv50->fragprog; 241 unsigned i, c; 242 unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff; 243 244 if (!nv50->rast->pipe.point_quad_rasterization) { 245 if (nv50->state.point_sprite) { 246 BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8); 247 for (i = 0; i < 8; ++i) 248 OUT_RING(chan, 0); 249 250 nv50->state.point_sprite = FALSE; 251 } 252 return; 253 } else { 254 nv50->state.point_sprite = TRUE; 255 } 256 257 memset(pntc, 0, sizeof(pntc)); 258 259 for (i = 0; i < fp->in_nr; i++) { 260 unsigned n = util_bitcount(fp->in[i].mask); 261 262 if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) { 263 m += n; 264 continue; 265 } 266 if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) { 267 m += n; 268 continue; 269 } 270 271 for (c = 0; c < 4; ++c) { 272 if (fp->in[i].mask & (1 << c)) { 273 pntc[m / 8] |= (c + 1) << ((m % 8) * 4); 274 ++m; 275 } 276 } 277 } 278 279 if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT) 280 mode = 0x00; 281 else 282 mode = 0x10; 283 284 BEGIN_RING(chan, RING_3D(POINT_SPRITE_CTRL), 1); 285 OUT_RING (chan, mode); 286 287 BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8); 288 OUT_RINGp (chan, pntc, 8); 289} 290 291/* Validate state derived from shaders and the rasterizer cso. */ 292void 293nv50_validate_derived_rs(struct nv50_context *nv50) 294{ 295 struct nouveau_channel *chan = nv50->screen->base.channel; 296 uint32_t color, psize; 297 298 nv50_sprite_coords_validate(nv50); 299 300 if (nv50->dirty & NV50_NEW_FRAGPROG) 301 return; 302 psize = nv50->state.semantic_psize & ~NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK; 303 color = nv50->state.semantic_color & ~NV50_3D_MAP_SEMANTIC_0_CLMP_EN; 304 305 if (nv50->rast->pipe.clamp_vertex_color) 306 color |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN; 307 308 if (color != nv50->state.semantic_color) { 309 nv50->state.semantic_color = color; 310 BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 1); 311 OUT_RING (chan, color); 312 } 313 314 if (nv50->rast->pipe.point_size_per_vertex) 315 psize |= NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK; 316 317 if (psize != nv50->state.semantic_psize) { 318 nv50->state.semantic_psize = psize; 319 BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_3), 1); 320 OUT_RING (chan, psize); 321 } 322} 323 324static int 325nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4], 326 struct nv50_varying *in, struct nv50_varying *out) 327{ 328 int c; 329 uint8_t mv = out->mask, mf = in->mask, oid = out->hw; 330 331 for (c = 0; c < 4; ++c) { 332 if (mf & 1) { 333 if (in->linear) 334 lin[mid / 32] |= 1 << (mid % 32); 335 if (mv & 1) 336 map[mid] = oid; 337 else 338 if (c == 3) 339 map[mid] |= 1; 340 ++mid; 341 } 342 343 oid += mv & 1; 344 mf >>= 1; 345 mv >>= 1; 346 } 347 348 return mid; 349} 350 351void 352nv50_fp_linkage_validate(struct nv50_context *nv50) 353{ 354 struct nouveau_channel *chan = nv50->screen->base.channel; 355 struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog; 356 struct nv50_program *fp = nv50->fragprog; 357 struct nv50_varying dummy; 358 int i, n, c, m; 359 uint32_t primid = 0; 360 uint32_t psiz = 0x000; 361 uint32_t interp = fp->fp.interp; 362 uint32_t colors = fp->fp.colors; 363 uint32_t lin[4]; 364 uint8_t map[64]; 365 366 memset(lin, 0x00, sizeof(lin)); 367 368 /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx 369 * or is it the first byte ? 370 */ 371 memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map)); 372 373 dummy.mask = 0xf; /* map all components of HPOS */ 374 dummy.linear = 0; 375 m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]); 376 377 for (c = 0; c < vp->vp.clpd_nr; ++c) 378 map[m++] = vp->vp.clpd + c; 379 380 colors |= m << 8; /* adjust BFC0 id */ 381 382 /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */ 383 if (nv50->rast->pipe.light_twoside) { 384 for (i = 0; i < 2; ++i) 385 m = nv50_vec4_map(map, m, lin, 386 &fp->in[fp->vp.bfc[i]], &vp->out[vp->vp.bfc[i]]); 387 } 388 colors += m - 4; /* adjust FFC0 id */ 389 interp |= m << 8; /* set map id where 'normal' FP inputs start */ 390 391 dummy.mask = 0x0; 392 for (i = 0; i < fp->in_nr; ++i) { 393 for (n = 0; n < vp->out_nr; ++n) 394 if (vp->out[n].sn == fp->in[i].sn && 395 vp->out[n].si == fp->in[i].si) 396 break; 397 m = nv50_vec4_map(map, m, lin, 398 &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy); 399 } 400 401 /* PrimitiveID either is replaced by the system value, or 402 * written by the geometry shader into an output register 403 */ 404 if (fp->gp.primid < 0x40) { 405 primid = m; 406 map[m++] = vp->gp.primid; 407 } 408 409 if (nv50->rast->pipe.point_size_per_vertex) { 410 psiz = (m << 4) | 1; 411 map[m++] = vp->vp.psiz; 412 } 413 414 if (nv50->rast->pipe.clamp_vertex_color) 415 colors |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN; 416 417 n = (m + 3) / 4; 418 assert(m <= 64); 419 420 if (unlikely(nv50->gmtyprog)) { 421 BEGIN_RING(chan, RING_3D(GP_RESULT_MAP_SIZE), 1); 422 OUT_RING (chan, m); 423 BEGIN_RING(chan, RING_3D(GP_RESULT_MAP(0)), n); 424 OUT_RINGp (chan, map, n); 425 } else { 426 BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1); 427 OUT_RING (chan, vp->vp.attrs[2]); 428 429 BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_4), 1); 430 OUT_RING (chan, primid); 431 432 BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1); 433 OUT_RING (chan, m); 434 BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n); 435 OUT_RINGp (chan, map, n); 436 } 437 438 BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 4); 439 OUT_RING (chan, colors); 440 OUT_RING (chan, (vp->vp.clpd_nr << 8) | 4); 441 OUT_RING (chan, 0); 442 OUT_RING (chan, psiz); 443 444 BEGIN_RING(chan, RING_3D(FP_INTERPOLANT_CTRL), 1); 445 OUT_RING (chan, interp); 446 447 nv50->state.interpolant_ctrl = interp; 448 449 nv50->state.semantic_color = colors; 450 nv50->state.semantic_psize = psiz; 451 452 BEGIN_RING(chan, RING_3D(NOPERSPECTIVE_BITMAP(0)), 4); 453 OUT_RINGp (chan, lin, 4); 454 455 BEGIN_RING(chan, RING_3D(GP_ENABLE), 1); 456 OUT_RING (chan, nv50->gmtyprog ? 1 : 0); 457} 458 459static int 460nv50_vp_gp_mapping(uint8_t *map, int m, 461 struct nv50_program *vp, struct nv50_program *gp) 462{ 463 int i, j, c; 464 465 for (i = 0; i < gp->in_nr; ++i) { 466 uint8_t oid = 0, mv = 0, mg = gp->in[i].mask; 467 468 for (j = 0; j < vp->out_nr; ++j) { 469 if (vp->out[j].sn == gp->in[i].sn && 470 vp->out[j].si == gp->in[i].si) { 471 mv = vp->out[j].mask; 472 oid = vp->out[j].hw; 473 break; 474 } 475 } 476 477 for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) { 478 if (mg & mv & 1) 479 map[m++] = oid; 480 else 481 if (mg & 1) 482 map[m++] = (c == 3) ? 0x41 : 0x40; 483 oid += mv & 1; 484 } 485 } 486 return m; 487} 488 489void 490nv50_gp_linkage_validate(struct nv50_context *nv50) 491{ 492 struct nouveau_channel *chan = nv50->screen->base.channel; 493 struct nv50_program *vp = nv50->vertprog; 494 struct nv50_program *gp = nv50->gmtyprog; 495 int m = 0; 496 int n; 497 uint8_t map[64]; 498 499 if (!gp) 500 return; 501 memset(map, 0, sizeof(map)); 502 503 m = nv50_vp_gp_mapping(map, m, vp, gp); 504 505 n = (m + 3) / 4; 506 507 BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1); 508 OUT_RING (chan, vp->vp.attrs[2] | gp->vp.attrs[2]); 509 510 BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1); 511 OUT_RING (chan, m); 512 BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n); 513 OUT_RINGp (chan, map, n); 514} 515