nv50_shader_state.c revision 116133af3499947500a6d0c877fbc8f564ee4c76
1/*
2 * Copyright 2008 Ben Skeggs
3 * Copyright 2010 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "pipe/p_context.h"
25#include "pipe/p_defines.h"
26#include "pipe/p_state.h"
27#include "util/u_inlines.h"
28
29#include "nv50_context.h"
30
31void
32nv50_constbufs_validate(struct nv50_context *nv50)
33{
34   struct nouveau_channel *chan = nv50->screen->base.channel;
35   unsigned s;
36
37   for (s = 0; s < 3; ++s) {
38      struct nv04_resource *res;
39      int i;
40      unsigned p, b;
41
42      if (s == PIPE_SHADER_FRAGMENT)
43         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
44      else
45      if (s == PIPE_SHADER_GEOMETRY)
46         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
47      else
48         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;
49
50      while (nv50->constbuf_dirty[s]) {
51         struct nouveau_bo *bo;
52         unsigned start = 0;
53         unsigned words = 0;
54
55         i = ffs(nv50->constbuf_dirty[s]) - 1;
56         nv50->constbuf_dirty[s] &= ~(1 << i);
57
58         res = nv04_resource(nv50->constbuf[s][i]);
59         if (!res) {
60            if (i != 0) {
61               BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1);
62               OUT_RING  (chan, (i << 8) | p | 0);
63            }
64            continue;
65         }
66
67         if (i == 0) {
68            b = NV50_CB_PVP + s;
69
70            /* always upload GL uniforms through CB DATA */
71            bo = nv50->screen->uniforms;
72            words = res->base.width0 / 4;
73         } else {
74            b = s * 16 + i;
75
76            assert(0);
77
78            if (!nouveau_resource_mapped_by_gpu(&res->base)) {
79               nouveau_buffer_migrate(&nv50->base, res, NOUVEAU_BO_VRAM);
80
81               BEGIN_RING(chan, RING_3D(CODE_CB_FLUSH), 1);
82               OUT_RING  (chan, 0);
83            }
84            MARK_RING (chan, 6, 2);
85            BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3);
86            OUT_RESRCh(chan, res, 0, NOUVEAU_BO_RD);
87            OUT_RESRCl(chan, res, 0, NOUVEAU_BO_RD);
88            OUT_RING  (chan, (b << 16) | (res->base.width0 & 0xffff));
89            BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1);
90            OUT_RING  (chan, (b << 12) | (i << 8) | p | 1);
91
92            bo = res->bo;
93
94            nv50_bufctx_add_resident(nv50, NV50_BUFCTX_CONSTANT, res,
95                                     res->domain | NOUVEAU_BO_RD);
96         }
97
98         if (words) {
99            MARK_RING(chan, 8, 1);
100
101            nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR);
102         }
103
104         while (words) {
105            unsigned nr = AVAIL_RING(chan);
106
107            if (nr < 16) {
108               FIRE_RING(chan);
109               nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR);
110               continue;
111            }
112            nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
113
114            BEGIN_RING(chan, RING_3D(CB_ADDR), 1);
115            OUT_RING  (chan, (start << 8) | b);
116            BEGIN_RING_NI(chan, RING_3D(CB_DATA(0)), nr);
117            OUT_RINGp (chan, &res->data[start * 4], nr);
118
119            start += nr;
120            words -= nr;
121         }
122      }
123   }
124}
125
126static boolean
127nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
128{
129   struct nouveau_resource *heap;
130   int ret;
131   unsigned size;
132
133   if (prog->translated)
134      return TRUE;
135
136   prog->translated = nv50_program_translate(prog);
137   if (!prog->translated)
138      return FALSE;
139
140   if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap;
141   else
142   if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap;
143   else
144      heap = nv50->screen->vp_code_heap;
145
146   size = align(prog->code_size, 0x100);
147
148   ret = nouveau_resource_alloc(heap, size, prog, &prog->res);
149   if (ret) {
150      NOUVEAU_ERR("out of code space for shader type %i\n", prog->type);
151      return FALSE;
152   }
153   prog->code_base = prog->res->start;
154
155   nv50_relocate_program(prog, prog->code_base, 0);
156
157   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
158                       (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
159                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);
160
161   BEGIN_RING(nv50->screen->base.channel, RING_3D(CODE_CB_FLUSH), 1);
162   OUT_RING  (nv50->screen->base.channel, 0);
163
164   return TRUE;
165}
166
167void
168nv50_vertprog_validate(struct nv50_context *nv50)
169{
170   struct nouveau_channel *chan = nv50->screen->base.channel;
171   struct nv50_program *vp = nv50->vertprog;
172
173   if (nv50->clip.nr > vp->vp.clpd_nr) {
174      if (vp->translated)
175         nv50_program_destroy(nv50, vp);
176      vp->vp.clpd_nr = nv50->clip.nr;
177   }
178
179   if (!nv50_program_validate(nv50, vp))
180         return;
181
182   BEGIN_RING(chan, RING_3D(VP_ATTR_EN(0)), 2);
183   OUT_RING  (chan, vp->vp.attrs[0]);
184   OUT_RING  (chan, vp->vp.attrs[1]);
185   BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_RESULT), 1);
186   OUT_RING  (chan, vp->max_out);
187   BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_TEMP), 1);
188   OUT_RING  (chan, vp->max_gpr);
189   BEGIN_RING(chan, RING_3D(VP_START_ID), 1);
190   OUT_RING  (chan, vp->code_base);
191}
192
193void
194nv50_fragprog_validate(struct nv50_context *nv50)
195{
196   struct nouveau_channel *chan = nv50->screen->base.channel;
197   struct nv50_program *fp = nv50->fragprog;
198
199   if (!nv50_program_validate(nv50, fp))
200         return;
201
202   BEGIN_RING(chan, RING_3D(FP_REG_ALLOC_TEMP), 1);
203   OUT_RING  (chan, fp->max_gpr);
204   BEGIN_RING(chan, RING_3D(FP_RESULT_COUNT), 1);
205   OUT_RING  (chan, fp->max_out);
206   BEGIN_RING(chan, RING_3D(FP_CONTROL), 1);
207   OUT_RING  (chan, fp->fp.flags[0]);
208   BEGIN_RING(chan, RING_3D(FP_CTRL_UNK196C), 1);
209   OUT_RING  (chan, fp->fp.flags[1]);
210   BEGIN_RING(chan, RING_3D(FP_START_ID), 1);
211   OUT_RING  (chan, fp->code_base);
212}
213
214void
215nv50_gmtyprog_validate(struct nv50_context *nv50)
216{
217   struct nouveau_channel *chan = nv50->screen->base.channel;
218   struct nv50_program *gp = nv50->vertprog;
219
220   if (!nv50_program_validate(nv50, gp))
221         return;
222
223   BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_TEMP), 1);
224   OUT_RING  (chan, gp->max_gpr);
225   BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_RESULT), 1);
226   OUT_RING  (chan, gp->max_out);
227   BEGIN_RING(chan, RING_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
228   OUT_RING  (chan, gp->gp.prim_type);
229   BEGIN_RING(chan, RING_3D(GP_VERTEX_OUTPUT_COUNT), 1);
230   OUT_RING  (chan, gp->gp.vert_count);
231   BEGIN_RING(chan, RING_3D(GP_START_ID), 1);
232   OUT_RING  (chan, gp->code_base);
233}
234
235static void
236nv50_sprite_coords_validate(struct nv50_context *nv50)
237{
238   struct nouveau_channel *chan = nv50->screen->base.channel;
239   uint32_t pntc[8], mode;
240   struct nv50_program *fp = nv50->fragprog;
241   unsigned i, c;
242   unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;
243
244   if (!nv50->rast->pipe.point_quad_rasterization) {
245      if (nv50->state.point_sprite) {
246         BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8);
247         for (i = 0; i < 8; ++i)
248            OUT_RING(chan, 0);
249
250         nv50->state.point_sprite = FALSE;
251      }
252      return;
253   } else {
254      nv50->state.point_sprite = TRUE;
255   }
256
257   memset(pntc, 0, sizeof(pntc));
258
259   for (i = 0; i < fp->in_nr; i++) {
260      unsigned n = util_bitcount(fp->in[i].mask);
261
262      if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
263         m += n;
264         continue;
265      }
266      if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
267         m += n;
268         continue;
269      }
270
271      for (c = 0; c < 4; ++c) {
272         if (fp->in[i].mask & (1 << c)) {
273            pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
274            ++m;
275         }
276      }
277   }
278
279   if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
280      mode = 0x00;
281   else
282      mode = 0x10;
283
284   BEGIN_RING(chan, RING_3D(POINT_SPRITE_CTRL), 1);
285   OUT_RING  (chan, mode);
286
287   BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8);
288   OUT_RINGp (chan, pntc, 8);
289}
290
291/* Validate state derived from shaders and the rasterizer cso. */
292void
293nv50_validate_derived_rs(struct nv50_context *nv50)
294{
295   struct nouveau_channel *chan = nv50->screen->base.channel;
296   uint32_t color, psize;
297
298   nv50_sprite_coords_validate(nv50);
299
300   if (nv50->dirty & NV50_NEW_FRAGPROG)
301      return;
302   psize = nv50->state.semantic_psize & ~NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK;
303   color = nv50->state.semantic_color & ~NV50_3D_MAP_SEMANTIC_0_CLMP_EN;
304
305   if (nv50->rast->pipe.clamp_vertex_color)
306      color |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN;
307
308   if (color != nv50->state.semantic_color) {
309      nv50->state.semantic_color = color;
310      BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 1);
311      OUT_RING  (chan, color);
312   }
313
314   if (nv50->rast->pipe.point_size_per_vertex)
315      psize |= NV50_3D_MAP_SEMANTIC_3_PTSZ_EN__MASK;
316
317   if (psize != nv50->state.semantic_psize) {
318      nv50->state.semantic_psize = psize;
319      BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_3), 1);
320      OUT_RING  (chan, psize);
321   }
322}
323
324static int
325nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
326              struct nv50_varying *in, struct nv50_varying *out)
327{
328   int c;
329   uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
330
331   for (c = 0; c < 4; ++c) {
332      if (mf & 1) {
333         if (in->linear)
334            lin[mid / 32] |= 1 << (mid % 32);
335         if (mv & 1)
336            map[mid] = oid;
337         else
338         if (c == 3)
339            map[mid] |= 1;
340         ++mid;
341      }
342
343      oid += mv & 1;
344      mf >>= 1;
345      mv >>= 1;
346   }
347
348   return mid;
349}
350
351void
352nv50_fp_linkage_validate(struct nv50_context *nv50)
353{
354   struct nouveau_channel *chan = nv50->screen->base.channel;
355   struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
356   struct nv50_program *fp = nv50->fragprog;
357   struct nv50_varying dummy;
358   int i, n, c, m;
359   uint32_t primid = 0;
360   uint32_t psiz = 0x000;
361   uint32_t interp = fp->fp.interp;
362   uint32_t colors = fp->fp.colors;
363   uint32_t lin[4];
364   uint8_t map[64];
365
366   memset(lin, 0x00, sizeof(lin));
367
368   /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
369    *  or is it the first byte ?
370    */
371   memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));
372
373   dummy.mask = 0xf; /* map all components of HPOS */
374   dummy.linear = 0;
375   m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
376
377   for (c = 0; c < vp->vp.clpd_nr; ++c)
378      map[m++] = vp->vp.clpd + c;
379
380   colors |= m << 8; /* adjust BFC0 id */
381
382   /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
383   if (nv50->rast->pipe.light_twoside) {
384      for (i = 0; i < 2; ++i)
385         m = nv50_vec4_map(map, m, lin,
386                           &fp->in[fp->vp.bfc[i]], &vp->out[vp->vp.bfc[i]]);
387   }
388   colors += m - 4; /* adjust FFC0 id */
389   interp |= m << 8; /* set map id where 'normal' FP inputs start */
390
391   dummy.mask = 0x0;
392   for (i = 0; i < fp->in_nr; ++i) {
393      for (n = 0; n < vp->out_nr; ++n)
394         if (vp->out[n].sn == fp->in[i].sn &&
395             vp->out[n].si == fp->in[i].si)
396            break;
397      m = nv50_vec4_map(map, m, lin,
398                        &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
399   }
400
401   /* PrimitiveID either is replaced by the system value, or
402    * written by the geometry shader into an output register
403    */
404   if (fp->gp.primid < 0x40) {
405      primid = m;
406      map[m++] = vp->gp.primid;
407   }
408
409   if (nv50->rast->pipe.point_size_per_vertex) {
410      psiz = (m << 4) | 1;
411      map[m++] = vp->vp.psiz;
412   }
413
414   if (nv50->rast->pipe.clamp_vertex_color)
415      colors |= NV50_3D_MAP_SEMANTIC_0_CLMP_EN;
416
417   n = (m + 3) / 4;
418   assert(m <= 64);
419
420   if (unlikely(nv50->gmtyprog)) {
421      BEGIN_RING(chan, RING_3D(GP_RESULT_MAP_SIZE), 1);
422      OUT_RING  (chan, m);
423      BEGIN_RING(chan, RING_3D(GP_RESULT_MAP(0)), n);
424      OUT_RINGp (chan, map, n);
425   } else {
426      BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1);
427      OUT_RING  (chan, vp->vp.attrs[2]);
428
429      BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_4), 1);
430      OUT_RING  (chan, primid);
431
432      BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1);
433      OUT_RING  (chan, m);
434      BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n);
435      OUT_RINGp (chan, map, n);
436   }
437
438   BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 4);
439   OUT_RING  (chan, colors);
440   OUT_RING  (chan, (vp->vp.clpd_nr << 8) | 4);
441   OUT_RING  (chan, 0);
442   OUT_RING  (chan, psiz);
443
444   BEGIN_RING(chan, RING_3D(FP_INTERPOLANT_CTRL), 1);
445   OUT_RING  (chan, interp);
446
447   nv50->state.interpolant_ctrl = interp;
448
449   nv50->state.semantic_color = colors;
450   nv50->state.semantic_psize = psiz;
451
452   BEGIN_RING(chan, RING_3D(NOPERSPECTIVE_BITMAP(0)), 4);
453   OUT_RINGp (chan, lin, 4);
454
455   BEGIN_RING(chan, RING_3D(GP_ENABLE), 1);
456   OUT_RING  (chan, nv50->gmtyprog ? 1 : 0);
457}
458
459static int
460nv50_vp_gp_mapping(uint8_t *map, int m,
461                   struct nv50_program *vp, struct nv50_program *gp)
462{
463   int i, j, c;
464
465   for (i = 0; i < gp->in_nr; ++i) {
466      uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
467
468      for (j = 0; j < vp->out_nr; ++j) {
469         if (vp->out[j].sn == gp->in[i].sn &&
470             vp->out[j].si == gp->in[i].si) {
471            mv = vp->out[j].mask;
472            oid = vp->out[j].hw;
473            break;
474         }
475      }
476
477      for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
478         if (mg & mv & 1)
479            map[m++] = oid;
480         else
481         if (mg & 1)
482            map[m++] = (c == 3) ? 0x41 : 0x40;
483         oid += mv & 1;
484      }
485   }
486   return m;
487}
488
489void
490nv50_gp_linkage_validate(struct nv50_context *nv50)
491{
492   struct nouveau_channel *chan = nv50->screen->base.channel;
493   struct nv50_program *vp = nv50->vertprog;
494   struct nv50_program *gp = nv50->gmtyprog;
495   int m = 0;
496   int n;
497   uint8_t map[64];
498
499   if (!gp)
500      return;
501   memset(map, 0, sizeof(map));
502
503   m = nv50_vp_gp_mapping(map, m, vp, gp);
504
505   n = (m + 3) / 4;
506
507   BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1);
508   OUT_RING  (chan, vp->vp.attrs[2] | gp->vp.attrs[2]);
509
510   BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1);
511   OUT_RING  (chan, m);
512   BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n);
513   OUT_RINGp (chan, map, n);
514}
515