nv50_shader_state.c revision fa94f8b209c111f2c3cd9250d9fec6cd03726114
1/*
2 * Copyright 2008 Ben Skeggs
3 * Copyright 2010 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "pipe/p_context.h"
25#include "pipe/p_defines.h"
26#include "pipe/p_state.h"
27#include "util/u_inlines.h"
28
29#include "nv50_context.h"
30
31void
32nv50_constbufs_validate(struct nv50_context *nv50)
33{
34   struct nouveau_channel *chan = nv50->screen->base.channel;
35   unsigned s;
36
37   for (s = 0; s < 3; ++s) {
38      struct nv04_resource *res;
39      int i;
40      unsigned p, b;
41
42      if (s == PIPE_SHADER_FRAGMENT)
43         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT;
44      else
45      if (s == PIPE_SHADER_GEOMETRY)
46         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY;
47      else
48         p = NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX;
49
50      while (nv50->constbuf_dirty[s]) {
51         struct nouveau_bo *bo;
52         unsigned start = 0;
53         unsigned words = 0;
54
55         i = ffs(nv50->constbuf_dirty[s]) - 1;
56         nv50->constbuf_dirty[s] &= ~(1 << i);
57
58         res = nv04_resource(nv50->constbuf[s][i]);
59         if (!res) {
60            if (i != 0) {
61               BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1);
62               OUT_RING  (chan, (i << 8) | p | 0);
63            }
64            continue;
65         }
66
67         if (i == 0) {
68            b = NV50_CB_PVP + s;
69
70            /* always upload GL uniforms through CB DATA */
71            bo = nv50->screen->uniforms;
72            words = res->base.width0 / 4;
73         } else {
74            b = s * 16 + i;
75
76            assert(0);
77
78            if (!nouveau_resource_mapped_by_gpu(&res->base)) {
79               nouveau_buffer_migrate(&nv50->base, res, NOUVEAU_BO_VRAM);
80
81               BEGIN_RING(chan, RING_3D(CODE_CB_FLUSH), 1);
82               OUT_RING  (chan, 0);
83            }
84            MARK_RING (chan, 6, 2);
85            BEGIN_RING(chan, RING_3D(CB_DEF_ADDRESS_HIGH), 3);
86            OUT_RESRCh(chan, res, 0, NOUVEAU_BO_RD);
87            OUT_RESRCl(chan, res, 0, NOUVEAU_BO_RD);
88            OUT_RING  (chan, (b << 16) | (res->base.width0 & 0xffff));
89            BEGIN_RING(chan, RING_3D(SET_PROGRAM_CB), 1);
90            OUT_RING  (chan, (b << 12) | (i << 8) | p | 1);
91
92            bo = res->bo;
93
94            nv50_bufctx_add_resident(nv50, NV50_BUFCTX_CONSTANT, res,
95                                     res->domain | NOUVEAU_BO_RD);
96         }
97
98         if (words) {
99            MARK_RING(chan, 8, 1);
100
101            nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR);
102         }
103
104         while (words) {
105            unsigned nr = AVAIL_RING(chan);
106
107            if (nr < 16) {
108               FIRE_RING(chan);
109               nouveau_bo_validate(chan, bo, res->domain | NOUVEAU_BO_WR);
110               continue;
111            }
112            nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
113
114            BEGIN_RING(chan, RING_3D(CB_ADDR), 1);
115            OUT_RING  (chan, (start << 8) | b);
116            BEGIN_RING_NI(chan, RING_3D(CB_DATA(0)), nr);
117            OUT_RINGp (chan, &res->data[start * 4], nr);
118
119            start += nr;
120            words -= nr;
121         }
122      }
123   }
124}
125
126static boolean
127nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
128{
129   struct nouveau_resource *heap;
130   int ret;
131   unsigned size;
132
133   if (prog->translated)
134      return TRUE;
135
136   prog->translated = nv50_program_translate(prog);
137   if (!prog->translated)
138      return FALSE;
139
140   if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap;
141   if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap;
142   else
143      heap = nv50->screen->vp_code_heap;
144
145   size = align(prog->code_size, 0x100);
146
147   ret = nouveau_resource_alloc(heap, size, prog, &prog->res);
148   if (ret)
149      return FALSE;
150   prog->code_base = prog->res->start;
151
152   nv50_relocate_program(prog, prog->code_base, 0);
153
154   nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
155                       (prog->type << 16) + prog->code_base,
156                       NOUVEAU_BO_VRAM, prog->code_size, prog->code);
157
158   BEGIN_RING(nv50->screen->base.channel, RING_3D(CODE_CB_FLUSH), 1);
159   OUT_RING  (nv50->screen->base.channel, 0);
160
161   return TRUE;
162}
163
164void
165nv50_vertprog_validate(struct nv50_context *nv50)
166{
167   struct nouveau_channel *chan = nv50->screen->base.channel;
168   struct nv50_program *vp = nv50->vertprog;
169
170   if (!nv50_program_validate(nv50, vp))
171         return;
172
173   BEGIN_RING(chan, RING_3D(VP_ATTR_EN(0)), 2);
174   OUT_RING  (chan, vp->vp.attrs[0]);
175   OUT_RING  (chan, vp->vp.attrs[1]);
176   BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_RESULT), 1);
177   OUT_RING  (chan, vp->max_out);
178   BEGIN_RING(chan, RING_3D(VP_REG_ALLOC_TEMP), 1);
179   OUT_RING  (chan, vp->max_gpr);
180   BEGIN_RING(chan, RING_3D(VP_START_ID), 1);
181   OUT_RING  (chan, vp->code_base);
182}
183
184void
185nv50_fragprog_validate(struct nv50_context *nv50)
186{
187   struct nouveau_channel *chan = nv50->screen->base.channel;
188   struct nv50_program *fp = nv50->fragprog;
189
190   if (!nv50_program_validate(nv50, fp))
191         return;
192
193   BEGIN_RING(chan, RING_3D(FP_REG_ALLOC_TEMP), 1);
194   OUT_RING  (chan, fp->max_gpr);
195   BEGIN_RING(chan, RING_3D(FP_RESULT_COUNT), 1);
196   OUT_RING  (chan, fp->max_out);
197   BEGIN_RING(chan, RING_3D(FP_CONTROL), 1);
198   OUT_RING  (chan, fp->fp.flags[0]);
199   BEGIN_RING(chan, RING_3D(FP_CTRL_UNK196C), 1);
200   OUT_RING  (chan, fp->fp.flags[1]);
201   BEGIN_RING(chan, RING_3D(FP_START_ID), 1);
202   OUT_RING  (chan, fp->code_base);
203}
204
205void
206nv50_gmtyprog_validate(struct nv50_context *nv50)
207{
208   struct nouveau_channel *chan = nv50->screen->base.channel;
209   struct nv50_program *gp = nv50->vertprog;
210
211   if (!nv50_program_validate(nv50, gp))
212         return;
213
214   BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_TEMP), 1);
215   OUT_RING  (chan, gp->max_gpr);
216   BEGIN_RING(chan, RING_3D(GP_REG_ALLOC_RESULT), 1);
217   OUT_RING  (chan, gp->max_out);
218   BEGIN_RING(chan, RING_3D(GP_OUTPUT_PRIMITIVE_TYPE), 1);
219   OUT_RING  (chan, gp->gp.prim_type);
220   BEGIN_RING(chan, RING_3D(GP_VERTEX_OUTPUT_COUNT), 1);
221   OUT_RING  (chan, gp->gp.vert_count);
222   BEGIN_RING(chan, RING_3D(GP_START_ID), 1);
223   OUT_RING  (chan, gp->code_base);
224}
225
226void
227nv50_sprite_coords_validate(struct nv50_context *nv50)
228{
229   struct nouveau_channel *chan = nv50->screen->base.channel;
230   uint32_t pntc[8], mode;
231   struct nv50_program *fp = nv50->fragprog;
232   unsigned i, c;
233   unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;
234
235   if (!nv50->rast->pipe.point_quad_rasterization) {
236      if (nv50->state.point_sprite) {
237         BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8);
238         for (i = 0; i < 8; ++i)
239            OUT_RING(chan, 0);
240
241         nv50->state.point_sprite = FALSE;
242      }
243      return;
244   } else {
245      nv50->state.point_sprite = TRUE;
246   }
247
248   memset(pntc, 0, sizeof(pntc));
249
250   for (i = 0; i < fp->in_nr; i++) {
251      unsigned n = util_bitcount(fp->in[i].mask);
252
253      if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
254         m += n;
255         continue;
256      }
257      if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
258         m += n;
259         continue;
260      }
261
262      for (c = 0; c < 4; ++c) {
263         if (fp->in[i].mask & (1 << c)) {
264            pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
265            ++m;
266         }
267      }
268   }
269
270   if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
271      mode = 0x00;
272   else
273      mode = 0x10;
274
275   BEGIN_RING(chan, RING_3D(POINT_SPRITE_CTRL), 1);
276   OUT_RING  (chan, mode);
277
278   BEGIN_RING(chan, RING_3D(POINT_COORD_REPLACE_MAP(0)), 8);
279   OUT_RINGp (chan, pntc, 8);
280}
281
282static int
283nv50_vec4_map(uint8_t *map, int mid, uint32_t lin[4],
284              struct nv50_varying *in, struct nv50_varying *out)
285{
286   int c;
287   uint8_t mv = out->mask, mf = in->mask, oid = out->hw;
288
289   for (c = 0; c < 4; ++c) {
290      if (mf & 1) {
291         if (in->linear)
292            lin[mid / 32] |= 1 << (mid % 32);
293         if (mv & 1)
294            map[mid] = oid;
295         else
296         if (c == 3)
297            map[mid] |= 1;
298         ++mid;
299      }
300
301      oid += mv & 1;
302      mf >>= 1;
303      mv >>= 1;
304   }
305
306   return mid;
307}
308
309void
310nv50_fp_linkage_validate(struct nv50_context *nv50)
311{
312   struct nouveau_channel *chan = nv50->screen->base.channel;
313   struct nv50_program *vp = nv50->gmtyprog ? nv50->gmtyprog : nv50->vertprog;
314   struct nv50_program *fp = nv50->fragprog;
315   struct nv50_varying dummy;
316   int i, n, c, m;
317   uint32_t primid = 0;
318   uint32_t psiz = 0x000;
319   uint32_t interp = fp->fp.interp;
320   uint32_t colors = fp->fp.colors;
321   uint32_t lin[4];
322   uint8_t map[64];
323
324   memset(lin, 0x00, sizeof(lin));
325
326   /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
327    *  or is it the first byte ?
328    */
329   memset(map, nv50->gmtyprog ? 0x80 : 0x40, sizeof(map));
330
331   dummy.mask = 0xf; /* map all components of HPOS */
332   dummy.linear = 0;
333   m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);
334
335   for (c = 0; c < vp->vp.clpd_nr; ++c)
336      map[m++] |= vp->vp.clpd + c;
337
338   colors |= m << 8; /* adjust BFC0 id */
339
340   /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
341   if (nv50->rast->pipe.light_twoside) {
342      for (i = 0; i < 2; ++i)
343         m = nv50_vec4_map(map, m, lin,
344                           &fp->in[fp->vp.bfc[i]], &vp->out[vp->vp.bfc[i]]);
345   }
346   colors += m - 4; /* adjust FFC0 id */
347   interp |= m << 8; /* set map id where 'normal' FP inputs start */
348
349   dummy.mask = 0x0;
350   for (i = 0; i < fp->in_nr; ++i) {
351      for (n = 0; n < vp->out_nr; ++n)
352         if (vp->out[n].sn == fp->in[i].sn &&
353             vp->out[n].si == fp->in[i].si)
354            break;
355      m = nv50_vec4_map(map, m, lin,
356                        &fp->in[i], (n < vp->out_nr) ? &vp->out[n] : &dummy);
357   }
358
359   /* PrimitiveID either is replaced by the system value, or
360    * written by the geometry shader into an output register
361    */
362   if (fp->gp.primid < 0x40) {
363      primid = m;
364      map[m++] = vp->gp.primid;
365   }
366
367   if (nv50->rast->pipe.point_size_per_vertex) {
368      psiz = (m << 4) | 1;
369      map[m++] = vp->vp.psiz;
370   }
371
372   n = (m + 3) / 4;
373   assert(m <= 64);
374
375   if (unlikely(nv50->gmtyprog)) {
376      BEGIN_RING(chan, RING_3D(GP_RESULT_MAP_SIZE), 1);
377      OUT_RING  (chan, m);
378      BEGIN_RING(chan, RING_3D(GP_RESULT_MAP(0)), n);
379      OUT_RINGp (chan, map, n);
380   } else {
381      BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1);
382      OUT_RING  (chan, vp->vp.attrs[2]);
383
384      BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_4), 1);
385      OUT_RING  (chan, primid);
386
387      BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1);
388      OUT_RING  (chan, m);
389      BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n);
390      OUT_RINGp (chan, map, n);
391   }
392
393   BEGIN_RING(chan, RING_3D(MAP_SEMANTIC_0), 4);
394   OUT_RING  (chan, colors);
395   OUT_RING  (chan, (vp->vp.clpd_nr << 8) | 4);
396   OUT_RING  (chan, 0);
397   OUT_RING  (chan, psiz);
398
399   BEGIN_RING(chan, RING_3D(FP_INTERPOLANT_CTRL), 1);
400   OUT_RING  (chan, interp);
401
402   nv50->state.interpolant_ctrl = interp;
403
404   BEGIN_RING(chan, RING_3D(NOPERSPECTIVE_BITMAP(0)), 4);
405   OUT_RINGp (chan, lin, 4);
406
407   BEGIN_RING(chan, RING_3D(GP_ENABLE), 1);
408   OUT_RING  (chan, nv50->gmtyprog ? 1 : 0);
409}
410
411static int
412nv50_vp_gp_mapping(uint8_t *map, int m,
413                   struct nv50_program *vp, struct nv50_program *gp)
414{
415   int i, j, c;
416
417   for (i = 0; i < gp->in_nr; ++i) {
418      uint8_t oid = 0, mv = 0, mg = gp->in[i].mask;
419
420      for (j = 0; j < vp->out_nr; ++j) {
421         if (vp->out[j].sn == gp->in[i].sn &&
422             vp->out[j].si == gp->in[i].si) {
423            mv = vp->out[j].mask;
424            oid = vp->out[j].hw;
425            break;
426         }
427      }
428
429      for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) {
430         if (mg & mv & 1)
431            map[m++] = oid;
432         else
433         if (mg & 1)
434            map[m++] = (c == 3) ? 0x41 : 0x40;
435         oid += mv & 1;
436      }
437   }
438   return m;
439}
440
441void
442nv50_gp_linkage_validate(struct nv50_context *nv50)
443{
444   struct nouveau_channel *chan = nv50->screen->base.channel;
445   struct nv50_program *vp = nv50->vertprog;
446   struct nv50_program *gp = nv50->gmtyprog;
447   int m = 0;
448   int n;
449   uint8_t map[64];
450
451   if (!gp)
452      return;
453   memset(map, 0, sizeof(map));
454
455   m = nv50_vp_gp_mapping(map, m, vp, gp);
456
457   n = (m + 3) / 4;
458
459   BEGIN_RING(chan, RING_3D(VP_GP_BUILTIN_ATTR_EN), 1);
460   OUT_RING  (chan, vp->vp.attrs[2] | gp->vp.attrs[2]);
461
462   BEGIN_RING(chan, RING_3D(VP_RESULT_MAP_SIZE), 1);
463   OUT_RING  (chan, m);
464   BEGIN_RING(chan, RING_3D(VP_RESULT_MAP(0)), n);
465   OUT_RINGp (chan, map, n);
466}
467