/* nvc0_shader_state.c — revision 36158c199448ce038d9fe913d972f29a655aecab */
1/*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "pipe/p_context.h"
24#include "pipe/p_defines.h"
25#include "pipe/p_state.h"
26#include "util/u_inlines.h"
27
28#include "nvc0_context.h"
29
/* Update per-stage context state after a shader program (re)bind:
 * - track which stages require thread-local storage (TLS) and keep the
 *   screen's TLS buffer referenced in the 3D buffer context while any
 *   stage needs it;
 * - bind or unbind constant buffer slot 14, which holds the program's
 *   immediate values inside the code segment.
 * prog may be NULL (stage unbound); stage is the hardware stage index.
 */
static INLINE void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
                                  struct nvc0_program *prog, int stage)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;

   if (prog && prog->need_tls) {
      const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
      /* First stage to need TLS: add the BO reference once. */
      if (!nvc0->state.tls_required)
         BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
      nvc0->state.tls_required |= 1 << stage;
   } else {
      /* If this stage was the only one needing TLS, drop the reference. */
      if (nvc0->state.tls_required == (1 << stage))
         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS);
      nvc0->state.tls_required &= ~(1 << stage);
   }

   if (prog && prog->immd_size) {
      /* Point constbuf 14 at the immediates stored in the code segment. */
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      /* NOTE: may overlap code of a different shader */
      PUSH_DATA (push, align(prog->immd_size, 0x100));
      PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base);
      PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base);
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
      PUSH_DATA (push, (14 << 4) | 1); /* slot 14, valid */

      nvc0->state.c14_bound |= 1 << stage;
   } else
   if (nvc0->state.c14_bound & (1 << stage)) {
      /* No immediates: unbind slot 14 if it was bound for this stage. */
      BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
      PUSH_DATA (push, (14 << 4) | 0); /* slot 14, invalid */

      nvc0->state.c14_bound &= ~(1 << stage);
   }
}
65
66static INLINE boolean
67nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
68{
69   if (prog->mem)
70      return TRUE;
71
72   if (!prog->translated) {
73      prog->translated = nvc0_program_translate(prog);
74      if (!prog->translated)
75         return FALSE;
76   }
77
78   if (likely(prog->code_size))
79      return nvc0_program_upload_code(nvc0, prog);
80   return TRUE; /* stream output info only */
81}
82
83void
84nvc0_vertprog_validate(struct nvc0_context *nvc0)
85{
86   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
87   struct nvc0_program *vp = nvc0->vertprog;
88
89   if (!nvc0_program_validate(nvc0, vp))
90         return;
91   nvc0_program_update_context_state(nvc0, vp, 0);
92
93   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
94   PUSH_DATA (push, 0x11);
95   PUSH_DATA (push, vp->code_base);
96   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
97   PUSH_DATA (push, vp->max_gpr);
98
99   // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
100   // PUSH_DATA (push, 0);
101}
102
103void
104nvc0_fragprog_validate(struct nvc0_context *nvc0)
105{
106   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
107   struct nvc0_program *fp = nvc0->fragprog;
108
109   if (!nvc0_program_validate(nvc0, fp))
110         return;
111   nvc0_program_update_context_state(nvc0, fp, 4);
112
113   if (fp->fp.early_z != nvc0->state.early_z_forced) {
114      nvc0->state.early_z_forced = fp->fp.early_z;
115      IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
116   }
117
118   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
119   PUSH_DATA (push, 0x51);
120   PUSH_DATA (push, fp->code_base);
121   BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
122   PUSH_DATA (push, fp->max_gpr);
123
124   BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
125   PUSH_DATA (push, 0x20164010);
126   PUSH_DATA (push, 0x20);
127   BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
128   PUSH_DATA (push, fp->flags[0]);
129}
130
131void
132nvc0_tctlprog_validate(struct nvc0_context *nvc0)
133{
134   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
135   struct nvc0_program *tp = nvc0->tctlprog;
136
137   if (tp && nvc0_program_validate(nvc0, tp)) {
138      if (tp->tp.tess_mode != ~0) {
139         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
140         PUSH_DATA (push, tp->tp.tess_mode);
141      }
142      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
143      PUSH_DATA (push, 0x21);
144      PUSH_DATA (push, tp->code_base);
145      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
146      PUSH_DATA (push, tp->max_gpr);
147
148      if (tp->tp.input_patch_size <= 32)
149         IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
150   } else {
151      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
152      PUSH_DATA (push, 0x20);
153   }
154   nvc0_program_update_context_state(nvc0, tp, 1);
155}
156
157void
158nvc0_tevlprog_validate(struct nvc0_context *nvc0)
159{
160   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
161   struct nvc0_program *tp = nvc0->tevlprog;
162
163   if (tp && nvc0_program_validate(nvc0, tp)) {
164      if (tp->tp.tess_mode != ~0) {
165         BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
166         PUSH_DATA (push, tp->tp.tess_mode);
167      }
168      BEGIN_NVC0(push, NVC0_3D(TEP_SELECT), 1);
169      PUSH_DATA (push, 0x31);
170      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
171      PUSH_DATA (push, tp->code_base);
172      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
173      PUSH_DATA (push, tp->max_gpr);
174   } else {
175      BEGIN_NVC0(push, NVC0_3D(TEP_SELECT), 1);
176      PUSH_DATA (push, 0x30);
177   }
178   nvc0_program_update_context_state(nvc0, tp, 2);
179}
180
181void
182nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
183{
184   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
185   struct nvc0_program *gp = nvc0->gmtyprog;
186
187   if (gp)
188      nvc0_program_validate(nvc0, gp);
189
190   /* we allow GPs with no code for specifying stream output state only */
191   if (gp && gp->code_size) {
192      const boolean gp_selects_layer = gp->hdr[13] & (1 << 9);
193
194      BEGIN_NVC0(push, NVC0_3D(GP_SELECT), 1);
195      PUSH_DATA (push, 0x41);
196      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
197      PUSH_DATA (push, gp->code_base);
198      BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
199      PUSH_DATA (push, gp->max_gpr);
200      BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
201      PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
202   } else {
203      IMMED_NVC0(push, NVC0_3D(LAYER), 0);
204      BEGIN_NVC0(push, NVC0_3D(GP_SELECT), 1);
205      PUSH_DATA (push, 0x40);
206   }
207   nvc0_program_update_context_state(nvc0, gp, 3);
208}
209
/* Validate transform feedback state: program the varying layout of the last
 * vertex-processing stage's TFB descriptor and (re)bind the target buffers.
 * Assumes a vertex program is always bound when this is called.
 */
void
nvc0_tfb_validate(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_transform_feedback_state *tfb;
   unsigned b;

   /* TFB state comes from the last enabled stage before rasterization:
    * geometry > tess eval > vertex.
    */
   if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
   else
   if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
   else
      tfb = nvc0->vertprog->tfb;

   IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);

   /* Re-emit the per-buffer varying layout only when the descriptor changed. */
   if (tfb && tfb != nvc0->state.tfb) {
      for (b = 0; b < 4; ++b) {
         if (tfb->varying_count[b]) {
            /* Varying indices are packed 4 per 32-bit word. */
            unsigned n = (tfb->varying_count[b] + 3) / 4;

            BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
            PUSH_DATA (push, 0);
            PUSH_DATA (push, tfb->varying_count[b]);
            PUSH_DATA (push, tfb->stride[b]);
            BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
            PUSH_DATAp(push, tfb->varying_index[b], n);

            if (nvc0->tfbbuf[b])
               nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
         } else {
            IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
         }
      }
   }
   nvc0->state.tfb = tfb;

   /* Buffer bindings below are only refreshed when targets changed. */
   if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
      return;
   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);

   for (b = 0; b < nvc0->num_tfbbufs; ++b) {
      struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
      struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);

      if (tfb)
         targ->stride = tfb->stride[b];

      if (!(nvc0->tfbbuf_dirty & (1 << b)))
         continue;

      /* A previously-used target may still have its counter query in flight;
       * wait for it before rebinding.
       */
      if (!targ->clean)
         nvc0_query_fifo_wait(push, targ->pq);
      BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
      PUSH_DATA (push, 1);
      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
      PUSH_DATA (push, targ->pipe.buffer_size);
      if (!targ->clean) {
         /* Resume: fetch the saved byte offset from the query buffer. */
         nvc0_query_pushbuf_submit(push, targ->pq, 0x4);
      } else {
         PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
         targ->clean = FALSE;
      }
      BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);
   }
   /* Disable any remaining buffer slots. */
   for (; b < 4; ++b)
      IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
}
278